diff --git a/src/DTRenderer.cpp b/src/DTRenderer.cpp index 9916949..eb30646 100644 --- a/src/DTRenderer.cpp +++ b/src/DTRenderer.cpp @@ -899,17 +899,8 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer, //////////////////////////////////////////////////////////////////////// DqnMemStack *const assetStack = &memory->assetStack; DqnMemStack *const tempStack = &memory->tempStack; - state->zDepthLock = input->api.LockInit(&memory->mainStack); - if (state->zDepthLock) - { - state->blitLock = input->api.LockInit(&memory->mainStack); - if (!state->blitLock) - { - // TODO(doyle): Not enough memory die gracefully - DQN_ASSERT(DQN_INVALID_CODE_PATH); - } - } - else + state->renderLock = input->api.LockInit(&memory->mainStack); + if (!state->renderLock) { // TODO(doyle): Not enough memory die gracefully DQN_ASSERT(DQN_INVALID_CODE_PATH); @@ -952,144 +943,149 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer, "byte_read_check.bmp"); } } + } - auto tempMemRegion = DqnMemStackTempRegionScoped(&memory->tempStack); - if (tempMemRegion.isInit) { - size_t debugSize = DQN_MEGABYTE(1); - u8 *debugMemory = (u8 *)DqnMemStack_Push(&memory->tempStack, debugSize); - DqnMemStack_InitWithFixedMem(&globalDebug.memStack, debugMemory, debugSize); - DTRDebug_BeginCycleCount("DTR_Update", DTRDebugCycleCount_DTR_Update); + auto tempMemRegion = DqnMemStackTempRegionScoped(&memory->tempStack); + if (tempMemRegion.isInit) + { + size_t debugSize = DQN_MEGABYTE(1); + u8 *debugMemory = (u8 *)DqnMemStack_Push(&memory->tempStack, debugSize); + DqnMemStack_InitWithFixedMem(&globalDebug.memStack, debugMemory, debugSize); + DTRDebug_BeginCycleCount("DTR_Update", DTRDebugCycleCount_DTR_Update); - DTRRenderBuffer renderBuffer = {}; - renderBuffer.width = platformRenderBuffer->width; - renderBuffer.height = platformRenderBuffer->height; - renderBuffer.bytesPerPixel = platformRenderBuffer->bytesPerPixel; - renderBuffer.memory = (u8 
*)platformRenderBuffer->memory; - renderBuffer.zDepthLock = state->zDepthLock; - renderBuffer.blitLock = state->blitLock; + DTRRenderBuffer renderBuffer = {}; + renderBuffer.width = platformRenderBuffer->width; + renderBuffer.height = platformRenderBuffer->height; + renderBuffer.bytesPerPixel = platformRenderBuffer->bytesPerPixel; + renderBuffer.memory = (u8 *)platformRenderBuffer->memory; + renderBuffer.renderLock = state->renderLock; - u32 zBufferSize = platformRenderBuffer->width * platformRenderBuffer->height; - renderBuffer.zBuffer = (f32 *)DqnMemStack_Push(&memory->tempStack, - zBufferSize * sizeof(*renderBuffer.zBuffer)); + u32 zBufferSize = platformRenderBuffer->width * platformRenderBuffer->height; + renderBuffer.zBuffer = (f32 *)DqnMemStack_Push( + &memory->tempStack, zBufferSize * sizeof(*renderBuffer.zBuffer)); - for (u32 i = 0; i < zBufferSize; i++) - renderBuffer.zBuffer[i] = DQN_F32_MIN; + for (u32 i = 0; i < zBufferSize; i++) + renderBuffer.zBuffer[i] = DQN_F32_MIN; - DTRRenderContext renderContext = {}; - renderContext.renderBuffer = &renderBuffer; - renderContext.tempStack = &memory->tempStack; - renderContext.api = &input->api; - //////////////////////////////////////////////////////////////////////////// - // Update and Render - //////////////////////////////////////////////////////////////////////////// - DTRRender_Clear(renderContext, DqnV3_3f(0.5f, 0.0f, 1.0f)); + DTRRenderContext renderContext = {}; + renderContext.renderBuffer = &renderBuffer; + renderContext.tempStack = &memory->tempStack; + renderContext.api = &input->api; + //////////////////////////////////////////////////////////////////////////// + // Update and Render + //////////////////////////////////////////////////////////////////////////// + DTRRender_Clear(renderContext, DqnV3_3f(0.5f, 0.0f, 1.0f)); #if 1 - DqnV4 colorRed = DqnV4_4f(0.8f, 0, 0, 1); - DqnV2i bufferMidP = DqnV2i_2f(renderBuffer.width * 0.5f, renderBuffer.height * 0.5f); - f32 rotation = (f32)input->timeNowInS * 
0.25f; + DqnV4 colorRed = DqnV4_4f(0.8f, 0, 0, 1); + DqnV2i bufferMidP = DqnV2i_2f(renderBuffer.width * 0.5f, renderBuffer.height * 0.5f); + f32 rotation = (f32)input->timeNowInS * 0.25f; - // Triangle Drawing - { - DqnV4 redTransparent = DqnV4_4f(1, 0, 0, 0.5f); - - i32 boundsOffset = 100; - DqnV3 t0[3] = {DqnV3_3i(10, 70, 0), DqnV3_3i(50, 160, 0), DqnV3_3i(70, 80, 0)}; - DqnV3 t1[3] = {DqnV3_3i(180, 50, 0), DqnV3_3i(150, 1, 0), DqnV3_3i(70, 180, 0)}; - DqnV3 t2[3] = {DqnV3_3i(180, 150, 0), DqnV3_3i(120, 160, 0), DqnV3_3i(130, 180, 0)}; - DqnV3 t3[3] = {DqnV3_3i(boundsOffset, boundsOffset, 0), - DqnV3_3i(bufferMidP.w, renderBuffer.height - boundsOffset, 0), - DqnV3_3i(renderBuffer.width - boundsOffset, boundsOffset, 0)}; - DqnV3 t4[3] = {DqnV3_3i(100, 150, 0), DqnV3_3i(200, 150, 0), DqnV3_3i(200, 250, 0)}; - DqnV3 t5[3] = {DqnV3_3i(300, 150, 0), DqnV3_3i(201, 150, 0), DqnV3_3i(200, 250, 0)}; - - DTRRenderTransform rotatingXform = DTRRender_DefaultTriangleTransform(); - rotatingXform.rotation = rotation; - - if (0) + // Triangle Drawing { - DTRDebug_BeginCycleCount("DTR_Update_RenderPrimitiveTriangles", - DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles); + DqnV4 redTransparent = DqnV4_4f(1, 0, 0, 0.5f); - DTRRender_Triangle(renderContext, t0[0], t0[1], t0[2], colorRed); - DTRRender_Triangle(renderContext, t1[0], t1[1], t1[2], colorRed); - DTRRender_Triangle(renderContext, t3[0], t3[1], t3[2], colorRed, rotatingXform); - DTRRender_Triangle(renderContext, t2[0], t2[1], t2[2], colorRed); - DTRRender_Triangle(renderContext, t4[0], t4[1], t4[2], colorRed); - DTRRender_Triangle(renderContext, t5[0], t5[1], t5[2], colorRed); - DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles); - } + i32 boundsOffset = 100; + DqnV3 t0[3] = {DqnV3_3i(10, 70, 0), DqnV3_3i(50, 160, 0), DqnV3_3i(70, 80, 0)}; + DqnV3 t1[3] = {DqnV3_3i(180, 50, 0), DqnV3_3i(150, 1, 0), DqnV3_3i(70, 180, 0)}; + DqnV3 t2[3] = {DqnV3_3i(180, 150, 0), DqnV3_3i(120, 160, 0), 
DqnV3_3i(130, 180, 0)}; + DqnV3 t3[3] = {DqnV3_3i(boundsOffset, boundsOffset, 0), + DqnV3_3i(bufferMidP.w, renderBuffer.height - boundsOffset, 0), + DqnV3_3i(renderBuffer.width - boundsOffset, boundsOffset, 0)}; + DqnV3 t4[3] = {DqnV3_3i(100, 150, 0), DqnV3_3i(200, 150, 0), DqnV3_3i(200, 250, 0)}; + DqnV3 t5[3] = {DqnV3_3i(300, 150, 0), DqnV3_3i(201, 150, 0), DqnV3_3i(200, 250, 0)}; - if (1) - { - LOCAL_PERSIST bool runTinyRendererOnce = false; - if (1 && runTinyRendererOnce) + DTRRenderTransform rotatingXform = DTRRender_DefaultTriangleTransform(); + rotatingXform.rotation = rotation; + + if (0) { - DTRDebug_RunTinyRenderer(); - runTinyRendererOnce = false; + DTRDebug_BeginCycleCount( + "DTR_Update_RenderPrimitiveTriangles", + DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles); + + DTRRender_Triangle(renderContext, t0[0], t0[1], t0[2], colorRed); + DTRRender_Triangle(renderContext, t1[0], t1[1], t1[2], colorRed); + DTRRender_Triangle(renderContext, t3[0], t3[1], t3[2], colorRed, rotatingXform); + DTRRender_Triangle(renderContext, t2[0], t2[1], t2[2], colorRed); + DTRRender_Triangle(renderContext, t4[0], t4[1], t4[2], colorRed); + DTRRender_Triangle(renderContext, t5[0], t5[1], t5[2], colorRed); + DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles); } - DTRDebug_BeginCycleCount("DTR_Update_RenderModel", - DTRDebugCycleCount_DTR_Update_RenderModel); - //////////////////////////////////////////////////////////////////////// - // Draw Loaded Model - //////////////////////////////////////////////////////////////////////// - const DqnV3 LIGHT = DqnV3_Normalise(DqnV3_3f(1, -1, 1.0f)); - const f32 MODEL_SCALE = 1; - DTRMesh *const mesh = &state->mesh; - DqnV3 modelP = DqnV3_3f(0, 0, 0); + if (1) + { + LOCAL_PERSIST bool runTinyRendererOnce = false; + if (1 && runTinyRendererOnce) + { + DTRDebug_RunTinyRenderer(); + runTinyRendererOnce = false; + } - LOCAL_PERSIST f32 modelRotation = 0; - modelRotation += (input->deltaForFrame * 20.0f); - 
DqnV3 axis = DqnV3_3f(0, 1, 0); + DTRDebug_BeginCycleCount("DTR_Update_RenderModel", + DTRDebugCycleCount_DTR_Update_RenderModel); + //////////////////////////////////////////////////////////////////////// + // Draw Loaded Model + //////////////////////////////////////////////////////////////////////// + const DqnV3 LIGHT = DqnV3_Normalise(DqnV3_3f(1, -1, 1.0f)); + const f32 MODEL_SCALE = 1; + DTRMesh *const mesh = &state->mesh; + DqnV3 modelP = DqnV3_3f(0, 0, 0); - DTRRenderTransform transform = DTRRender_DefaultTransform(); - transform.scale = DqnV3_1f(MODEL_SCALE); - transform.rotation = modelRotation; - transform.anchor = axis; + LOCAL_PERSIST f32 modelRotation = 0; + modelRotation += (input->deltaForFrame * 20.0f); + DqnV3 axis = DqnV3_3f(0, 1, 0); - DTRRenderLight lighting = {}; - lighting.mode = DTRRenderShadingMode_Gouraud; - lighting.vector = LIGHT; - lighting.color = DqnV4_4f(1, 1, 1, 1); + DTRRenderTransform transform = DTRRender_DefaultTransform(); + transform.scale = DqnV3_1f(MODEL_SCALE); + transform.rotation = modelRotation; + transform.anchor = axis; - DTRRender_Mesh(renderContext, input->jobQueue, mesh, lighting, modelP, transform); - DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderModel); + DTRRenderLight lighting = {}; + lighting.mode = DTRRenderShadingMode_Gouraud; + lighting.vector = LIGHT; + lighting.color = DqnV4_4f(1, 1, 1, 1); + + DTRRender_Mesh(renderContext, input->jobQueue, mesh, lighting, modelP, + transform); + DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderModel); + } } - } - // Rect drawing - if (0) - { - DTRRenderTransform transform = DTRRender_DefaultTransform(); - transform.rotation = rotation + 45; + // Rect drawing + if (0) + { + DTRRenderTransform transform = DTRRender_DefaultTransform(); + transform.rotation = rotation + 45; - DTRRender_Rectangle(renderContext, DqnV2_1f(300.0f), DqnV2_1f(300 + 100.0f), - DqnV4_4f(0, 1.0f, 1.0f, 1.0f), transform); - } + DTRRender_Rectangle(renderContext, DqnV2_1f(300.0f), 
DqnV2_1f(300 + 100.0f), + DqnV4_4f(0, 1.0f, 1.0f, 1.0f), transform); + } - // Bitmap drawing - if (0) - { - DTRRenderTransform transform = DTRRender_DefaultTransform(); - transform.scale = DqnV3_1f(2.0f); + // Bitmap drawing + if (0) + { + DTRRenderTransform transform = DTRRender_DefaultTransform(); + transform.scale = DqnV3_1f(2.0f); - LOCAL_PERSIST DqnV2 bitmapP = DqnV2_2f(500, 250); - bitmapP.x += 2.0f * sinf((f32)input->timeNowInS * 0.5f); + LOCAL_PERSIST DqnV2 bitmapP = DqnV2_2f(500, 250); + bitmapP.x += 2.0f * sinf((f32)input->timeNowInS * 0.5f); - f32 cAngle = (f32)input->timeNowInS; - DqnV4 color = DqnV4_4f(0.5f + 0.5f * sinf(cAngle), 0.5f + 0.5f * sinf(2.9f * cAngle), - 0.5f + 0.5f * cosf(10.0f * cAngle), 1.0f); - DTRRender_Bitmap(renderContext, &state->bitmap, bitmapP, transform, color); - } + f32 cAngle = (f32)input->timeNowInS; + DqnV4 color = + DqnV4_4f(0.5f + 0.5f * sinf(cAngle), 0.5f + 0.5f * sinf(2.9f * cAngle), + 0.5f + 0.5f * cosf(10.0f * cAngle), 1.0f); + DTRRender_Bitmap(renderContext, &state->bitmap, bitmapP, transform, color); + } #else - // CompAssignment(renderBuffer, input, memory); +// CompAssignment(renderBuffer, input, memory); #endif - DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update); - DTRDebug_Update(state, renderContext, input, memory); + DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update); + DTRDebug_Update(state, renderContext, input, memory); + } } //////////////////////////////////////////////////////////////////////////// @@ -1097,13 +1093,12 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer, //////////////////////////////////////////////////////////////////////////// if (DTR_DEBUG) { - // NOTE: We should have one temp region, that is the scoped region for the - // main loop which will remove itself when we leave scope. 
- DQN_ASSERT(memory->tempStack.tempRegionCount == 1); + DQN_ASSERT(input->api.QueueAllJobsComplete(input->jobQueue)); for (i32 i = 0; i < DQN_ARRAY_COUNT(memory->stacks); i++) { if (&memory->stacks[i] == &memory->tempStack) continue; DQN_ASSERT(memory->stacks[i].tempRegionCount == 0); } + DqnMemStack_ClearCurrBlock(&memory->tempStack, true); } } diff --git a/src/DTRenderer.h b/src/DTRenderer.h index 849d635..4abede4 100644 --- a/src/DTRenderer.h +++ b/src/DTRenderer.h @@ -14,8 +14,7 @@ typedef struct DTRState DTRBitmap bitmap; DTRMesh mesh; - struct PlatformLock *zDepthLock; - struct PlatformLock *blitLock; + struct PlatformLock *renderLock; } DTRState; extern PlatformFlags globalDTRPlatformFlags; diff --git a/src/DTRendererPlatform.h b/src/DTRendererPlatform.h index 348e5bf..d44f37f 100644 --- a/src/DTRendererPlatform.h +++ b/src/DTRendererPlatform.h @@ -51,6 +51,7 @@ typedef struct PlatformJob // Multithreading API typedef bool PlatformAPI_QueueAddJob (PlatformJobQueue *const queue, const PlatformJob job); typedef bool PlatformAPI_QueueTryExecuteNextJob(PlatformJobQueue *const queue); +typedef bool PlatformAPI_QueueAllJobsComplete (PlatformJobQueue *const queue); //////////////////////////////////////////////////////////////////////////////// // Platform Locks @@ -75,6 +76,7 @@ typedef struct PlatformAPI PlatformAPI_QueueAddJob *QueueAddJob; PlatformAPI_QueueTryExecuteNextJob *QueueTryExecuteNextJob; + PlatformAPI_QueueAllJobsComplete *QueueAllJobsComplete; PlatformAPI_LockInit *LockInit; PlatformAPI_LockAcquire *LockAcquire; diff --git a/src/DTRendererRender.cpp b/src/DTRendererRender.cpp index c5c97e9..d5bfc5d 100644 --- a/src/DTRendererRender.cpp +++ b/src/DTRendererRender.cpp @@ -630,22 +630,20 @@ FILE_SCOPE inline void SIMDSetPixel(DTRRenderContext context, const i32 x, const DTR_DEBUG_EP_TIMED_FUNCTION(); DebugSIMDAssertColorInRange(color, 0.0f, 1.0f); - u32 *const bitmapPtr = (u32 *)renderBuffer->memory; - const u32 pitchInU32 = (renderBuffer->width * 
renderBuffer->bytesPerPixel) / 4; - // If some alpha is involved, we need to apply gamma correction, but if the // new pixel is totally opaque or invisible then we're just flat out // overwriting/keeping the state of the pixel so we can save cycles by skipping. f32 alpha = ((f32 *)&color)[3]; - bool needGammaFix = (alpha > 0.0f || alpha < (1.0f + COLOR_EPSILON)) && (colorSpace == ColorSpace_SRGB); + bool needGammaFix = + (alpha > 0.0f || alpha < (1.0f + COLOR_EPSILON)) && (colorSpace == ColorSpace_SRGB); if (needGammaFix) color = SIMDSRGB1ToLinearSpace(color); // Format: u32 == (XX, RR, GG, BB) - context.api->LockAcquire(renderBuffer->blitLock); + u32 *const bitmapPtr = (u32 *)renderBuffer->memory; + const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4; + u32 srcPixel = bitmapPtr[x + (y * pitchInU32)]; - __m128 src = _mm_set_ps(0, - (f32)((srcPixel >> 0) & 0xFF), - (f32)((srcPixel >> 8) & 0xFF), + __m128 src = _mm_set_ps(0, (f32)((srcPixel >> 0) & 0xFF), (f32)((srcPixel >> 8) & 0xFF), (f32)((srcPixel >> 16) & 0xFF)); src = SIMDSRGB255ToLinearSpace1(src); @@ -665,13 +663,8 @@ FILE_SCOPE inline void SIMDSetPixel(DTRRenderContext context, const i32 x, const f32 destB = ((f32 *)&dest)[2]; u32 pixel = // ((u32)(destA) << 24 | - (u32)(destR) << 16 | - (u32)(destG) << 8 | - (u32)(destB) << 0; + (u32)(destR) << 16 | (u32)(destG) << 8 | (u32)(destB) << 0; bitmapPtr[x + (y * pitchInU32)] = pixel; - context.api->LockRelease(renderBuffer->blitLock); - - DTRDebug_CounterIncrement(DTRDebugCounter_SetPixels); } // colorModulate: _mm_set_ps(a, b, g, r) ie. 
0=r, 1=g, 2=b, 3=a @@ -783,9 +776,9 @@ FILE_SCOPE inline f32 GetCurrZDepth(DTRRenderContext context, i32 posX, i32 posY i32 zBufferIndex = posX + (posY * zBufferPitch); DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height)); - context.api->LockAcquire(renderBuffer->zDepthLock); + context.api->LockAcquire(renderBuffer->renderLock); f32 currZDepth = renderBuffer->zBuffer[zBufferIndex]; - context.api->LockRelease(renderBuffer->zDepthLock); + context.api->LockRelease(renderBuffer->renderLock); return currZDepth; } @@ -798,9 +791,9 @@ FILE_SCOPE inline void SetCurrZDepth(DTRRenderContext context, i32 posX, i32 pos i32 zBufferIndex = posX + (posY * zBufferPitch); DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height)); - context.api->LockAcquire(renderBuffer->zDepthLock); + context.api->LockAcquire(renderBuffer->renderLock); renderBuffer->zBuffer[zBufferIndex] = newZDepth; - context.api->LockRelease(renderBuffer->zDepthLock); + context.api->LockRelease(renderBuffer->renderLock); } #define DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(type) \ @@ -882,7 +875,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_Preamble); - DTRRenderBuffer *renderBuffer = context.renderBuffer; + DTRRenderBuffer *const renderBuffer = context.renderBuffer; //////////////////////////////////////////////////////////////////////////// // Convert color //////////////////////////////////////////////////////////////////////////// @@ -911,12 +904,12 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, const u32 NUM_Y_PIXELS_TO_SIMD = 1; // SignedArea: _mm_set_ps(unused, p3, p2, p1) ie 0=p1, 1=p1, 2=p3, 3=unused - __m128 signedAreaPixel1; - __m128 signedAreaPixel2; + __m128 signedAreaPixel1 = _mm_set_ps1(0); + __m128 signedAreaPixel2 = _mm_set_ps1(0); - __m128 signedAreaPixelDeltaX; - __m128 signedAreaPixelDeltaY; - __m128 invSignedAreaParallelogram_4x; + __m128 signedAreaPixelDeltaX = _mm_set_ps1(0); + __m128 
signedAreaPixelDeltaY = _mm_set_ps1(0); + __m128 invSignedAreaParallelogram_4x = _mm_set_ps1(0); __m128 triangleZ = _mm_set_ps(0, p3.z, p2.z, p1.z); { @@ -1003,22 +996,11 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, ((f32 *)&barycentricZ)[1] + ((f32 *)&barycentricZ)[2]; -#if 0 - // f32 currZDepth = GetCurrZDepth(context, posX, posY); -#else - DQN_ASSERT(renderBuffer); i32 zBufferIndex = posX + (posY * zBufferPitch); - - context.api->LockAcquire(renderBuffer->zDepthLock); - f32 currZDepth = renderBuffer->zBuffer[zBufferIndex]; - context.api->LockRelease(renderBuffer->zDepthLock); -#endif - if (pixelZDepth > currZDepth) + context.api->LockAcquire(renderBuffer->renderLock); + if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex]) { - - context.api->LockAcquire(renderBuffer->zDepthLock); renderBuffer->zBuffer[zBufferIndex] = pixelZDepth; - context.api->LockRelease(renderBuffer->zDepthLock); __m128 finalColor = simdColor; if (!ignoreLight) @@ -1046,6 +1028,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, } SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear); } + context.api->LockRelease(renderBuffer->renderLock); DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_RasterisePixel); } signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX); @@ -1067,16 +1050,10 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, ((f32 *)&barycentricZ)[1] + ((f32 *)&barycentricZ)[2]; i32 zBufferIndex = posX + (posY * zBufferPitch); - - context.api->LockAcquire(renderBuffer->zDepthLock); - f32 currZDepth = renderBuffer->zBuffer[zBufferIndex]; - context.api->LockRelease(renderBuffer->zDepthLock); - - if (pixelZDepth > currZDepth) + context.api->LockAcquire(renderBuffer->renderLock); + if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex]) { - context.api->LockAcquire(renderBuffer->zDepthLock); renderBuffer->zBuffer[zBufferIndex] = pixelZDepth; - context.api->LockRelease(renderBuffer->zDepthLock); __m128 finalColor = simdColor; if (!ignoreLight) @@ 
-1104,6 +1081,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, } SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear); } + context.api->LockRelease(renderBuffer->renderLock); } signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX); } @@ -1424,7 +1402,7 @@ typedef struct RenderMeshJob DqnV4 color; } RenderMeshJob; -void MultiThreadedRenderMesh(struct PlatformJobQueue *const queue, void *const userData) +void MultiThreadedRenderMesh(PlatformJobQueue *const queue, void *const userData) { if (!queue || !userData) { @@ -1433,18 +1411,16 @@ void MultiThreadedRenderMesh(struct PlatformJobQueue *const queue, void *const u } RenderMeshJob *job = (RenderMeshJob *)userData; -#if 1 TexturedTriangleInternal(job->context, job->lighting, job->v1, job->v2, job->v3, job->uv1, job->uv2, job->uv3, job->tex, job->color); -#endif } void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, DTRMesh *const mesh, DTRRenderLight lighting, const DqnV3 pos, const DTRRenderTransform transform) { - DqnMemStack *tempStack = context.tempStack; - DTRRenderBuffer *renderBuffer = context.renderBuffer; - PlatformAPI *api = context.api; + DqnMemStack *const tempStack = context.tempStack; + DTRRenderBuffer *const renderBuffer = context.renderBuffer; + PlatformAPI *const api = context.api; if (!mesh || !renderBuffer || !tempStack || !api || !jobQueue) return; @@ -1477,7 +1453,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, viewPModelViewProjection = DqnMat4_Mul(viewport, modelViewProjection); } - bool RUN_MULTITHREADED = false; + bool RUN_MULTITHREADED = true; for (u32 i = 0; i < mesh->numFaces; i++) { DTRMeshFace face = mesh->faces[i]; @@ -1549,7 +1525,8 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, DqnV2 uv2 = mesh->texUV[uv2Index].xy; DqnV2 uv3 = mesh->texUV[uv3Index].xy; - DqnV4 color = lighting.color; + DqnV4 color = lighting.color; + RenderLightInternal lightingInternal = 
{}; lightingInternal.mode = lighting.mode; lightingInternal.vector = lighting.vector; @@ -1558,8 +1535,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, lightingInternal.normals[2] = norm3; lightingInternal.numNormals = 3; - bool DEBUG_NO_TEX = false; - + bool DEBUG_NO_TEX = false; if (RUN_MULTITHREADED) { RenderMeshJob *jobData = (RenderMeshJob *)DqnMemStack_Push(tempStack, sizeof(*jobData)); @@ -1612,6 +1588,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, uv1, uv2, uv3, &mesh->tex, color); } } + bool DEBUG_WIREFRAME = false; if (DTR_DEBUG && DEBUG_WIREFRAME) { @@ -1622,9 +1599,11 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, } } + // NOTE(doyle): Complete remaining jobs and wait until all jobs finished + // before leaving function. if (RUN_MULTITHREADED) { - while (api->QueueTryExecuteNextJob(jobQueue)) + while (api->QueueTryExecuteNextJob(jobQueue) || !api->QueueAllJobsComplete(jobQueue)) ; } } @@ -1858,3 +1837,4 @@ void DTRRender_Clear(DTRRenderContext context, DqnV3 color) } } } + diff --git a/src/DTRendererRender.h b/src/DTRendererRender.h index 3d4f6fc..81d1c21 100644 --- a/src/DTRendererRender.h +++ b/src/DTRendererRender.h @@ -16,12 +16,9 @@ typedef struct DTRRenderBuffer i32 width; i32 height; i32 bytesPerPixel; - - PlatformLock *volatile zDepthLock; - PlatformLock *volatile blitLock; - - u8 *memory; // Format: XX RR GG BB, and has (width * height * bytesPerPixels) elements - f32 *zBuffer; // zBuffer has (width * height) elements + PlatformLock *renderLock; + volatile u8 *memory; // Format: XX RR GG BB, and has (width * height * bytesPerPixels) elements + volatile f32 *zBuffer; // zBuffer has (width * height) elements } DTRRenderBuffer; diff --git a/src/Win32DTRenderer.cpp b/src/Win32DTRenderer.cpp index c8a25f4..b1b087f 100644 --- a/src/Win32DTRenderer.cpp +++ b/src/Win32DTRenderer.cpp @@ -64,9 +64,11 @@ struct PlatformJobQueue // NOTE: Modified 
by main+worker threads LONG volatile jobToExecuteIndex; HANDLE volatile win32Semaphore; + LONG volatile numJobsToComplete; // NOTE: Modified by main thread ONLY LONG volatile jobInsertIndex; + }; bool Platform_QueueAddJob(PlatformJobQueue *const queue, const PlatformJob job) @@ -76,22 +78,12 @@ bool Platform_QueueAddJob(PlatformJobQueue *const queue, const PlatformJob job) queue->jobList[queue->jobInsertIndex] = job; - _WriteBarrier(); - _mm_sfence(); - + InterlockedIncrement(&queue->numJobsToComplete); /* count the job as outstanding (acts as the barrier after the jobList write); the insert index below is then published BEFORE ReleaseSemaphore so a woken worker can see the new job */ queue->jobInsertIndex = newJobInsertIndex; ReleaseSemaphore(queue->win32Semaphore, 1, NULL); - return true; } -FILE_SCOPE void DebugWin32JobPrintNumber(PlatformJobQueue *const queue, void *const userData) -{ - i32 numberToPrint = *((i32 *)userData); - DqnWin32_OutputDebugString("Thread %d: Printing number: %d\n", GetCurrentThreadId(), - numberToPrint); -} - bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue) { LONG originalJobToExecute = queue->jobToExecuteIndex; @@ -109,6 +101,7 @@ bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue) { PlatformJob job = queue->jobList[index]; job.callback(queue, job.userData); + InterlockedDecrement(&queue->numJobsToComplete); } return true; @@ -117,6 +110,37 @@ bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue) return false; } +bool Platform_QueueAllJobsComplete(PlatformJobQueue *const queue) +{ + bool result = (queue->numJobsToComplete == 0); + return result; +} + +FILE_SCOPE u32 volatile globalDebugCounter; +FILE_SCOPE bool volatile globalDebugCounterMemoize[2048]; +FILE_SCOPE PlatformLock *globalDebugLock; +FILE_SCOPE void DebugWin32IncrementCounter(PlatformJobQueue *const queue, void *const userData) +{ + Platform_LockAcquire(globalDebugLock); + DQN_ASSERT(!globalDebugCounterMemoize[globalDebugCounter]); + globalDebugCounterMemoize[globalDebugCounter] = true; + globalDebugCounter++; + u32 number = globalDebugCounter; +
Platform_LockRelease(globalDebugLock); + + DqnWin32_OutputDebugString("Thread %d: Incrementing Number: %d\n", GetCurrentThreadId(), + number); +} + +FILE_SCOPE void DebugWin32JobPrintNumber(PlatformJobQueue *const queue, void *const userData) +{ + i32 numberToPrint = *((i32 *)userData); + DqnWin32_OutputDebugString("Thread %d: Printing number: %d\n", GetCurrentThreadId(), + numberToPrint); +} + + + DWORD WINAPI Win32ThreadCallback(void *lpParameter) { PlatformJobQueue *queue = (PlatformJobQueue *)lpParameter; @@ -682,6 +706,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi platformAPI.QueueAddJob = Platform_QueueAddJob; platformAPI.QueueTryExecuteNextJob = Platform_QueueTryExecuteNextJob; + platformAPI.QueueAllJobsComplete = Platform_QueueAllJobsComplete; platformAPI.LockInit = Platform_LockInit; platformAPI.LockAcquire = Platform_LockAcquire; @@ -795,7 +820,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi } #if 0 - // DEBUG Create jobs + // DEBUG Create print jobs for (i32 i = 0; i < 20; i++) { PlatformJob job = {}; @@ -811,6 +836,29 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi while (Platform_QueueTryExecuteNextJob(&jobQueue)) ; #endif + +#if 1 + globalDebugLock = Platform_LockInit(&globalPlatformMemory.mainStack); + DQN_ASSERT(globalDebugLock); + for (i32 i = 0; i < DQN_ARRAY_COUNT(globalDebugCounterMemoize); i++) + { + PlatformJob job = {}; + job.callback = DebugWin32IncrementCounter; + while (!Platform_QueueAddJob(&jobQueue, job)) + { + Platform_QueueTryExecuteNextJob(&jobQueue); + } + } + + while (Platform_QueueTryExecuteNextJob(&jobQueue) || !Platform_QueueAllJobsComplete(&jobQueue)) /* drain the queue, then keep spinning until jobs still running on other threads have finished, so the asserts below see every increment */ + ; + + for (i32 i = 0; i < DQN_ARRAY_COUNT(globalDebugCounterMemoize); i++) + DQN_ASSERT(globalDebugCounterMemoize[i]); + + DqnWin32_OutputDebugString("\nFinal incremented value: %d\n", globalDebugCounter); + DQN_ASSERT(globalDebugCounter == DQN_ARRAY_COUNT(globalDebugCounterMemoize)); +#endif } else { diff
--git a/src/build.bat b/src/build.bat index 9b76365..c30124b 100644 --- a/src/build.bat +++ b/src/build.bat @@ -29,7 +29,6 @@ REM EHa- disable exception handling (currently it's on /EHsc since libraries n REM GR- disable c runtime type information (we don't use) REM MD use dynamic runtime library REM MT use static runtime library, so build and link it into exe -REM Od disables optimisations REM Oi enable intrinsics optimisation, let us use CPU intrinsics if there is one REM instead of generating a call to external library (i.e. CRT). REM Zi enables debug data, Z7 combines the debug files into one. @@ -39,26 +38,44 @@ REM wd4100 unused argument parameters REM wd4201 nonstandard extension used: nameless struct/union REM wd4189 local variable is initialised but not referenced REM wd4505 unreferenced local function not used will be removed -set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -O2 -FAsc /I..\src\external\ +set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -FAsc /I..\src\external\ set DLLFlags=/Fm%ProjectName% /Fo%ProjectName% /Fa%ProjectName% /Fe%ProjectName% set Win32Flags=/FmWin32DTRenderer /FeWin32DTRenderer +REM Link libraries +set LinkLibraries=user32.lib kernel32.lib gdi32.lib + +REM incremental:no, turn incremental builds off +REM opt:ref, try to remove functions from libs that are not referenced at all +set LinkFlags=-incremental:no -opt:ref -subsystem:WINDOWS -machine:x64 -nologo + +set DebugMode=0 + +if %DebugMode%==1 goto :DebugFlags +goto :ReleaseFlags + +:DebugFlags +REM Od disables optimisations +REM RTC1 runtime error checks +set CompileFlags=%CompileFlags% -Od -RTC1 +goto compile + +:ReleaseFlags +REM opt:icf, COMDAT folding for debugging release build +REM DEBUG:[FULL|NONE] enforce debugging for release build +set CompileFlags=%CompileFlags% -O2 +set LinkFlags=%LinkFlags% + +REM //////////////////////////////////////////////////////////////////////////// +REM Compile +REM 
//////////////////////////////////////////////////////////////////////////// +:compile REM Clean time necessary for hours <10, which produces H:MM:SS.SS where the REM first character of time is an empty space. CleanTime will pad a 0 if REM necessary. set CleanTime=%time: =0% set TimeStamp=%date:~10,4%%date:~7,2%%date:~4,2%_%CleanTime:~0,2%%CleanTime:~3,2%%CleanTime:~6,2% -REM Link libraries -set LinkLibraries=user32.lib kernel32.lib gdi32.lib - -REM incremental:no, turn incremental builds off -REM opt:ref, try to remove functions from libs that are not referenced at all -set LinkFlags=-incremental:no -opt:ref -subsystem:WINDOWS -machine:x64 -nologo - -REM //////////////////////////////////////////////////////////////////////////// -REM Compile -REM //////////////////////////////////////////////////////////////////////////// del *.pdb >NUL 2>NUL cl %CompileFlags% %Win32Flags% ..\src\Win32DTRenderer.cpp /link %LinkLibraries% %LinkFlags% REM cl %CompileFlags% %DLLFlags% ..\src\UnityBuild\UnityBuild.cpp /LD /link ..\src\external\easy\easy_profiler.lib /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags%