Fix multithread bug causing invalid ptr references

Mesh rendering was not waiting until all jobs were complete before moving on,
causing longer jobs to use old pointer references once the next frame started
rendering.
This commit is contained in:
Doyle Thai 2017-06-19 14:14:13 +10:00
parent 630522f8a3
commit e660281211
7 changed files with 250 additions and 212 deletions

View File

@ -899,17 +899,8 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
////////////////////////////////////////////////////////////////////////
DqnMemStack *const assetStack = &memory->assetStack;
DqnMemStack *const tempStack = &memory->tempStack;
state->zDepthLock = input->api.LockInit(&memory->mainStack);
if (state->zDepthLock)
{
state->blitLock = input->api.LockInit(&memory->mainStack);
if (!state->blitLock)
{
// TODO(doyle): Not enough memory die gracefully
DQN_ASSERT(DQN_INVALID_CODE_PATH);
}
}
else
state->renderLock = input->api.LockInit(&memory->mainStack);
if (!state->renderLock)
{
// TODO(doyle): Not enough memory die gracefully
DQN_ASSERT(DQN_INVALID_CODE_PATH);
@ -952,144 +943,149 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
"byte_read_check.bmp");
}
}
}
auto tempMemRegion = DqnMemStackTempRegionScoped(&memory->tempStack);
if (tempMemRegion.isInit)
{
size_t debugSize = DQN_MEGABYTE(1);
u8 *debugMemory = (u8 *)DqnMemStack_Push(&memory->tempStack, debugSize);
DqnMemStack_InitWithFixedMem(&globalDebug.memStack, debugMemory, debugSize);
DTRDebug_BeginCycleCount("DTR_Update", DTRDebugCycleCount_DTR_Update);
auto tempMemRegion = DqnMemStackTempRegionScoped(&memory->tempStack);
if (tempMemRegion.isInit)
{
size_t debugSize = DQN_MEGABYTE(1);
u8 *debugMemory = (u8 *)DqnMemStack_Push(&memory->tempStack, debugSize);
DqnMemStack_InitWithFixedMem(&globalDebug.memStack, debugMemory, debugSize);
DTRDebug_BeginCycleCount("DTR_Update", DTRDebugCycleCount_DTR_Update);
DTRRenderBuffer renderBuffer = {};
renderBuffer.width = platformRenderBuffer->width;
renderBuffer.height = platformRenderBuffer->height;
renderBuffer.bytesPerPixel = platformRenderBuffer->bytesPerPixel;
renderBuffer.memory = (u8 *)platformRenderBuffer->memory;
renderBuffer.zDepthLock = state->zDepthLock;
renderBuffer.blitLock = state->blitLock;
DTRRenderBuffer renderBuffer = {};
renderBuffer.width = platformRenderBuffer->width;
renderBuffer.height = platformRenderBuffer->height;
renderBuffer.bytesPerPixel = platformRenderBuffer->bytesPerPixel;
renderBuffer.memory = (u8 *)platformRenderBuffer->memory;
renderBuffer.renderLock = state->renderLock;
u32 zBufferSize = platformRenderBuffer->width * platformRenderBuffer->height;
renderBuffer.zBuffer = (f32 *)DqnMemStack_Push(&memory->tempStack,
zBufferSize * sizeof(*renderBuffer.zBuffer));
u32 zBufferSize = platformRenderBuffer->width * platformRenderBuffer->height;
renderBuffer.zBuffer = (f32 *)DqnMemStack_Push(
&memory->tempStack, zBufferSize * sizeof(*renderBuffer.zBuffer));
for (u32 i = 0; i < zBufferSize; i++)
renderBuffer.zBuffer[i] = DQN_F32_MIN;
for (u32 i = 0; i < zBufferSize; i++)
renderBuffer.zBuffer[i] = DQN_F32_MIN;
DTRRenderContext renderContext = {};
renderContext.renderBuffer = &renderBuffer;
renderContext.tempStack = &memory->tempStack;
renderContext.api = &input->api;
////////////////////////////////////////////////////////////////////////////
// Update and Render
////////////////////////////////////////////////////////////////////////////
DTRRender_Clear(renderContext, DqnV3_3f(0.5f, 0.0f, 1.0f));
DTRRenderContext renderContext = {};
renderContext.renderBuffer = &renderBuffer;
renderContext.tempStack = &memory->tempStack;
renderContext.api = &input->api;
////////////////////////////////////////////////////////////////////////////
// Update and Render
////////////////////////////////////////////////////////////////////////////
DTRRender_Clear(renderContext, DqnV3_3f(0.5f, 0.0f, 1.0f));
#if 1
DqnV4 colorRed = DqnV4_4f(0.8f, 0, 0, 1);
DqnV2i bufferMidP = DqnV2i_2f(renderBuffer.width * 0.5f, renderBuffer.height * 0.5f);
f32 rotation = (f32)input->timeNowInS * 0.25f;
DqnV4 colorRed = DqnV4_4f(0.8f, 0, 0, 1);
DqnV2i bufferMidP = DqnV2i_2f(renderBuffer.width * 0.5f, renderBuffer.height * 0.5f);
f32 rotation = (f32)input->timeNowInS * 0.25f;
// Triangle Drawing
{
DqnV4 redTransparent = DqnV4_4f(1, 0, 0, 0.5f);
i32 boundsOffset = 100;
DqnV3 t0[3] = {DqnV3_3i(10, 70, 0), DqnV3_3i(50, 160, 0), DqnV3_3i(70, 80, 0)};
DqnV3 t1[3] = {DqnV3_3i(180, 50, 0), DqnV3_3i(150, 1, 0), DqnV3_3i(70, 180, 0)};
DqnV3 t2[3] = {DqnV3_3i(180, 150, 0), DqnV3_3i(120, 160, 0), DqnV3_3i(130, 180, 0)};
DqnV3 t3[3] = {DqnV3_3i(boundsOffset, boundsOffset, 0),
DqnV3_3i(bufferMidP.w, renderBuffer.height - boundsOffset, 0),
DqnV3_3i(renderBuffer.width - boundsOffset, boundsOffset, 0)};
DqnV3 t4[3] = {DqnV3_3i(100, 150, 0), DqnV3_3i(200, 150, 0), DqnV3_3i(200, 250, 0)};
DqnV3 t5[3] = {DqnV3_3i(300, 150, 0), DqnV3_3i(201, 150, 0), DqnV3_3i(200, 250, 0)};
DTRRenderTransform rotatingXform = DTRRender_DefaultTriangleTransform();
rotatingXform.rotation = rotation;
if (0)
// Triangle Drawing
{
DTRDebug_BeginCycleCount("DTR_Update_RenderPrimitiveTriangles",
DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles);
DqnV4 redTransparent = DqnV4_4f(1, 0, 0, 0.5f);
DTRRender_Triangle(renderContext, t0[0], t0[1], t0[2], colorRed);
DTRRender_Triangle(renderContext, t1[0], t1[1], t1[2], colorRed);
DTRRender_Triangle(renderContext, t3[0], t3[1], t3[2], colorRed, rotatingXform);
DTRRender_Triangle(renderContext, t2[0], t2[1], t2[2], colorRed);
DTRRender_Triangle(renderContext, t4[0], t4[1], t4[2], colorRed);
DTRRender_Triangle(renderContext, t5[0], t5[1], t5[2], colorRed);
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles);
}
i32 boundsOffset = 100;
DqnV3 t0[3] = {DqnV3_3i(10, 70, 0), DqnV3_3i(50, 160, 0), DqnV3_3i(70, 80, 0)};
DqnV3 t1[3] = {DqnV3_3i(180, 50, 0), DqnV3_3i(150, 1, 0), DqnV3_3i(70, 180, 0)};
DqnV3 t2[3] = {DqnV3_3i(180, 150, 0), DqnV3_3i(120, 160, 0), DqnV3_3i(130, 180, 0)};
DqnV3 t3[3] = {DqnV3_3i(boundsOffset, boundsOffset, 0),
DqnV3_3i(bufferMidP.w, renderBuffer.height - boundsOffset, 0),
DqnV3_3i(renderBuffer.width - boundsOffset, boundsOffset, 0)};
DqnV3 t4[3] = {DqnV3_3i(100, 150, 0), DqnV3_3i(200, 150, 0), DqnV3_3i(200, 250, 0)};
DqnV3 t5[3] = {DqnV3_3i(300, 150, 0), DqnV3_3i(201, 150, 0), DqnV3_3i(200, 250, 0)};
if (1)
{
LOCAL_PERSIST bool runTinyRendererOnce = false;
if (1 && runTinyRendererOnce)
DTRRenderTransform rotatingXform = DTRRender_DefaultTriangleTransform();
rotatingXform.rotation = rotation;
if (0)
{
DTRDebug_RunTinyRenderer();
runTinyRendererOnce = false;
DTRDebug_BeginCycleCount(
"DTR_Update_RenderPrimitiveTriangles",
DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles);
DTRRender_Triangle(renderContext, t0[0], t0[1], t0[2], colorRed);
DTRRender_Triangle(renderContext, t1[0], t1[1], t1[2], colorRed);
DTRRender_Triangle(renderContext, t3[0], t3[1], t3[2], colorRed, rotatingXform);
DTRRender_Triangle(renderContext, t2[0], t2[1], t2[2], colorRed);
DTRRender_Triangle(renderContext, t4[0], t4[1], t4[2], colorRed);
DTRRender_Triangle(renderContext, t5[0], t5[1], t5[2], colorRed);
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles);
}
DTRDebug_BeginCycleCount("DTR_Update_RenderModel",
DTRDebugCycleCount_DTR_Update_RenderModel);
////////////////////////////////////////////////////////////////////////
// Draw Loaded Model
////////////////////////////////////////////////////////////////////////
const DqnV3 LIGHT = DqnV3_Normalise(DqnV3_3f(1, -1, 1.0f));
const f32 MODEL_SCALE = 1;
DTRMesh *const mesh = &state->mesh;
DqnV3 modelP = DqnV3_3f(0, 0, 0);
if (1)
{
LOCAL_PERSIST bool runTinyRendererOnce = false;
if (1 && runTinyRendererOnce)
{
DTRDebug_RunTinyRenderer();
runTinyRendererOnce = false;
}
LOCAL_PERSIST f32 modelRotation = 0;
modelRotation += (input->deltaForFrame * 20.0f);
DqnV3 axis = DqnV3_3f(0, 1, 0);
DTRDebug_BeginCycleCount("DTR_Update_RenderModel",
DTRDebugCycleCount_DTR_Update_RenderModel);
////////////////////////////////////////////////////////////////////////
// Draw Loaded Model
////////////////////////////////////////////////////////////////////////
const DqnV3 LIGHT = DqnV3_Normalise(DqnV3_3f(1, -1, 1.0f));
const f32 MODEL_SCALE = 1;
DTRMesh *const mesh = &state->mesh;
DqnV3 modelP = DqnV3_3f(0, 0, 0);
DTRRenderTransform transform = DTRRender_DefaultTransform();
transform.scale = DqnV3_1f(MODEL_SCALE);
transform.rotation = modelRotation;
transform.anchor = axis;
LOCAL_PERSIST f32 modelRotation = 0;
modelRotation += (input->deltaForFrame * 20.0f);
DqnV3 axis = DqnV3_3f(0, 1, 0);
DTRRenderLight lighting = {};
lighting.mode = DTRRenderShadingMode_Gouraud;
lighting.vector = LIGHT;
lighting.color = DqnV4_4f(1, 1, 1, 1);
DTRRenderTransform transform = DTRRender_DefaultTransform();
transform.scale = DqnV3_1f(MODEL_SCALE);
transform.rotation = modelRotation;
transform.anchor = axis;
DTRRender_Mesh(renderContext, input->jobQueue, mesh, lighting, modelP, transform);
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderModel);
DTRRenderLight lighting = {};
lighting.mode = DTRRenderShadingMode_Gouraud;
lighting.vector = LIGHT;
lighting.color = DqnV4_4f(1, 1, 1, 1);
DTRRender_Mesh(renderContext, input->jobQueue, mesh, lighting, modelP,
transform);
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderModel);
}
}
}
// Rect drawing
if (0)
{
DTRRenderTransform transform = DTRRender_DefaultTransform();
transform.rotation = rotation + 45;
// Rect drawing
if (0)
{
DTRRenderTransform transform = DTRRender_DefaultTransform();
transform.rotation = rotation + 45;
DTRRender_Rectangle(renderContext, DqnV2_1f(300.0f), DqnV2_1f(300 + 100.0f),
DqnV4_4f(0, 1.0f, 1.0f, 1.0f), transform);
}
DTRRender_Rectangle(renderContext, DqnV2_1f(300.0f), DqnV2_1f(300 + 100.0f),
DqnV4_4f(0, 1.0f, 1.0f, 1.0f), transform);
}
// Bitmap drawing
if (0)
{
DTRRenderTransform transform = DTRRender_DefaultTransform();
transform.scale = DqnV3_1f(2.0f);
// Bitmap drawing
if (0)
{
DTRRenderTransform transform = DTRRender_DefaultTransform();
transform.scale = DqnV3_1f(2.0f);
LOCAL_PERSIST DqnV2 bitmapP = DqnV2_2f(500, 250);
bitmapP.x += 2.0f * sinf((f32)input->timeNowInS * 0.5f);
LOCAL_PERSIST DqnV2 bitmapP = DqnV2_2f(500, 250);
bitmapP.x += 2.0f * sinf((f32)input->timeNowInS * 0.5f);
f32 cAngle = (f32)input->timeNowInS;
DqnV4 color = DqnV4_4f(0.5f + 0.5f * sinf(cAngle), 0.5f + 0.5f * sinf(2.9f * cAngle),
0.5f + 0.5f * cosf(10.0f * cAngle), 1.0f);
DTRRender_Bitmap(renderContext, &state->bitmap, bitmapP, transform, color);
}
f32 cAngle = (f32)input->timeNowInS;
DqnV4 color =
DqnV4_4f(0.5f + 0.5f * sinf(cAngle), 0.5f + 0.5f * sinf(2.9f * cAngle),
0.5f + 0.5f * cosf(10.0f * cAngle), 1.0f);
DTRRender_Bitmap(renderContext, &state->bitmap, bitmapP, transform, color);
}
#else
// CompAssignment(renderBuffer, input, memory);
// CompAssignment(renderBuffer, input, memory);
#endif
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update);
DTRDebug_Update(state, renderContext, input, memory);
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update);
DTRDebug_Update(state, renderContext, input, memory);
}
}
////////////////////////////////////////////////////////////////////////////
@ -1097,13 +1093,12 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
////////////////////////////////////////////////////////////////////////////
if (DTR_DEBUG)
{
// NOTE: We should have one temp region, that is the scoped region for the
// main loop which will remove itself when we leave scope.
DQN_ASSERT(memory->tempStack.tempRegionCount == 1);
DQN_ASSERT(input->api.QueueAllJobsComplete(input->jobQueue));
for (i32 i = 0; i < DQN_ARRAY_COUNT(memory->stacks); i++)
{
if (&memory->stacks[i] == &memory->tempStack) continue;
DQN_ASSERT(memory->stacks[i].tempRegionCount == 0);
}
DqnMemStack_ClearCurrBlock(&memory->tempStack, true);
}
}

View File

@ -14,8 +14,7 @@ typedef struct DTRState
DTRBitmap bitmap;
DTRMesh mesh;
struct PlatformLock *zDepthLock;
struct PlatformLock *blitLock;
struct PlatformLock *renderLock;
} DTRState;
extern PlatformFlags globalDTRPlatformFlags;

View File

@ -51,6 +51,7 @@ typedef struct PlatformJob
// Multithreading API
// Function signatures for the job-queue entry points; PlatformAPI stores one
// pointer of each type so callers can reach the platform's implementation.
// Adds a job to the queue. Callers retry while this returns false.
typedef bool PlatformAPI_QueueAddJob (PlatformJobQueue *const queue, const PlatformJob job);
// Tries to run a queued job on the calling thread. Callers loop on this until
// it returns false.
typedef bool PlatformAPI_QueueTryExecuteNextJob(PlatformJobQueue *const queue);
// Reports whether every queued job has finished. The Win32 implementation
// returns true when its numJobsToComplete counter is zero.
typedef bool PlatformAPI_QueueAllJobsComplete (PlatformJobQueue *const queue);
////////////////////////////////////////////////////////////////////////////////
// Platform Locks
@ -75,6 +76,7 @@ typedef struct PlatformAPI
PlatformAPI_QueueAddJob *QueueAddJob;
PlatformAPI_QueueTryExecuteNextJob *QueueTryExecuteNextJob;
PlatformAPI_QueueAllJobsComplete *QueueAllJobsComplete;
PlatformAPI_LockInit *LockInit;
PlatformAPI_LockAcquire *LockAcquire;

View File

@ -630,22 +630,20 @@ FILE_SCOPE inline void SIMDSetPixel(DTRRenderContext context, const i32 x, const
DTR_DEBUG_EP_TIMED_FUNCTION();
DebugSIMDAssertColorInRange(color, 0.0f, 1.0f);
u32 *const bitmapPtr = (u32 *)renderBuffer->memory;
const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4;
// If some alpha is involved, we need to apply gamma correction, but if the
// new pixel is totally opaque or invisible then we're just flat out
// overwriting/keeping the state of the pixel so we can save cycles by skipping.
f32 alpha = ((f32 *)&color)[3];
bool needGammaFix = (alpha > 0.0f || alpha < (1.0f + COLOR_EPSILON)) && (colorSpace == ColorSpace_SRGB);
bool needGammaFix =
(alpha > 0.0f || alpha < (1.0f + COLOR_EPSILON)) && (colorSpace == ColorSpace_SRGB);
if (needGammaFix) color = SIMDSRGB1ToLinearSpace(color);
// Format: u32 == (XX, RR, GG, BB)
context.api->LockAcquire(renderBuffer->blitLock);
u32 *const bitmapPtr = (u32 *)renderBuffer->memory;
const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4;
u32 srcPixel = bitmapPtr[x + (y * pitchInU32)];
__m128 src = _mm_set_ps(0,
(f32)((srcPixel >> 0) & 0xFF),
(f32)((srcPixel >> 8) & 0xFF),
__m128 src = _mm_set_ps(0, (f32)((srcPixel >> 0) & 0xFF), (f32)((srcPixel >> 8) & 0xFF),
(f32)((srcPixel >> 16) & 0xFF));
src = SIMDSRGB255ToLinearSpace1(src);
@ -665,13 +663,8 @@ FILE_SCOPE inline void SIMDSetPixel(DTRRenderContext context, const i32 x, const
f32 destB = ((f32 *)&dest)[2];
u32 pixel = // ((u32)(destA) << 24 |
(u32)(destR) << 16 |
(u32)(destG) << 8 |
(u32)(destB) << 0;
(u32)(destR) << 16 | (u32)(destG) << 8 | (u32)(destB) << 0;
bitmapPtr[x + (y * pitchInU32)] = pixel;
context.api->LockRelease(renderBuffer->blitLock);
DTRDebug_CounterIncrement(DTRDebugCounter_SetPixels);
}
// colorModulate: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
@ -783,9 +776,9 @@ FILE_SCOPE inline f32 GetCurrZDepth(DTRRenderContext context, i32 posX, i32 posY
i32 zBufferIndex = posX + (posY * zBufferPitch);
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
context.api->LockAcquire(renderBuffer->zDepthLock);
context.api->LockAcquire(renderBuffer->renderLock);
f32 currZDepth = renderBuffer->zBuffer[zBufferIndex];
context.api->LockRelease(renderBuffer->zDepthLock);
context.api->LockRelease(renderBuffer->renderLock);
return currZDepth;
}
@ -798,9 +791,9 @@ FILE_SCOPE inline void SetCurrZDepth(DTRRenderContext context, i32 posX, i32 pos
i32 zBufferIndex = posX + (posY * zBufferPitch);
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
context.api->LockAcquire(renderBuffer->zDepthLock);
context.api->LockAcquire(renderBuffer->renderLock);
renderBuffer->zBuffer[zBufferIndex] = newZDepth;
context.api->LockRelease(renderBuffer->zDepthLock);
context.api->LockRelease(renderBuffer->renderLock);
}
#define DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(type) \
@ -882,7 +875,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_Preamble);
DTRRenderBuffer *renderBuffer = context.renderBuffer;
DTRRenderBuffer *const renderBuffer = context.renderBuffer;
////////////////////////////////////////////////////////////////////////////
// Convert color
////////////////////////////////////////////////////////////////////////////
@ -911,12 +904,12 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
const u32 NUM_Y_PIXELS_TO_SIMD = 1;
// SignedArea: _mm_set_ps(unused, p3, p2, p1) ie 0=p1, 1=p2, 2=p3, 3=unused
__m128 signedAreaPixel1;
__m128 signedAreaPixel2;
__m128 signedAreaPixel1 = _mm_set_ps1(0);
__m128 signedAreaPixel2 = _mm_set_ps1(0);
__m128 signedAreaPixelDeltaX;
__m128 signedAreaPixelDeltaY;
__m128 invSignedAreaParallelogram_4x;
__m128 signedAreaPixelDeltaX = _mm_set_ps1(0);
__m128 signedAreaPixelDeltaY = _mm_set_ps1(0);
__m128 invSignedAreaParallelogram_4x = _mm_set_ps1(0);
__m128 triangleZ = _mm_set_ps(0, p3.z, p2.z, p1.z);
{
@ -1003,22 +996,11 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
((f32 *)&barycentricZ)[1] +
((f32 *)&barycentricZ)[2];
#if 0
// f32 currZDepth = GetCurrZDepth(context, posX, posY);
#else
DQN_ASSERT(renderBuffer);
i32 zBufferIndex = posX + (posY * zBufferPitch);
context.api->LockAcquire(renderBuffer->zDepthLock);
f32 currZDepth = renderBuffer->zBuffer[zBufferIndex];
context.api->LockRelease(renderBuffer->zDepthLock);
#endif
if (pixelZDepth > currZDepth)
context.api->LockAcquire(renderBuffer->renderLock);
if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex])
{
context.api->LockAcquire(renderBuffer->zDepthLock);
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
context.api->LockRelease(renderBuffer->zDepthLock);
__m128 finalColor = simdColor;
if (!ignoreLight)
@ -1046,6 +1028,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
}
SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear);
}
context.api->LockRelease(renderBuffer->renderLock);
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_RasterisePixel);
}
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
@ -1067,16 +1050,10 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
((f32 *)&barycentricZ)[1] +
((f32 *)&barycentricZ)[2];
i32 zBufferIndex = posX + (posY * zBufferPitch);
context.api->LockAcquire(renderBuffer->zDepthLock);
f32 currZDepth = renderBuffer->zBuffer[zBufferIndex];
context.api->LockRelease(renderBuffer->zDepthLock);
if (pixelZDepth > currZDepth)
context.api->LockAcquire(renderBuffer->renderLock);
if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex])
{
context.api->LockAcquire(renderBuffer->zDepthLock);
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
context.api->LockRelease(renderBuffer->zDepthLock);
__m128 finalColor = simdColor;
if (!ignoreLight)
@ -1104,6 +1081,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
}
SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear);
}
context.api->LockRelease(renderBuffer->renderLock);
}
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
}
@ -1424,7 +1402,7 @@ typedef struct RenderMeshJob
DqnV4 color;
} RenderMeshJob;
void MultiThreadedRenderMesh(struct PlatformJobQueue *const queue, void *const userData)
void MultiThreadedRenderMesh(PlatformJobQueue *const queue, void *const userData)
{
if (!queue || !userData)
{
@ -1433,18 +1411,16 @@ void MultiThreadedRenderMesh(struct PlatformJobQueue *const queue, void *const u
}
RenderMeshJob *job = (RenderMeshJob *)userData;
#if 1
TexturedTriangleInternal(job->context, job->lighting, job->v1, job->v2, job->v3, job->uv1,
job->uv2, job->uv3, job->tex, job->color);
#endif
}
void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, DTRMesh *const mesh,
DTRRenderLight lighting, const DqnV3 pos, const DTRRenderTransform transform)
{
DqnMemStack *tempStack = context.tempStack;
DTRRenderBuffer *renderBuffer = context.renderBuffer;
PlatformAPI *api = context.api;
DqnMemStack *const tempStack = context.tempStack;
DTRRenderBuffer *const renderBuffer = context.renderBuffer;
PlatformAPI *const api = context.api;
if (!mesh || !renderBuffer || !tempStack || !api || !jobQueue) return;
@ -1477,7 +1453,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
viewPModelViewProjection = DqnMat4_Mul(viewport, modelViewProjection);
}
bool RUN_MULTITHREADED = false;
bool RUN_MULTITHREADED = true;
for (u32 i = 0; i < mesh->numFaces; i++)
{
DTRMeshFace face = mesh->faces[i];
@ -1549,7 +1525,8 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
DqnV2 uv2 = mesh->texUV[uv2Index].xy;
DqnV2 uv3 = mesh->texUV[uv3Index].xy;
DqnV4 color = lighting.color;
DqnV4 color = lighting.color;
RenderLightInternal lightingInternal = {};
lightingInternal.mode = lighting.mode;
lightingInternal.vector = lighting.vector;
@ -1558,8 +1535,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
lightingInternal.normals[2] = norm3;
lightingInternal.numNormals = 3;
bool DEBUG_NO_TEX = false;
bool DEBUG_NO_TEX = false;
if (RUN_MULTITHREADED)
{
RenderMeshJob *jobData = (RenderMeshJob *)DqnMemStack_Push(tempStack, sizeof(*jobData));
@ -1612,6 +1588,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
uv1, uv2, uv3, &mesh->tex, color);
}
}
bool DEBUG_WIREFRAME = false;
if (DTR_DEBUG && DEBUG_WIREFRAME)
{
@ -1622,9 +1599,11 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
}
}
// NOTE(doyle): Complete remaining jobs and wait until all jobs finished
// before leaving function.
if (RUN_MULTITHREADED)
{
while (api->QueueTryExecuteNextJob(jobQueue))
while (api->QueueTryExecuteNextJob(jobQueue) || !api->QueueAllJobsComplete(jobQueue))
;
}
}
@ -1858,3 +1837,4 @@ void DTRRender_Clear(DTRRenderContext context, DqnV3 color)
}
}
}

View File

@ -16,12 +16,9 @@ typedef struct DTRRenderBuffer
i32 width;
i32 height;
i32 bytesPerPixel;
PlatformLock *volatile zDepthLock;
PlatformLock *volatile blitLock;
u8 *memory; // Format: XX RR GG BB, and has (width * height * bytesPerPixels) elements
f32 *zBuffer; // zBuffer has (width * height) elements
PlatformLock *renderLock;
volatile u8 *memory; // Format: XX RR GG BB, and has (width * height * bytesPerPixels) elements
volatile f32 *zBuffer; // zBuffer has (width * height) elements
} DTRRenderBuffer;

View File

@ -64,9 +64,11 @@ struct PlatformJobQueue
// NOTE: Modified by main+worker threads
LONG volatile jobToExecuteIndex;
HANDLE volatile win32Semaphore;
LONG volatile numJobsToComplete;
// NOTE: Modified by main thread ONLY
LONG volatile jobInsertIndex;
};
bool Platform_QueueAddJob(PlatformJobQueue *const queue, const PlatformJob job)
@ -76,22 +78,12 @@ bool Platform_QueueAddJob(PlatformJobQueue *const queue, const PlatformJob job)
queue->jobList[queue->jobInsertIndex] = job;
_WriteBarrier();
_mm_sfence();
queue->jobInsertIndex = newJobInsertIndex;
InterlockedIncrement(&queue->numJobsToComplete);
ReleaseSemaphore(queue->win32Semaphore, 1, NULL);
queue->jobInsertIndex = newJobInsertIndex;
return true;
}
// Debug job callback: reads userData as a pointer to an i32 and writes that
// value, together with the id of the executing thread, to the debug output.
// The queue argument is not used.
FILE_SCOPE void DebugWin32JobPrintNumber(PlatformJobQueue *const queue, void *const userData)
{
    const i32 *const numberPtr = (const i32 *)userData;
    DqnWin32_OutputDebugString("Thread %d: Printing number: %d\n", GetCurrentThreadId(),
                               *numberPtr);
}
bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue)
{
LONG originalJobToExecute = queue->jobToExecuteIndex;
@ -109,6 +101,7 @@ bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue)
{
PlatformJob job = queue->jobList[index];
job.callback(queue, job.userData);
InterlockedDecrement(&queue->numJobsToComplete);
}
return true;
@ -117,6 +110,37 @@ bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue)
return false;
}
// Returns true when the queue's numJobsToComplete counter is zero. That
// counter is incremented when a job is added and decremented after a job's
// callback returns, so zero means no job is pending or still executing.
bool Platform_QueueAllJobsComplete(PlatformJobQueue *const queue)
{
    return (queue->numJobsToComplete == 0);
}
// State for the DebugWin32IncrementCounter job-queue test.
// Number of increment jobs that have run; modified while globalDebugLock is held.
FILE_SCOPE u32 volatile globalDebugCounter;
// Slot i is set by the job that observed the counter at value i; the job
// asserts the slot was previously clear, so a repeated value is detected.
FILE_SCOPE bool volatile globalDebugCounterMemoize[2048];
// Lock acquired by DebugWin32IncrementCounter around the counter/memoize update.
FILE_SCOPE PlatformLock *globalDebugLock;
// Debug job callback: under globalDebugLock, marks the current counter value
// as seen (asserting it was not seen before) and advances the counter by one.
// The new counter value and the executing thread id are then written to the
// debug output after the lock is released. Neither argument is used.
FILE_SCOPE void DebugWin32IncrementCounter(PlatformJobQueue *const queue, void *const userData)
{
    Platform_LockAcquire(globalDebugLock);

    // Snapshot the counter once; it cannot change while the lock is held.
    const u32 slot = globalDebugCounter;
    DQN_ASSERT(!globalDebugCounterMemoize[slot]);
    globalDebugCounterMemoize[slot] = true;

    const u32 number = slot + 1;
    globalDebugCounter = number;

    Platform_LockRelease(globalDebugLock);

    DqnWin32_OutputDebugString("Thread %d: Incrementing Number: %d\n", GetCurrentThreadId(),
                               number);
}
// Debug job callback: reads userData as a pointer to an i32 and writes that
// value, together with the id of the executing thread, to the debug output.
// The queue argument is not used.
FILE_SCOPE void DebugWin32JobPrintNumber(PlatformJobQueue *const queue, void *const userData)
{
    const i32 *const numberPtr = (const i32 *)userData;
    DqnWin32_OutputDebugString("Thread %d: Printing number: %d\n", GetCurrentThreadId(),
                               *numberPtr);
}
DWORD WINAPI Win32ThreadCallback(void *lpParameter)
{
PlatformJobQueue *queue = (PlatformJobQueue *)lpParameter;
@ -682,6 +706,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
platformAPI.QueueAddJob = Platform_QueueAddJob;
platformAPI.QueueTryExecuteNextJob = Platform_QueueTryExecuteNextJob;
platformAPI.QueueAllJobsComplete = Platform_QueueAllJobsComplete;
platformAPI.LockInit = Platform_LockInit;
platformAPI.LockAcquire = Platform_LockAcquire;
@ -795,7 +820,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
}
#if 0
// DEBUG Create jobs
// DEBUG Create print jobs
for (i32 i = 0; i < 20; i++)
{
PlatformJob job = {};
@ -811,6 +836,29 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
while (Platform_QueueTryExecuteNextJob(&jobQueue))
;
#endif
#if 1
// DEBUG: Stress test for the job queue. Queues one DebugWin32IncrementCounter
// job per memoize slot, then checks every slot was visited exactly once and
// the counter reached the slot count.
globalDebugLock = Platform_LockInit(&globalPlatformMemory.mainStack);
DQN_ASSERT(globalDebugLock);
for (i32 i = 0; i < DQN_ARRAY_COUNT(globalDebugCounterMemoize); i++)
{
    PlatformJob job = {};
    job.callback = DebugWin32IncrementCounter;
    // Add failed (presumably the queue is full -- confirm in
    // Platform_QueueAddJob): run a job on this thread to make progress, retry.
    while (!Platform_QueueAddJob(&jobQueue, job))
    {
        Platform_QueueTryExecuteNextJob(&jobQueue);
    }
}
// NOTE: Help drain the queue, then wait until every job has actually finished.
// TryExecuteNextJob returning false only means there is nothing left to pick
// up; worker threads may still be inside a callback, so the checks below must
// not run until QueueAllJobsComplete reports true.
while (Platform_QueueTryExecuteNextJob(&jobQueue) ||
       !Platform_QueueAllJobsComplete(&jobQueue))
    ;
for (i32 i = 0; i < DQN_ARRAY_COUNT(globalDebugCounterMemoize); i++)
    DQN_ASSERT(globalDebugCounterMemoize[i]);
DqnWin32_OutputDebugString("\nFinal incremented value: %d\n", globalDebugCounter);
DQN_ASSERT(globalDebugCounter == DQN_ARRAY_COUNT(globalDebugCounterMemoize));
#endif
}
else
{

View File

@ -29,7 +29,6 @@ REM EHa- disable exception handling (currently it's on /EHsc since libraries n
REM GR- disable c runtime type information (we don't use)
REM MD use dynamic runtime library
REM MT use static runtime library, so build and link it into exe
REM Od disables optimisations
REM Oi enable intrinsics optimisation, let us use CPU intrinsics if there is one
REM instead of generating a call to external library (i.e. CRT).
REM Zi enables debug data, Z7 combines the debug files into one.
@ -39,26 +38,44 @@ REM wd4100 unused argument parameters
REM wd4201 nonstandard extension used: nameless struct/union
REM wd4189 local variable is initialised but not referenced
REM wd4505 unreferenced local function not used will be removed
set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -O2 -FAsc /I..\src\external\
set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -FAsc /I..\src\external\
set DLLFlags=/Fm%ProjectName% /Fo%ProjectName% /Fa%ProjectName% /Fe%ProjectName%
set Win32Flags=/FmWin32DTRenderer /FeWin32DTRenderer
REM Link libraries
set LinkLibraries=user32.lib kernel32.lib gdi32.lib
REM incremental:no, turn incremental builds off
REM opt:ref, try to remove functions from libs that are not referenced at all
set LinkFlags=-incremental:no -opt:ref -subsystem:WINDOWS -machine:x64 -nologo
set DebugMode=0
if %DebugMode%==1 goto :DebugFlags
goto :ReleaseFlags
:DebugFlags
REM Od disables optimisations
REM RTC1 runtime error checks
set CompileFlags=%CompileFlags% -Od -RTC1
goto compile
:ReleaseFlags
REM opt:icf, COMDAT folding for debugging release build
REM DEBUG:[FULL|NONE] enforce debugging for release build
set CompileFlags=%CompileFlags% -O2
set LinkFlags=%LinkFlags%
REM ////////////////////////////////////////////////////////////////////////////
REM Compile
REM ////////////////////////////////////////////////////////////////////////////
:compile
REM Clean time necessary for hours <10, which produces H:MM:SS.SS where the
REM first character of time is an empty space. CleanTime will pad a 0 if
REM necessary.
set CleanTime=%time: =0%
set TimeStamp=%date:~10,4%%date:~7,2%%date:~4,2%_%CleanTime:~0,2%%CleanTime:~3,2%%CleanTime:~6,2%
REM Link libraries
set LinkLibraries=user32.lib kernel32.lib gdi32.lib
REM incremental:no, turn incremental builds off
REM opt:ref, try to remove functions from libs that are not referenced at all
set LinkFlags=-incremental:no -opt:ref -subsystem:WINDOWS -machine:x64 -nologo
REM ////////////////////////////////////////////////////////////////////////////
REM Compile
REM ////////////////////////////////////////////////////////////////////////////
del *.pdb >NUL 2>NUL
cl %CompileFlags% %Win32Flags% ..\src\Win32DTRenderer.cpp /link %LinkLibraries% %LinkFlags%
REM cl %CompileFlags% %DLLFlags% ..\src\UnityBuild\UnityBuild.cpp /LD /link ..\src\external\easy\easy_profiler.lib /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags%