Fix multithread bug causing invalid ptr references
Mesh rendering did not wait for all queued jobs to complete before moving on, so longer-running jobs could still be using stale pointer references once the next frame started rendering.
This commit is contained in:
parent
630522f8a3
commit
e660281211
@ -899,17 +899,8 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
DqnMemStack *const assetStack = &memory->assetStack;
|
DqnMemStack *const assetStack = &memory->assetStack;
|
||||||
DqnMemStack *const tempStack = &memory->tempStack;
|
DqnMemStack *const tempStack = &memory->tempStack;
|
||||||
state->zDepthLock = input->api.LockInit(&memory->mainStack);
|
state->renderLock = input->api.LockInit(&memory->mainStack);
|
||||||
if (state->zDepthLock)
|
if (!state->renderLock)
|
||||||
{
|
|
||||||
state->blitLock = input->api.LockInit(&memory->mainStack);
|
|
||||||
if (!state->blitLock)
|
|
||||||
{
|
|
||||||
// TODO(doyle): Not enough memory die gracefully
|
|
||||||
DQN_ASSERT(DQN_INVALID_CODE_PATH);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
// TODO(doyle): Not enough memory die gracefully
|
// TODO(doyle): Not enough memory die gracefully
|
||||||
DQN_ASSERT(DQN_INVALID_CODE_PATH);
|
DQN_ASSERT(DQN_INVALID_CODE_PATH);
|
||||||
@ -952,8 +943,10 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
|
|||||||
"byte_read_check.bmp");
|
"byte_read_check.bmp");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
auto tempMemRegion = DqnMemStackTempRegionScoped(&memory->tempStack);
|
auto tempMemRegion = DqnMemStackTempRegionScoped(&memory->tempStack);
|
||||||
if (tempMemRegion.isInit)
|
if (tempMemRegion.isInit)
|
||||||
{
|
{
|
||||||
@ -967,12 +960,11 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
|
|||||||
renderBuffer.height = platformRenderBuffer->height;
|
renderBuffer.height = platformRenderBuffer->height;
|
||||||
renderBuffer.bytesPerPixel = platformRenderBuffer->bytesPerPixel;
|
renderBuffer.bytesPerPixel = platformRenderBuffer->bytesPerPixel;
|
||||||
renderBuffer.memory = (u8 *)platformRenderBuffer->memory;
|
renderBuffer.memory = (u8 *)platformRenderBuffer->memory;
|
||||||
renderBuffer.zDepthLock = state->zDepthLock;
|
renderBuffer.renderLock = state->renderLock;
|
||||||
renderBuffer.blitLock = state->blitLock;
|
|
||||||
|
|
||||||
u32 zBufferSize = platformRenderBuffer->width * platformRenderBuffer->height;
|
u32 zBufferSize = platformRenderBuffer->width * platformRenderBuffer->height;
|
||||||
renderBuffer.zBuffer = (f32 *)DqnMemStack_Push(&memory->tempStack,
|
renderBuffer.zBuffer = (f32 *)DqnMemStack_Push(
|
||||||
zBufferSize * sizeof(*renderBuffer.zBuffer));
|
&memory->tempStack, zBufferSize * sizeof(*renderBuffer.zBuffer));
|
||||||
|
|
||||||
for (u32 i = 0; i < zBufferSize; i++)
|
for (u32 i = 0; i < zBufferSize; i++)
|
||||||
renderBuffer.zBuffer[i] = DQN_F32_MIN;
|
renderBuffer.zBuffer[i] = DQN_F32_MIN;
|
||||||
@ -1010,7 +1002,8 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
|
|||||||
|
|
||||||
if (0)
|
if (0)
|
||||||
{
|
{
|
||||||
DTRDebug_BeginCycleCount("DTR_Update_RenderPrimitiveTriangles",
|
DTRDebug_BeginCycleCount(
|
||||||
|
"DTR_Update_RenderPrimitiveTriangles",
|
||||||
DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles);
|
DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles);
|
||||||
|
|
||||||
DTRRender_Triangle(renderContext, t0[0], t0[1], t0[2], colorRed);
|
DTRRender_Triangle(renderContext, t0[0], t0[1], t0[2], colorRed);
|
||||||
@ -1055,7 +1048,8 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
|
|||||||
lighting.vector = LIGHT;
|
lighting.vector = LIGHT;
|
||||||
lighting.color = DqnV4_4f(1, 1, 1, 1);
|
lighting.color = DqnV4_4f(1, 1, 1, 1);
|
||||||
|
|
||||||
DTRRender_Mesh(renderContext, input->jobQueue, mesh, lighting, modelP, transform);
|
DTRRender_Mesh(renderContext, input->jobQueue, mesh, lighting, modelP,
|
||||||
|
transform);
|
||||||
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderModel);
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderModel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1080,30 +1074,31 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
|
|||||||
bitmapP.x += 2.0f * sinf((f32)input->timeNowInS * 0.5f);
|
bitmapP.x += 2.0f * sinf((f32)input->timeNowInS * 0.5f);
|
||||||
|
|
||||||
f32 cAngle = (f32)input->timeNowInS;
|
f32 cAngle = (f32)input->timeNowInS;
|
||||||
DqnV4 color = DqnV4_4f(0.5f + 0.5f * sinf(cAngle), 0.5f + 0.5f * sinf(2.9f * cAngle),
|
DqnV4 color =
|
||||||
|
DqnV4_4f(0.5f + 0.5f * sinf(cAngle), 0.5f + 0.5f * sinf(2.9f * cAngle),
|
||||||
0.5f + 0.5f * cosf(10.0f * cAngle), 1.0f);
|
0.5f + 0.5f * cosf(10.0f * cAngle), 1.0f);
|
||||||
DTRRender_Bitmap(renderContext, &state->bitmap, bitmapP, transform, color);
|
DTRRender_Bitmap(renderContext, &state->bitmap, bitmapP, transform, color);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
// CompAssignment(renderBuffer, input, memory);
|
// CompAssignment(renderBuffer, input, memory);
|
||||||
#endif
|
#endif
|
||||||
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update);
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update);
|
||||||
DTRDebug_Update(state, renderContext, input, memory);
|
DTRDebug_Update(state, renderContext, input, memory);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// End Update
|
// End Update
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
if (DTR_DEBUG)
|
if (DTR_DEBUG)
|
||||||
{
|
{
|
||||||
// NOTE: We should have one temp region, that is the scoped region for the
|
DQN_ASSERT(input->api.QueueAllJobsComplete(input->jobQueue));
|
||||||
// main loop which will remove itself when we leave scope.
|
|
||||||
DQN_ASSERT(memory->tempStack.tempRegionCount == 1);
|
|
||||||
for (i32 i = 0; i < DQN_ARRAY_COUNT(memory->stacks); i++)
|
for (i32 i = 0; i < DQN_ARRAY_COUNT(memory->stacks); i++)
|
||||||
{
|
{
|
||||||
if (&memory->stacks[i] == &memory->tempStack) continue;
|
if (&memory->stacks[i] == &memory->tempStack) continue;
|
||||||
DQN_ASSERT(memory->stacks[i].tempRegionCount == 0);
|
DQN_ASSERT(memory->stacks[i].tempRegionCount == 0);
|
||||||
}
|
}
|
||||||
|
DqnMemStack_ClearCurrBlock(&memory->tempStack, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,8 +14,7 @@ typedef struct DTRState
|
|||||||
DTRBitmap bitmap;
|
DTRBitmap bitmap;
|
||||||
DTRMesh mesh;
|
DTRMesh mesh;
|
||||||
|
|
||||||
struct PlatformLock *zDepthLock;
|
struct PlatformLock *renderLock;
|
||||||
struct PlatformLock *blitLock;
|
|
||||||
} DTRState;
|
} DTRState;
|
||||||
|
|
||||||
extern PlatformFlags globalDTRPlatformFlags;
|
extern PlatformFlags globalDTRPlatformFlags;
|
||||||
|
@ -51,6 +51,7 @@ typedef struct PlatformJob
|
|||||||
// Multithreading API
|
// Multithreading API
|
||||||
typedef bool PlatformAPI_QueueAddJob (PlatformJobQueue *const queue, const PlatformJob job);
|
typedef bool PlatformAPI_QueueAddJob (PlatformJobQueue *const queue, const PlatformJob job);
|
||||||
typedef bool PlatformAPI_QueueTryExecuteNextJob(PlatformJobQueue *const queue);
|
typedef bool PlatformAPI_QueueTryExecuteNextJob(PlatformJobQueue *const queue);
|
||||||
|
typedef bool PlatformAPI_QueueAllJobsComplete (PlatformJobQueue *const queue);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Platform Locks
|
// Platform Locks
|
||||||
@ -75,6 +76,7 @@ typedef struct PlatformAPI
|
|||||||
|
|
||||||
PlatformAPI_QueueAddJob *QueueAddJob;
|
PlatformAPI_QueueAddJob *QueueAddJob;
|
||||||
PlatformAPI_QueueTryExecuteNextJob *QueueTryExecuteNextJob;
|
PlatformAPI_QueueTryExecuteNextJob *QueueTryExecuteNextJob;
|
||||||
|
PlatformAPI_QueueAllJobsComplete *QueueAllJobsComplete;
|
||||||
|
|
||||||
PlatformAPI_LockInit *LockInit;
|
PlatformAPI_LockInit *LockInit;
|
||||||
PlatformAPI_LockAcquire *LockAcquire;
|
PlatformAPI_LockAcquire *LockAcquire;
|
||||||
|
@ -630,22 +630,20 @@ FILE_SCOPE inline void SIMDSetPixel(DTRRenderContext context, const i32 x, const
|
|||||||
DTR_DEBUG_EP_TIMED_FUNCTION();
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
||||||
DebugSIMDAssertColorInRange(color, 0.0f, 1.0f);
|
DebugSIMDAssertColorInRange(color, 0.0f, 1.0f);
|
||||||
|
|
||||||
u32 *const bitmapPtr = (u32 *)renderBuffer->memory;
|
|
||||||
const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4;
|
|
||||||
|
|
||||||
// If some alpha is involved, we need to apply gamma correction, but if the
|
// If some alpha is involved, we need to apply gamma correction, but if the
|
||||||
// new pixel is totally opaque or invisible then we're just flat out
|
// new pixel is totally opaque or invisible then we're just flat out
|
||||||
// overwriting/keeping the state of the pixel so we can save cycles by skipping.
|
// overwriting/keeping the state of the pixel so we can save cycles by skipping.
|
||||||
f32 alpha = ((f32 *)&color)[3];
|
f32 alpha = ((f32 *)&color)[3];
|
||||||
bool needGammaFix = (alpha > 0.0f || alpha < (1.0f + COLOR_EPSILON)) && (colorSpace == ColorSpace_SRGB);
|
bool needGammaFix =
|
||||||
|
(alpha > 0.0f || alpha < (1.0f + COLOR_EPSILON)) && (colorSpace == ColorSpace_SRGB);
|
||||||
if (needGammaFix) color = SIMDSRGB1ToLinearSpace(color);
|
if (needGammaFix) color = SIMDSRGB1ToLinearSpace(color);
|
||||||
|
|
||||||
// Format: u32 == (XX, RR, GG, BB)
|
// Format: u32 == (XX, RR, GG, BB)
|
||||||
context.api->LockAcquire(renderBuffer->blitLock);
|
u32 *const bitmapPtr = (u32 *)renderBuffer->memory;
|
||||||
|
const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4;
|
||||||
|
|
||||||
u32 srcPixel = bitmapPtr[x + (y * pitchInU32)];
|
u32 srcPixel = bitmapPtr[x + (y * pitchInU32)];
|
||||||
__m128 src = _mm_set_ps(0,
|
__m128 src = _mm_set_ps(0, (f32)((srcPixel >> 0) & 0xFF), (f32)((srcPixel >> 8) & 0xFF),
|
||||||
(f32)((srcPixel >> 0) & 0xFF),
|
|
||||||
(f32)((srcPixel >> 8) & 0xFF),
|
|
||||||
(f32)((srcPixel >> 16) & 0xFF));
|
(f32)((srcPixel >> 16) & 0xFF));
|
||||||
src = SIMDSRGB255ToLinearSpace1(src);
|
src = SIMDSRGB255ToLinearSpace1(src);
|
||||||
|
|
||||||
@ -665,13 +663,8 @@ FILE_SCOPE inline void SIMDSetPixel(DTRRenderContext context, const i32 x, const
|
|||||||
f32 destB = ((f32 *)&dest)[2];
|
f32 destB = ((f32 *)&dest)[2];
|
||||||
|
|
||||||
u32 pixel = // ((u32)(destA) << 24 |
|
u32 pixel = // ((u32)(destA) << 24 |
|
||||||
(u32)(destR) << 16 |
|
(u32)(destR) << 16 | (u32)(destG) << 8 | (u32)(destB) << 0;
|
||||||
(u32)(destG) << 8 |
|
|
||||||
(u32)(destB) << 0;
|
|
||||||
bitmapPtr[x + (y * pitchInU32)] = pixel;
|
bitmapPtr[x + (y * pitchInU32)] = pixel;
|
||||||
context.api->LockRelease(renderBuffer->blitLock);
|
|
||||||
|
|
||||||
DTRDebug_CounterIncrement(DTRDebugCounter_SetPixels);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// colorModulate: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
// colorModulate: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
||||||
@ -783,9 +776,9 @@ FILE_SCOPE inline f32 GetCurrZDepth(DTRRenderContext context, i32 posX, i32 posY
|
|||||||
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
||||||
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
||||||
|
|
||||||
context.api->LockAcquire(renderBuffer->zDepthLock);
|
context.api->LockAcquire(renderBuffer->renderLock);
|
||||||
f32 currZDepth = renderBuffer->zBuffer[zBufferIndex];
|
f32 currZDepth = renderBuffer->zBuffer[zBufferIndex];
|
||||||
context.api->LockRelease(renderBuffer->zDepthLock);
|
context.api->LockRelease(renderBuffer->renderLock);
|
||||||
return currZDepth;
|
return currZDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -798,9 +791,9 @@ FILE_SCOPE inline void SetCurrZDepth(DTRRenderContext context, i32 posX, i32 pos
|
|||||||
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
||||||
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
||||||
|
|
||||||
context.api->LockAcquire(renderBuffer->zDepthLock);
|
context.api->LockAcquire(renderBuffer->renderLock);
|
||||||
renderBuffer->zBuffer[zBufferIndex] = newZDepth;
|
renderBuffer->zBuffer[zBufferIndex] = newZDepth;
|
||||||
context.api->LockRelease(renderBuffer->zDepthLock);
|
context.api->LockRelease(renderBuffer->renderLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(type) \
|
#define DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(type) \
|
||||||
@ -882,7 +875,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
|
|||||||
|
|
||||||
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_Preamble);
|
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_Preamble);
|
||||||
|
|
||||||
DTRRenderBuffer *renderBuffer = context.renderBuffer;
|
DTRRenderBuffer *const renderBuffer = context.renderBuffer;
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// Convert color
|
// Convert color
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
@ -911,12 +904,12 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
|
|||||||
const u32 NUM_Y_PIXELS_TO_SIMD = 1;
|
const u32 NUM_Y_PIXELS_TO_SIMD = 1;
|
||||||
|
|
||||||
// SignedArea: _mm_set_ps(unused, p3, p2, p1) ie 0=p1, 1=p1, 2=p3, 3=unused
|
// SignedArea: _mm_set_ps(unused, p3, p2, p1) ie 0=p1, 1=p1, 2=p3, 3=unused
|
||||||
__m128 signedAreaPixel1;
|
__m128 signedAreaPixel1 = _mm_set_ps1(0);
|
||||||
__m128 signedAreaPixel2;
|
__m128 signedAreaPixel2 = _mm_set_ps1(0);
|
||||||
|
|
||||||
__m128 signedAreaPixelDeltaX;
|
__m128 signedAreaPixelDeltaX = _mm_set_ps1(0);
|
||||||
__m128 signedAreaPixelDeltaY;
|
__m128 signedAreaPixelDeltaY = _mm_set_ps1(0);
|
||||||
__m128 invSignedAreaParallelogram_4x;
|
__m128 invSignedAreaParallelogram_4x = _mm_set_ps1(0);
|
||||||
|
|
||||||
__m128 triangleZ = _mm_set_ps(0, p3.z, p2.z, p1.z);
|
__m128 triangleZ = _mm_set_ps(0, p3.z, p2.z, p1.z);
|
||||||
{
|
{
|
||||||
@ -1003,22 +996,11 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
|
|||||||
((f32 *)&barycentricZ)[1] +
|
((f32 *)&barycentricZ)[1] +
|
||||||
((f32 *)&barycentricZ)[2];
|
((f32 *)&barycentricZ)[2];
|
||||||
|
|
||||||
#if 0
|
|
||||||
// f32 currZDepth = GetCurrZDepth(context, posX, posY);
|
|
||||||
#else
|
|
||||||
DQN_ASSERT(renderBuffer);
|
|
||||||
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
||||||
|
context.api->LockAcquire(renderBuffer->renderLock);
|
||||||
context.api->LockAcquire(renderBuffer->zDepthLock);
|
if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex])
|
||||||
f32 currZDepth = renderBuffer->zBuffer[zBufferIndex];
|
|
||||||
context.api->LockRelease(renderBuffer->zDepthLock);
|
|
||||||
#endif
|
|
||||||
if (pixelZDepth > currZDepth)
|
|
||||||
{
|
{
|
||||||
|
|
||||||
context.api->LockAcquire(renderBuffer->zDepthLock);
|
|
||||||
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
|
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
|
||||||
context.api->LockRelease(renderBuffer->zDepthLock);
|
|
||||||
|
|
||||||
__m128 finalColor = simdColor;
|
__m128 finalColor = simdColor;
|
||||||
if (!ignoreLight)
|
if (!ignoreLight)
|
||||||
@ -1046,6 +1028,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
|
|||||||
}
|
}
|
||||||
SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear);
|
SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear);
|
||||||
}
|
}
|
||||||
|
context.api->LockRelease(renderBuffer->renderLock);
|
||||||
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_RasterisePixel);
|
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_RasterisePixel);
|
||||||
}
|
}
|
||||||
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
|
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
|
||||||
@ -1067,16 +1050,10 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
|
|||||||
((f32 *)&barycentricZ)[1] +
|
((f32 *)&barycentricZ)[1] +
|
||||||
((f32 *)&barycentricZ)[2];
|
((f32 *)&barycentricZ)[2];
|
||||||
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
||||||
|
context.api->LockAcquire(renderBuffer->renderLock);
|
||||||
context.api->LockAcquire(renderBuffer->zDepthLock);
|
if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex])
|
||||||
f32 currZDepth = renderBuffer->zBuffer[zBufferIndex];
|
|
||||||
context.api->LockRelease(renderBuffer->zDepthLock);
|
|
||||||
|
|
||||||
if (pixelZDepth > currZDepth)
|
|
||||||
{
|
{
|
||||||
context.api->LockAcquire(renderBuffer->zDepthLock);
|
|
||||||
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
|
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
|
||||||
context.api->LockRelease(renderBuffer->zDepthLock);
|
|
||||||
|
|
||||||
__m128 finalColor = simdColor;
|
__m128 finalColor = simdColor;
|
||||||
if (!ignoreLight)
|
if (!ignoreLight)
|
||||||
@ -1104,6 +1081,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context,
|
|||||||
}
|
}
|
||||||
SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear);
|
SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear);
|
||||||
}
|
}
|
||||||
|
context.api->LockRelease(renderBuffer->renderLock);
|
||||||
}
|
}
|
||||||
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
|
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
|
||||||
}
|
}
|
||||||
@ -1424,7 +1402,7 @@ typedef struct RenderMeshJob
|
|||||||
DqnV4 color;
|
DqnV4 color;
|
||||||
} RenderMeshJob;
|
} RenderMeshJob;
|
||||||
|
|
||||||
void MultiThreadedRenderMesh(struct PlatformJobQueue *const queue, void *const userData)
|
void MultiThreadedRenderMesh(PlatformJobQueue *const queue, void *const userData)
|
||||||
{
|
{
|
||||||
if (!queue || !userData)
|
if (!queue || !userData)
|
||||||
{
|
{
|
||||||
@ -1433,18 +1411,16 @@ void MultiThreadedRenderMesh(struct PlatformJobQueue *const queue, void *const u
|
|||||||
}
|
}
|
||||||
|
|
||||||
RenderMeshJob *job = (RenderMeshJob *)userData;
|
RenderMeshJob *job = (RenderMeshJob *)userData;
|
||||||
#if 1
|
|
||||||
TexturedTriangleInternal(job->context, job->lighting, job->v1, job->v2, job->v3, job->uv1,
|
TexturedTriangleInternal(job->context, job->lighting, job->v1, job->v2, job->v3, job->uv1,
|
||||||
job->uv2, job->uv3, job->tex, job->color);
|
job->uv2, job->uv3, job->tex, job->color);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, DTRMesh *const mesh,
|
void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue, DTRMesh *const mesh,
|
||||||
DTRRenderLight lighting, const DqnV3 pos, const DTRRenderTransform transform)
|
DTRRenderLight lighting, const DqnV3 pos, const DTRRenderTransform transform)
|
||||||
{
|
{
|
||||||
DqnMemStack *tempStack = context.tempStack;
|
DqnMemStack *const tempStack = context.tempStack;
|
||||||
DTRRenderBuffer *renderBuffer = context.renderBuffer;
|
DTRRenderBuffer *const renderBuffer = context.renderBuffer;
|
||||||
PlatformAPI *api = context.api;
|
PlatformAPI *const api = context.api;
|
||||||
|
|
||||||
if (!mesh || !renderBuffer || !tempStack || !api || !jobQueue) return;
|
if (!mesh || !renderBuffer || !tempStack || !api || !jobQueue) return;
|
||||||
|
|
||||||
@ -1477,7 +1453,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
|
|||||||
viewPModelViewProjection = DqnMat4_Mul(viewport, modelViewProjection);
|
viewPModelViewProjection = DqnMat4_Mul(viewport, modelViewProjection);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RUN_MULTITHREADED = false;
|
bool RUN_MULTITHREADED = true;
|
||||||
for (u32 i = 0; i < mesh->numFaces; i++)
|
for (u32 i = 0; i < mesh->numFaces; i++)
|
||||||
{
|
{
|
||||||
DTRMeshFace face = mesh->faces[i];
|
DTRMeshFace face = mesh->faces[i];
|
||||||
@ -1550,6 +1526,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
|
|||||||
DqnV2 uv3 = mesh->texUV[uv3Index].xy;
|
DqnV2 uv3 = mesh->texUV[uv3Index].xy;
|
||||||
|
|
||||||
DqnV4 color = lighting.color;
|
DqnV4 color = lighting.color;
|
||||||
|
|
||||||
RenderLightInternal lightingInternal = {};
|
RenderLightInternal lightingInternal = {};
|
||||||
lightingInternal.mode = lighting.mode;
|
lightingInternal.mode = lighting.mode;
|
||||||
lightingInternal.vector = lighting.vector;
|
lightingInternal.vector = lighting.vector;
|
||||||
@ -1559,7 +1536,6 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
|
|||||||
lightingInternal.numNormals = 3;
|
lightingInternal.numNormals = 3;
|
||||||
|
|
||||||
bool DEBUG_NO_TEX = false;
|
bool DEBUG_NO_TEX = false;
|
||||||
|
|
||||||
if (RUN_MULTITHREADED)
|
if (RUN_MULTITHREADED)
|
||||||
{
|
{
|
||||||
RenderMeshJob *jobData = (RenderMeshJob *)DqnMemStack_Push(tempStack, sizeof(*jobData));
|
RenderMeshJob *jobData = (RenderMeshJob *)DqnMemStack_Push(tempStack, sizeof(*jobData));
|
||||||
@ -1612,6 +1588,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
|
|||||||
uv1, uv2, uv3, &mesh->tex, color);
|
uv1, uv2, uv3, &mesh->tex, color);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DEBUG_WIREFRAME = false;
|
bool DEBUG_WIREFRAME = false;
|
||||||
if (DTR_DEBUG && DEBUG_WIREFRAME)
|
if (DTR_DEBUG && DEBUG_WIREFRAME)
|
||||||
{
|
{
|
||||||
@ -1622,9 +1599,11 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE(doyle): Complete remaining jobs and wait until all jobs finished
|
||||||
|
// before leaving function.
|
||||||
if (RUN_MULTITHREADED)
|
if (RUN_MULTITHREADED)
|
||||||
{
|
{
|
||||||
while (api->QueueTryExecuteNextJob(jobQueue))
|
while (api->QueueTryExecuteNextJob(jobQueue) || !api->QueueAllJobsComplete(jobQueue))
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1858,3 +1837,4 @@ void DTRRender_Clear(DTRRenderContext context, DqnV3 color)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,12 +16,9 @@ typedef struct DTRRenderBuffer
|
|||||||
i32 width;
|
i32 width;
|
||||||
i32 height;
|
i32 height;
|
||||||
i32 bytesPerPixel;
|
i32 bytesPerPixel;
|
||||||
|
PlatformLock *renderLock;
|
||||||
PlatformLock *volatile zDepthLock;
|
volatile u8 *memory; // Format: XX RR GG BB, and has (width * height * bytesPerPixels) elements
|
||||||
PlatformLock *volatile blitLock;
|
volatile f32 *zBuffer; // zBuffer has (width * height) elements
|
||||||
|
|
||||||
u8 *memory; // Format: XX RR GG BB, and has (width * height * bytesPerPixels) elements
|
|
||||||
f32 *zBuffer; // zBuffer has (width * height) elements
|
|
||||||
|
|
||||||
} DTRRenderBuffer;
|
} DTRRenderBuffer;
|
||||||
|
|
||||||
|
@ -64,9 +64,11 @@ struct PlatformJobQueue
|
|||||||
// NOTE: Modified by main+worker threads
|
// NOTE: Modified by main+worker threads
|
||||||
LONG volatile jobToExecuteIndex;
|
LONG volatile jobToExecuteIndex;
|
||||||
HANDLE volatile win32Semaphore;
|
HANDLE volatile win32Semaphore;
|
||||||
|
LONG volatile numJobsToComplete;
|
||||||
|
|
||||||
// NOTE: Modified by main thread ONLY
|
// NOTE: Modified by main thread ONLY
|
||||||
LONG volatile jobInsertIndex;
|
LONG volatile jobInsertIndex;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
bool Platform_QueueAddJob(PlatformJobQueue *const queue, const PlatformJob job)
|
bool Platform_QueueAddJob(PlatformJobQueue *const queue, const PlatformJob job)
|
||||||
@ -76,22 +78,12 @@ bool Platform_QueueAddJob(PlatformJobQueue *const queue, const PlatformJob job)
|
|||||||
|
|
||||||
queue->jobList[queue->jobInsertIndex] = job;
|
queue->jobList[queue->jobInsertIndex] = job;
|
||||||
|
|
||||||
_WriteBarrier();
|
InterlockedIncrement(&queue->numJobsToComplete);
|
||||||
_mm_sfence();
|
|
||||||
|
|
||||||
queue->jobInsertIndex = newJobInsertIndex;
|
|
||||||
ReleaseSemaphore(queue->win32Semaphore, 1, NULL);
|
ReleaseSemaphore(queue->win32Semaphore, 1, NULL);
|
||||||
|
queue->jobInsertIndex = newJobInsertIndex;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE_SCOPE void DebugWin32JobPrintNumber(PlatformJobQueue *const queue, void *const userData)
|
|
||||||
{
|
|
||||||
i32 numberToPrint = *((i32 *)userData);
|
|
||||||
DqnWin32_OutputDebugString("Thread %d: Printing number: %d\n", GetCurrentThreadId(),
|
|
||||||
numberToPrint);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue)
|
bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue)
|
||||||
{
|
{
|
||||||
LONG originalJobToExecute = queue->jobToExecuteIndex;
|
LONG originalJobToExecute = queue->jobToExecuteIndex;
|
||||||
@ -109,6 +101,7 @@ bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue)
|
|||||||
{
|
{
|
||||||
PlatformJob job = queue->jobList[index];
|
PlatformJob job = queue->jobList[index];
|
||||||
job.callback(queue, job.userData);
|
job.callback(queue, job.userData);
|
||||||
|
InterlockedDecrement(&queue->numJobsToComplete);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -117,6 +110,37 @@ bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Platform_QueueAllJobsComplete(PlatformJobQueue *const queue)
|
||||||
|
{
|
||||||
|
bool result = (queue->numJobsToComplete == 0);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
FILE_SCOPE u32 volatile globalDebugCounter;
|
||||||
|
FILE_SCOPE bool volatile globalDebugCounterMemoize[2048];
|
||||||
|
FILE_SCOPE PlatformLock *globalDebugLock;
|
||||||
|
FILE_SCOPE void DebugWin32IncrementCounter(PlatformJobQueue *const queue, void *const userData)
|
||||||
|
{
|
||||||
|
Platform_LockAcquire(globalDebugLock);
|
||||||
|
DQN_ASSERT(!globalDebugCounterMemoize[globalDebugCounter]);
|
||||||
|
globalDebugCounterMemoize[globalDebugCounter] = true;
|
||||||
|
globalDebugCounter++;
|
||||||
|
u32 number = globalDebugCounter;
|
||||||
|
Platform_LockRelease(globalDebugLock);
|
||||||
|
|
||||||
|
DqnWin32_OutputDebugString("Thread %d: Incrementing Number: %d\n", GetCurrentThreadId(),
|
||||||
|
number);
|
||||||
|
}
|
||||||
|
|
||||||
|
FILE_SCOPE void DebugWin32JobPrintNumber(PlatformJobQueue *const queue, void *const userData)
|
||||||
|
{
|
||||||
|
i32 numberToPrint = *((i32 *)userData);
|
||||||
|
DqnWin32_OutputDebugString("Thread %d: Printing number: %d\n", GetCurrentThreadId(),
|
||||||
|
numberToPrint);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
DWORD WINAPI Win32ThreadCallback(void *lpParameter)
|
DWORD WINAPI Win32ThreadCallback(void *lpParameter)
|
||||||
{
|
{
|
||||||
PlatformJobQueue *queue = (PlatformJobQueue *)lpParameter;
|
PlatformJobQueue *queue = (PlatformJobQueue *)lpParameter;
|
||||||
@ -682,6 +706,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
|
|||||||
|
|
||||||
platformAPI.QueueAddJob = Platform_QueueAddJob;
|
platformAPI.QueueAddJob = Platform_QueueAddJob;
|
||||||
platformAPI.QueueTryExecuteNextJob = Platform_QueueTryExecuteNextJob;
|
platformAPI.QueueTryExecuteNextJob = Platform_QueueTryExecuteNextJob;
|
||||||
|
platformAPI.QueueAllJobsComplete = Platform_QueueAllJobsComplete;
|
||||||
|
|
||||||
platformAPI.LockInit = Platform_LockInit;
|
platformAPI.LockInit = Platform_LockInit;
|
||||||
platformAPI.LockAcquire = Platform_LockAcquire;
|
platformAPI.LockAcquire = Platform_LockAcquire;
|
||||||
@ -795,7 +820,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
// DEBUG Create jobs
|
// DEBUG Create print jobs
|
||||||
for (i32 i = 0; i < 20; i++)
|
for (i32 i = 0; i < 20; i++)
|
||||||
{
|
{
|
||||||
PlatformJob job = {};
|
PlatformJob job = {};
|
||||||
@ -811,6 +836,29 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
|
|||||||
while (Platform_QueueTryExecuteNextJob(&jobQueue))
|
while (Platform_QueueTryExecuteNextJob(&jobQueue))
|
||||||
;
|
;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
globalDebugLock = Platform_LockInit(&globalPlatformMemory.mainStack);
|
||||||
|
DQN_ASSERT(globalDebugLock);
|
||||||
|
for (i32 i = 0; i < DQN_ARRAY_COUNT(globalDebugCounterMemoize); i++)
|
||||||
|
{
|
||||||
|
PlatformJob job = {};
|
||||||
|
job.callback = DebugWin32IncrementCounter;
|
||||||
|
while (!Platform_QueueAddJob(&jobQueue, job))
|
||||||
|
{
|
||||||
|
Platform_QueueTryExecuteNextJob(&jobQueue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (Platform_QueueTryExecuteNextJob(&jobQueue))
|
||||||
|
;
|
||||||
|
|
||||||
|
for (i32 i = 0; i < DQN_ARRAY_COUNT(globalDebugCounterMemoize); i++)
|
||||||
|
DQN_ASSERT(globalDebugCounterMemoize[i]);
|
||||||
|
|
||||||
|
DqnWin32_OutputDebugString("\nFinal incremented value: %d\n", globalDebugCounter);
|
||||||
|
DQN_ASSERT(globalDebugCounter == DQN_ARRAY_COUNT(globalDebugCounterMemoize));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -29,7 +29,6 @@ REM EHa- disable exception handling (currently it's on /EHsc since libraries n
|
|||||||
REM GR- disable c runtime type information (we don't use)
|
REM GR- disable c runtime type information (we don't use)
|
||||||
REM MD use dynamic runtime library
|
REM MD use dynamic runtime library
|
||||||
REM MT use static runtime library, so build and link it into exe
|
REM MT use static runtime library, so build and link it into exe
|
||||||
REM Od disables optimisations
|
|
||||||
REM Oi enable intrinsics optimisation, let us use CPU intrinsics if there is one
|
REM Oi enable intrinsics optimisation, let us use CPU intrinsics if there is one
|
||||||
REM instead of generating a call to external library (i.e. CRT).
|
REM instead of generating a call to external library (i.e. CRT).
|
||||||
REM Zi enables debug data, Z7 combines the debug files into one.
|
REM Zi enables debug data, Z7 combines the debug files into one.
|
||||||
@ -39,16 +38,10 @@ REM wd4100 unused argument parameters
|
|||||||
REM wd4201 nonstandard extension used: nameless struct/union
|
REM wd4201 nonstandard extension used: nameless struct/union
|
||||||
REM wd4189 local variable is initialised but not referenced
|
REM wd4189 local variable is initialised but not referenced
|
||||||
REM wd4505 unreferenced local function not used will be removed
|
REM wd4505 unreferenced local function not used will be removed
|
||||||
set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -O2 -FAsc /I..\src\external\
|
set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -FAsc /I..\src\external\
|
||||||
set DLLFlags=/Fm%ProjectName% /Fo%ProjectName% /Fa%ProjectName% /Fe%ProjectName%
|
set DLLFlags=/Fm%ProjectName% /Fo%ProjectName% /Fa%ProjectName% /Fe%ProjectName%
|
||||||
set Win32Flags=/FmWin32DTRenderer /FeWin32DTRenderer
|
set Win32Flags=/FmWin32DTRenderer /FeWin32DTRenderer
|
||||||
|
|
||||||
REM Clean time necessary for hours <10, which produces H:MM:SS.SS where the
|
|
||||||
REM first character of time is an empty space. CleanTime will pad a 0 if
|
|
||||||
REM necessary.
|
|
||||||
set CleanTime=%time: =0%
|
|
||||||
set TimeStamp=%date:~10,4%%date:~7,2%%date:~4,2%_%CleanTime:~0,2%%CleanTime:~3,2%%CleanTime:~6,2%
|
|
||||||
|
|
||||||
REM Link libraries
|
REM Link libraries
|
||||||
set LinkLibraries=user32.lib kernel32.lib gdi32.lib
|
set LinkLibraries=user32.lib kernel32.lib gdi32.lib
|
||||||
|
|
||||||
@ -56,9 +49,33 @@ REM incremental:no, turn incremental builds off
|
|||||||
REM opt:ref, try to remove functions from libs that are not referenced at all
|
REM opt:ref, try to remove functions from libs that are not referenced at all
|
||||||
set LinkFlags=-incremental:no -opt:ref -subsystem:WINDOWS -machine:x64 -nologo
|
set LinkFlags=-incremental:no -opt:ref -subsystem:WINDOWS -machine:x64 -nologo
|
||||||
|
|
||||||
|
set DebugMode=0
|
||||||
|
|
||||||
|
if %DebugMode%==1 goto :DebugFlags
|
||||||
|
goto :ReleaseFlags
|
||||||
|
|
||||||
|
:DebugFlags
|
||||||
|
REM Od disables optimisations
|
||||||
|
REM RTC1 runtime error checks
|
||||||
|
set CompileFlags=%CompileFlags% -Od -RTC1
|
||||||
|
goto compile
|
||||||
|
|
||||||
|
:ReleaseFlags
|
||||||
|
REM opt:icf, COMDAT folding for debugging release build
|
||||||
|
REM DEBUG:[FULL|NONE] enforce debugging for release build
|
||||||
|
set CompileFlags=%CompileFlags% -O2
|
||||||
|
set LinkFlags=%LinkFlags%
|
||||||
|
|
||||||
REM ////////////////////////////////////////////////////////////////////////////
|
REM ////////////////////////////////////////////////////////////////////////////
|
||||||
REM Compile
|
REM Compile
|
||||||
REM ////////////////////////////////////////////////////////////////////////////
|
REM ////////////////////////////////////////////////////////////////////////////
|
||||||
|
:compile
|
||||||
|
REM Clean time necessary for hours <10, which produces H:MM:SS.SS where the
|
||||||
|
REM first character of time is an empty space. CleanTime will pad a 0 if
|
||||||
|
REM necessary.
|
||||||
|
set CleanTime=%time: =0%
|
||||||
|
set TimeStamp=%date:~10,4%%date:~7,2%%date:~4,2%_%CleanTime:~0,2%%CleanTime:~3,2%%CleanTime:~6,2%
|
||||||
|
|
||||||
del *.pdb >NUL 2>NUL
|
del *.pdb >NUL 2>NUL
|
||||||
cl %CompileFlags% %Win32Flags% ..\src\Win32DTRenderer.cpp /link %LinkLibraries% %LinkFlags%
|
cl %CompileFlags% %Win32Flags% ..\src\Win32DTRenderer.cpp /link %LinkLibraries% %LinkFlags%
|
||||||
REM cl %CompileFlags% %DLLFlags% ..\src\UnityBuild\UnityBuild.cpp /LD /link ..\src\external\easy\easy_profiler.lib /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags%
|
REM cl %CompileFlags% %DLLFlags% ..\src\UnityBuild\UnityBuild.cpp /LD /link ..\src\external\easy\easy_profiler.lib /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags%
|
||||||
|
Loading…
Reference in New Issue
Block a user