Better assert(), move some Win32 functions to DQN

Asserts are now more defensive and diagnostic. Previously, asserts provided a
hard break in the program, with the intention of disabling them for release
builds. Having experienced bugs that only appeared in release mode due to
optimisations, this left little protection against such errors, since all
asserts were off.

Now asserts can be used in release mode whilst still evaluating the expression
and allowing user messages/diagnostics to be printed to the console.
This commit is contained in:
Doyle Thai 2017-06-20 02:13:03 +10:00
parent e860145e77
commit 5397cdd9b9
6 changed files with 431 additions and 174 deletions

View File

@ -976,6 +976,7 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
} }
DTRRenderContext renderContext = {}; DTRRenderContext renderContext = {};
renderContext.multithread = true;
renderContext.renderBuffer = &renderBuffer; renderContext.renderBuffer = &renderBuffer;
renderContext.tempStack = &memory->tempStack; renderContext.tempStack = &memory->tempStack;
renderContext.api = &input->api; renderContext.api = &input->api;

View File

@ -978,13 +978,9 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, const DqnV3 p1, const Dqn
const DqnV2 uv2SubUv1 = uv2 - uv1; const DqnV2 uv2SubUv1 = uv2 - uv1;
const DqnV2 uv3SubUv1 = uv3 - uv1; const DqnV2 uv3SubUv1 = uv3 - uv1;
#define INLINE_RASTERISE 1
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_Preamble); DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_Preamble);
#if INLINE_RASTERISE
const u32 IS_GREATER_MASK = 0xF; const u32 IS_GREATER_MASK = 0xF;
const u32 zBufferPitch = renderBuffer->width; const u32 zBufferPitch = renderBuffer->width;
#endif
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Scan and Render // Scan and Render
@ -997,7 +993,6 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, const DqnV3 p1, const Dqn
for (i32 bufferX = min.x; bufferX < max.x; bufferX += NUM_X_PIXELS_TO_SIMD) for (i32 bufferX = min.x; bufferX < max.x; bufferX += NUM_X_PIXELS_TO_SIMD)
{ {
#if INLINE_RASTERISE
// Rasterise buffer(X, Y) pixel // Rasterise buffer(X, Y) pixel
{ {
__m128 checkArea = signedArea1; __m128 checkArea = signedArea1;
@ -1017,43 +1012,46 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, const DqnV3 p1, const Dqn
((f32 *)&barycentricZ)[2]; ((f32 *)&barycentricZ)[2];
i32 zBufferIndex = posX + (posY * zBufferPitch); i32 zBufferIndex = posX + (posY * zBufferPitch);
__m128 finalColor = simdColor; if (context.multithread)
if (!ignoreLight)
{ {
__m128 barycentricA_4x = _mm_set_ps1(((f32 *)&barycentric)[0]); bool currLockValue;
__m128 barycentricB_4x = _mm_set_ps1(((f32 *)&barycentric)[1]); do
__m128 barycentricC_4x = _mm_set_ps1(((f32 *)&barycentric)[2]); {
currLockValue = (bool)context.api->AtomicCompareSwap(
__m128 barycentricLight1 = _mm_mul_ps(p1Light, barycentricA_4x); (u32 *)&renderBuffer->pixelLockTable[zBufferIndex], (u32) true,
__m128 barycentricLight2 = _mm_mul_ps(p2Light, barycentricB_4x); (u32) false);
__m128 barycentricLight3 = _mm_mul_ps(p3Light, barycentricC_4x); } while (currLockValue != false);
__m128 light = _mm_add_ps(barycentricLight3,
_mm_add_ps(barycentricLight1, barycentricLight2));
finalColor = _mm_mul_ps(finalColor, light);
((f32 *)&finalColor)[3] = preserveAlpha;
} }
if (texture)
{
__m128 texSampledColor = SIMDSampleTextureForTriangle(
texture, uv1, uv2SubUv1, uv3SubUv1, barycentric);
finalColor = _mm_mul_ps(texSampledColor, finalColor);
}
#if 1
bool currLockValue;
do
{
currLockValue = (bool)context.api->AtomicCompareSwap(
(u32 *)&renderBuffer->pixelLockTable[zBufferIndex], (u32)true, (u32)false);
} while (currLockValue != false);
#endif
if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex]) if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex])
{ {
__m128 finalColor = simdColor;
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth; renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
if (!ignoreLight)
{
__m128 barycentricA_4x = _mm_set_ps1(((f32 *)&barycentric)[0]);
__m128 barycentricB_4x = _mm_set_ps1(((f32 *)&barycentric)[1]);
__m128 barycentricC_4x = _mm_set_ps1(((f32 *)&barycentric)[2]);
__m128 barycentricLight1 = _mm_mul_ps(p1Light, barycentricA_4x);
__m128 barycentricLight2 = _mm_mul_ps(p2Light, barycentricB_4x);
__m128 barycentricLight3 = _mm_mul_ps(p3Light, barycentricC_4x);
__m128 light =
_mm_add_ps(barycentricLight3,
_mm_add_ps(barycentricLight1, barycentricLight2));
finalColor = _mm_mul_ps(finalColor, light);
((f32 *)&finalColor)[3] = preserveAlpha;
}
if (texture)
{
__m128 texSampledColor = SIMDSampleTextureForTriangle(
texture, uv1, uv2SubUv1, uv3SubUv1, barycentric);
finalColor = _mm_mul_ps(texSampledColor, finalColor);
}
SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear); SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear);
} }
renderBuffer->pixelLockTable[zBufferIndex] = false; renderBuffer->pixelLockTable[zBufferIndex] = false;
@ -1061,15 +1059,192 @@ FILE_SCOPE void SIMDTriangle(DTRRenderContext context, const DqnV3 p1, const Dqn
} }
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX); signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
} }
}
signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY);
// signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY);
}
#else DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_Rasterise);
SIMDRasteriseTrianglePixel(context, texture, bufferX, bufferY, max.x, uv1, uv2SubUv1, DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle);
uv3SubUv1, simdColor, triangleZ, signedArea1, }
invSignedAreaParallelogram_4x, preserveAlpha, ignoreLight,
p1Light, p2Light, p3Light); FILE_SCOPE void SIMDBetterTriangle(DTRRenderContext context, const DqnV3 p1, const DqnV3 p2,
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX); const DqnV3 p3, const DqnV2 uv1, const DqnV2 uv2,
// signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX); const DqnV2 uv3, const f32 lightIntensity1,
#endif const f32 lightIntensity2, const f32 lightIntensity3,
const bool ignoreLight, DTRBitmap *const texture, DqnV4 color,
const DqnV2i min, const DqnV2i max)
{
DTR_DEBUG_EP_TIMED_FUNCTION();
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle);
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_Preamble);
DTRRenderBuffer *const renderBuffer = context.renderBuffer;
////////////////////////////////////////////////////////////////////////////
// Convert color
////////////////////////////////////////////////////////////////////////////
__m128 simdColor = _mm_set_ps(color.a, color.b, color.g, color.r);
simdColor = SIMDSRGB1ToLinearSpace(simdColor);
simdColor = SIMDPreMultiplyAlpha1(simdColor);
f32 preserveAlpha = ((f32 *)&simdColor)[3];
const __m128 ZERO_4X = _mm_set_ps1(0.0f);
__m128 simdLightIntensity1 = _mm_set_ps1(lightIntensity1);
__m128 simdLightIntensity2 = _mm_set_ps1(lightIntensity2);
__m128 simdLightIntensity3 = _mm_set_ps1(lightIntensity3);
simdLightIntensity1 = _mm_max_ps(simdLightIntensity1, ZERO_4X);
simdLightIntensity2 = _mm_max_ps(simdLightIntensity2, ZERO_4X);
simdLightIntensity3 = _mm_max_ps(simdLightIntensity3, ZERO_4X);
__m128 p1Light = _mm_mul_ps(simdColor, simdLightIntensity1);
__m128 p2Light = _mm_mul_ps(simdColor, simdLightIntensity2);
__m128 p3Light = _mm_mul_ps(simdColor, simdLightIntensity3);
////////////////////////////////////////////////////////////////////////////
// Setup SIMD data
////////////////////////////////////////////////////////////////////////////
const u32 NUM_X_PIXELS_TO_SIMD = 1;
const u32 NUM_Y_PIXELS_TO_SIMD = 1;
// SignedArea: _mm_set_ps(unused, p3, p2, p1) ie 0=p1, 1=p2, 2=p3, 3=unused
__m128 signedAreaPixel1 = _mm_set_ps1(0);
// __m128 signedAreaPixel2 = _mm_set_ps1(0);
__m128 signedAreaPixelDeltaX = _mm_set_ps1(0);
__m128 signedAreaPixelDeltaY = _mm_set_ps1(0);
__m128 invSignedAreaParallelogram_4x = _mm_set_ps1(0);
__m128 triangleZ = _mm_set_ps(0, p3.z, p2.z, p1.z);
{
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_Preamble_SArea);
DTR_DEBUG_EP_TIMED_BLOCK("SIMDTriangle_Preamble_SArea");
DqnV2 startP = DqnV2_V2i(min);
f32 signedArea1Start = Triangle2TimesSignedArea(p2.xy, p3.xy, startP);
f32 signedArea1DeltaX = p2.y - p3.y;
f32 signedArea1DeltaY = p3.x - p2.x;
f32 signedArea2Start = Triangle2TimesSignedArea(p3.xy, p1.xy, startP);
f32 signedArea2DeltaX = p3.y - p1.y;
f32 signedArea2DeltaY = p1.x - p3.x;
f32 signedArea3Start = Triangle2TimesSignedArea(p1.xy, p2.xy, startP);
f32 signedArea3DeltaX = p1.y - p2.y;
f32 signedArea3DeltaY = p2.x - p1.x;
DTR_DEBUG_EP_TIMED_END_BLOCK();
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_Preamble_SArea);
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_Preamble_SIMDStep);
f32 signedAreaParallelogram = signedArea1Start + signedArea2Start + signedArea3Start;
if (signedAreaParallelogram == 0) return;
f32 invSignedAreaParallelogram = 1.0f / signedAreaParallelogram;
invSignedAreaParallelogram_4x = _mm_set_ps1(invSignedAreaParallelogram);
// NOTE: Order is important here!
signedAreaPixelDeltaX = _mm_set_ps(0, signedArea3DeltaX, signedArea2DeltaX, signedArea1DeltaX);
signedAreaPixelDeltaY = _mm_set_ps(0, signedArea3DeltaY, signedArea2DeltaY, signedArea1DeltaY);
signedAreaPixel1 = _mm_set_ps(0, signedArea3Start, signedArea2Start, signedArea1Start);
// signedAreaPixel2 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaX);
// NOTE: Increase step size to the number of pixels rasterised with SIMD
{
const __m128 STEP_X_4X = _mm_set_ps1((f32)NUM_X_PIXELS_TO_SIMD);
const __m128 STEP_Y_4X = _mm_set_ps1((f32)NUM_Y_PIXELS_TO_SIMD);
signedAreaPixelDeltaX = _mm_mul_ps(signedAreaPixelDeltaX, STEP_X_4X);
signedAreaPixelDeltaY = _mm_mul_ps(signedAreaPixelDeltaY, STEP_Y_4X);
}
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_Preamble_SIMDStep);
}
const DqnV2 uv2SubUv1 = uv2 - uv1;
const DqnV2 uv3SubUv1 = uv3 - uv1;
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_Preamble);
const u32 IS_GREATER_MASK = 0xF;
const u32 zBufferPitch = renderBuffer->width;
////////////////////////////////////////////////////////////////////////////
// Scan and Render
////////////////////////////////////////////////////////////////////////////
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_Rasterise);
for (i32 bufferY = min.y; bufferY < max.y; bufferY += NUM_Y_PIXELS_TO_SIMD)
{
__m128 signedArea1 = signedAreaPixel1;
// __m128 signedArea2 = signedAreaPixel2;
for (i32 bufferX = min.x; bufferX < max.x; bufferX += NUM_X_PIXELS_TO_SIMD)
{
// Rasterise buffer(X, Y) pixel
{
__m128 checkArea = signedArea1;
__m128 isGreater = _mm_cmpge_ps(checkArea, ZERO_4X);
i32 isGreaterResult = _mm_movemask_ps(isGreater);
i32 posX = bufferX;
i32 posY = bufferY;
if ((isGreaterResult & IS_GREATER_MASK) == IS_GREATER_MASK)
{
DEBUG_SIMD_AUTO_CHOOSE_BEGIN_CYCLE_COUNT(Triangle_RasterisePixel);
__m128 barycentric = _mm_mul_ps(checkArea, invSignedAreaParallelogram_4x);
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
f32 pixelZDepth = ((f32 *)&barycentricZ)[0] +
((f32 *)&barycentricZ)[1] +
((f32 *)&barycentricZ)[2];
i32 zBufferIndex = posX + (posY * zBufferPitch);
if (context.multithread)
{
bool currLockValue;
do
{
currLockValue = (bool)context.api->AtomicCompareSwap(
(u32 *)&renderBuffer->pixelLockTable[zBufferIndex], (u32) true,
(u32) false);
} while (currLockValue != false);
}
if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex])
{
__m128 finalColor = simdColor;
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
if (!ignoreLight)
{
__m128 barycentricA_4x = _mm_set_ps1(((f32 *)&barycentric)[0]);
__m128 barycentricB_4x = _mm_set_ps1(((f32 *)&barycentric)[1]);
__m128 barycentricC_4x = _mm_set_ps1(((f32 *)&barycentric)[2]);
__m128 barycentricLight1 = _mm_mul_ps(p1Light, barycentricA_4x);
__m128 barycentricLight2 = _mm_mul_ps(p2Light, barycentricB_4x);
__m128 barycentricLight3 = _mm_mul_ps(p3Light, barycentricC_4x);
__m128 light =
_mm_add_ps(barycentricLight3,
_mm_add_ps(barycentricLight1, barycentricLight2));
finalColor = _mm_mul_ps(finalColor, light);
((f32 *)&finalColor)[3] = preserveAlpha;
}
if (texture)
{
__m128 texSampledColor = SIMDSampleTextureForTriangle(
texture, uv1, uv2SubUv1, uv3SubUv1, barycentric);
finalColor = _mm_mul_ps(texSampledColor, finalColor);
}
SIMDSetPixel(context, posX, posY, finalColor, ColorSpace_Linear);
}
renderBuffer->pixelLockTable[zBufferIndex] = false;
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_RasterisePixel);
}
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
}
} }
signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY); signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY);
// signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY); // signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY);
@ -1169,13 +1344,24 @@ FILE_SCOPE void SlowTriangle(DTRRenderContext context, const DqnV3 p1, const Dqn
f32 barycentricB = signedArea2 * invSignedAreaParallelogram; f32 barycentricB = signedArea2 * invSignedAreaParallelogram;
f32 barycentricC = signedArea3 * invSignedAreaParallelogram; f32 barycentricC = signedArea3 * invSignedAreaParallelogram;
f32 pixelZDepth = p1.z + (barycentricB * (p2SubP1.z)) + (barycentricC * (p3SubP1.z)); i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
f32 currZDepth = GetCurrZDepth(context, bufferX, bufferY); if (context.multithread)
if (pixelZDepth > currZDepth)
{ {
SetCurrZDepth(context, bufferX, bufferY, pixelZDepth); bool currLockValue;
DqnV4 finalColor = color; do
{
currLockValue = (bool)context.api->AtomicCompareSwap(
(u32 *)&renderBuffer->pixelLockTable[zBufferIndex], (u32) true,
(u32) false);
} while (currLockValue != false);
}
f32 pixelZDepth =
p1.z + (barycentricB * (p2SubP1.z)) + (barycentricC * (p3SubP1.z));
if (pixelZDepth > renderBuffer->zBuffer[zBufferIndex])
{
DqnV4 finalColor = color;
renderBuffer->zBuffer[zBufferIndex] = pixelZDepth;
if (!ignoreLight) if (!ignoreLight)
{ {
@ -1218,6 +1404,7 @@ FILE_SCOPE void SlowTriangle(DTRRenderContext context, const DqnV3 p1, const Dqn
SetPixel(context, bufferX, bufferY, finalColor, ColorSpace_Linear); SetPixel(context, bufferX, bufferY, finalColor, ColorSpace_Linear);
} }
renderBuffer->pixelLockTable[zBufferIndex] = false;
DEBUG_SLOW_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_RasterisePixel); DEBUG_SLOW_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_RasterisePixel);
} }
@ -1429,7 +1616,6 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
viewPModelViewProjection = DqnMat4_Mul(viewport, modelViewProjection); viewPModelViewProjection = DqnMat4_Mul(viewport, modelViewProjection);
} }
bool RUN_MULTITHREADED = true;
for (u32 i = 0; i < mesh->numFaces; i++) for (u32 i = 0; i < mesh->numFaces; i++)
{ {
DTRMeshFace face = mesh->faces[i]; DTRMeshFace face = mesh->faces[i];
@ -1512,7 +1698,7 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
lightingInternal.numNormals = 3; lightingInternal.numNormals = 3;
bool DEBUG_NO_TEX = false; bool DEBUG_NO_TEX = false;
if (RUN_MULTITHREADED) if (context.multithread)
{ {
RenderMeshJob *jobData = (RenderMeshJob *)DqnMemStack_Push(tempStack, sizeof(*jobData)); RenderMeshJob *jobData = (RenderMeshJob *)DqnMemStack_Push(tempStack, sizeof(*jobData));
if (jobData) if (jobData)
@ -1575,10 +1761,10 @@ void DTRRender_Mesh(DTRRenderContext context, PlatformJobQueue *const jobQueue,
} }
} }
// NOTE(doyle): Complete remaining jobs and wait until all jobs finished if (context.multithread)
// before leaving function.
if (RUN_MULTITHREADED)
{ {
// NOTE(doyle): Complete remaining jobs and wait until all jobs finished
// before leaving function.
while (api->QueueTryExecuteNextJob(jobQueue) || !api->QueueAllJobsComplete(jobQueue)) while (api->QueueTryExecuteNextJob(jobQueue) || !api->QueueAllJobsComplete(jobQueue))
; ;
} }

View File

@ -82,6 +82,8 @@ typedef struct DTRRenderContext
DqnMemStack *tempStack; DqnMemStack *tempStack;
PlatformAPI *api; PlatformAPI *api;
PlatformJobQueue *jobQueue; PlatformJobQueue *jobQueue;
bool multithread;
} DTRRenderContext; } DTRRenderContext;
// NOTE: All colors should be in the range of [0->1] where DqnV4 is a struct with 4 floats, rgba // NOTE: All colors should be in the range of [0->1] where DqnV4 is a struct with 4 floats, rgba

View File

@ -570,55 +570,25 @@ FILE_SCOPE void Win32ProcessMessages(HWND window, PlatformInput *input)
} }
} }
// Writes the running executable's path into buf and locates the end of its
// directory part.
// buf:    Destination for the path, bufLen is its capacity in chars.
// return: Index of the last backslash in buf. 0 if buf is NULL, bufLen is 0 or
//         no backslash exists at index 1 or above.
i32 Win32GetModuleDirectory(char *const buf, const u32 bufLen)
{
    if (buf == NULL || bufLen == 0)
    {
        return 0;
    }

    const u32 pathLen = GetModuleFileName(NULL, buf, bufLen);
    if (pathLen == bufLen)
    {
        DQN_WIN32_ERROR_BOX(
            "GetModuleFileName() buffer maxed: Len of copied text is len "
            "of supplied buffer.",
            NULL);
        DQN_ASSERT(DQN_INVALID_CODE_PATH);
    }

    // NOTE: Walk backwards from the end of the copied text and stop on the
    // first backslash met. Index 0 is never inspected, it doubles as the
    // "nothing found" result.
    i32 slashIndex = (i32)pathLen;
    while (slashIndex > 0 && buf[slashIndex] != '\\')
    {
        slashIndex--;
    }

    return slashIndex;
}
int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLine, int nShowCmd) int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLine, int nShowCmd)
{ {
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Initialise Win32 Window // Initialise Win32 Window
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
WNDCLASSEXW wc = WNDCLASSEXW wc = {
{ sizeof(WNDCLASSEX),
sizeof(WNDCLASSEX), CS_HREDRAW | CS_VREDRAW | CS_OWNDC,
CS_HREDRAW | CS_VREDRAW | CS_OWNDC, Win32MainProcCallback,
Win32MainProcCallback, 0, // int cbClsExtra
0, // int cbClsExtra 0, // int cbWndExtra
0, // int cbWndExtra hInstance,
hInstance, LoadIcon(NULL, IDI_APPLICATION),
LoadIcon(NULL, IDI_APPLICATION), LoadCursor(NULL, IDC_ARROW),
LoadCursor(NULL, IDC_ARROW), GetSysColorBrush(COLOR_3DFACE),
GetSysColorBrush(COLOR_3DFACE), L"", // LPCTSTR lpszMenuName
L"", // LPCTSTR lpszMenuName L"DRendererClass",
L"DRendererClass", NULL, // HICON hIconSm
NULL, // HICON hIconSm
}; };
if (!RegisterClassExW(&wc)) if (!RegisterClassExW(&wc))
@ -665,7 +635,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
globalRenderBitmap.width = header.biWidth; globalRenderBitmap.width = header.biWidth;
globalRenderBitmap.height = header.biHeight; globalRenderBitmap.height = header.biHeight;
globalRenderBitmap.bytesPerPixel = header.biBitCount / 8; globalRenderBitmap.bytesPerPixel = header.biBitCount / 8;
DQN_ASSERT(globalRenderBitmap.bytesPerPixel >= 1); if (!DQN_ASSERT(globalRenderBitmap.bytesPerPixel >= 1)) return -1;
HDC deviceContext = GetDC(mainWindow); HDC deviceContext = GetDC(mainWindow);
globalRenderBitmap.handle = CreateDIBSection( globalRenderBitmap.handle = CreateDIBSection(
@ -688,17 +658,22 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
char dllTmpPath[MAX_PATH] = {}; char dllTmpPath[MAX_PATH] = {};
{ {
char exeDir[MAX_PATH] = {}; char exeDir[MAX_PATH] = {};
i32 lastSlashIndex = i32 lastSlashIndex = DqnWin32_GetEXEDirectory(exeDir, DQN_ARRAY_COUNT(exeDir));
Win32GetModuleDirectory(exeDir, DQN_ARRAY_COUNT(exeDir)); if (lastSlashIndex != -1)
DQN_ASSERT(lastSlashIndex + 1 < DQN_ARRAY_COUNT(exeDir)); {
DQN_ASSERT(lastSlashIndex + 1 < DQN_ARRAY_COUNT(exeDir));
exeDir[lastSlashIndex + 1] = 0; exeDir[lastSlashIndex + 1] = 0;
u32 numCopied = Dqn_sprintf(dllPath, "%s%s", exeDir, DLL_NAME); u32 numCopied = Dqn_sprintf(dllPath, "%s%s", exeDir, DLL_NAME);
DQN_ASSERT(numCopied < DQN_ARRAY_COUNT(dllPath)); DQN_ASSERT(numCopied < DQN_ARRAY_COUNT(dllPath));
numCopied = numCopied = Dqn_sprintf(dllTmpPath, "%s%s", exeDir, DLL_TMP_NAME);
Dqn_sprintf(dllTmpPath, "%s%s", exeDir, DLL_TMP_NAME); DQN_ASSERT(numCopied < DQN_ARRAY_COUNT(dllTmpPath));
DQN_ASSERT(numCopied < DQN_ARRAY_COUNT(dllTmpPath)); }
else
{
DQN_ASSERT(DQN_INVALID_CODE_PATH);
}
} }
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
@ -748,62 +723,8 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Query CPU Cores // Query CPU Cores
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
i32 numCores = 0; i32 numCores, numThreadsPerCore;
i32 numLogicalCores = 0; DqnWin32_GetNumThreadsAndCores(&numCores, &numThreadsPerCore);
SYSTEM_INFO systemInfo;
GetNativeSystemInfo(&systemInfo);
DqnWin32_OutputDebugString("Number of Logical Processors: %d\n",
systemInfo.dwNumberOfProcessors);
numLogicalCores = systemInfo.dwNumberOfProcessors;
DWORD logicalProcInfoRequiredSize = 0;
u8 insufficientBuffer = {};
GetLogicalProcessorInformationEx(
RelationProcessorCore, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)insufficientBuffer,
&logicalProcInfoRequiredSize);
u8 *rawProcInfoArray =
(u8 *)DqnMemStack_Push(&globalPlatformMemory.tempStack, logicalProcInfoRequiredSize);
if (rawProcInfoArray)
{
if (GetLogicalProcessorInformationEx(
RelationProcessorCore,
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)rawProcInfoArray,
&logicalProcInfoRequiredSize))
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *logicalProcInfo =
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)rawProcInfoArray;
DWORD bytesRead = 0;
do
{
// NOTE: High efficiency value has greater performance and less efficiency.
PROCESSOR_RELATIONSHIP *procInfo = &logicalProcInfo->Processor;
u32 efficiency = procInfo->EfficiencyClass;
DqnWin32_OutputDebugString("Core %d: Efficiency: %d\n", numCores++, efficiency);
DQN_ASSERT(logicalProcInfo->Relationship == RelationProcessorCore);
DQN_ASSERT(procInfo->GroupCount == 1);
bytesRead += logicalProcInfo->Size;
logicalProcInfo =
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((u8 *)logicalProcInfo +
logicalProcInfo->Size);
} while (bytesRead < logicalProcInfoRequiredSize);
}
else
{
DqnWin32_DisplayLastError("GetLogicalProcessorInformationEx() failed");
}
}
else
{
DQN_WIN32_ERROR_BOX("DqnMemStack_Push() failed", NULL);
}
DqnMemStackTempRegion_End(memRegion);
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Threading // Threading
@ -815,8 +736,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
DQN_ASSERT(((size_t)&jobQueue.jobToExecuteIndex) % 4 == 0); DQN_ASSERT(((size_t)&jobQueue.jobToExecuteIndex) % 4 == 0);
// NOTE: (numCores - 1), 1 core is already exclusively for main thread // NOTE: (numCores - 1), 1 core is already exclusively for main thread
i32 availableThreads = (numCores - 1) * numLogicalCores; i32 availableThreads = (numCores - 1) * numThreadsPerCore;
// TODO(doyle): Logic for single core/thread processors // TODO(doyle): Logic for single core/thread processors
DQN_ASSERT(availableThreads > 0); DQN_ASSERT(availableThreads > 0);

View File

@ -78,6 +78,7 @@ set TimeStamp=%date:~10,4%%date:~7,2%%date:~4,2%_%CleanTime:~0,2%%CleanTime:~3,2
del *.pdb >NUL 2>NUL del *.pdb >NUL 2>NUL
cl %CompileFlags% %Win32Flags% ..\src\Win32DTRenderer.cpp /link %LinkLibraries% %LinkFlags% cl %CompileFlags% %Win32Flags% ..\src\Win32DTRenderer.cpp /link %LinkLibraries% %LinkFlags%
REM cl /P ..\src\Win32DTRenderer.cpp
REM cl %CompileFlags% %DLLFlags% ..\src\UnityBuild\UnityBuild.cpp /LD /link ..\src\external\easy\easy_profiler.lib /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags% REM cl %CompileFlags% %DLLFlags% ..\src\UnityBuild\UnityBuild.cpp /LD /link ..\src\external\easy\easy_profiler.lib /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags%
cl %CompileFlags% %DLLFlags% ..\src\UnityBuild\UnityBuild.cpp /LD /link /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags% cl %CompileFlags% %DLLFlags% ..\src\UnityBuild\UnityBuild.cpp /LD /link /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags%

161
src/dqn.h
View File

@ -52,7 +52,6 @@ typedef float f32;
#define DQN_INVALID_CODE_PATH 0 #define DQN_INVALID_CODE_PATH 0
#define DQN_ARRAY_COUNT(array) (sizeof(array) / sizeof(array[0])) #define DQN_ARRAY_COUNT(array) (sizeof(array) / sizeof(array[0]))
#define DQN_ASSERT(expr) if (!(expr)) { (*((i32 *)0)) = 0; }
#define DQN_PI 3.14159265359f #define DQN_PI 3.14159265359f
#define DQN_SQUARED(x) ((x) * (x)) #define DQN_SQUARED(x) ((x) * (x))
@ -63,6 +62,18 @@ typedef float f32;
#define DQN_MAX(a, b) ((a) < (b) ? (b) : (a)) #define DQN_MAX(a, b) ((a) < (b) ? (b) : (a))
#define DQN_MIN(a, b) ((a) < (b) ? (a) : (b)) #define DQN_MIN(a, b) ((a) < (b) ? (a) : (b))
#define DQN_SWAP(type, a, b) do { type tmp = a; a = b; b = tmp; } while(0) #define DQN_SWAP(type, a, b) do { type tmp = a; a = b; b = tmp; } while(0)
////////////////////////////////////////////////////////////////////////////////
// Dqn Error
////////////////////////////////////////////////////////////////////////////////
// DQN_ASSERT_HARD: Breaks immediately by writing to address 0 when expr is false, without
// printing anything. Wrapped in do/while(0) so it behaves as a single statement inside unbraced
// if/else. The pointer is volatile so the compiler must keep the faulting write.
#define DQN_ASSERT_HARD(expr) do { if (!(expr)) { *((volatile int *)0) = 0; } } while (0)

// DQN_ASSERT/DQN_ASSERT_MSG: Always evaluate expr and forward it together with the call site's
// file, line and the stringified expression to DqnAssertInternal. They expand to an expression
// yielding the value of expr as a bool, so they can be tested, i.e. if (!DQN_ASSERT(x)) return;
#define DQN_ASSERT(expr) DqnAssertInternal((expr), __FILE__, __LINE__, #expr, NULL)
#define DQN_ASSERT_MSG(expr, msg) DqnAssertInternal((expr), __FILE__, __LINE__, #expr, (msg))

// result: The evaluated assert expression.
// msg:    Optional user message added to the diagnostic, can be NULL.
// return: The value passed in as result.
DQN_FILE_SCOPE bool DqnAssertInternal(const bool result, const char *const file, const i32 lineNum,
                                      const char *const expr, const char *const msg);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// DqnMem - Memory // DqnMem - Memory
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -445,6 +456,29 @@ bool DqnArray_RemoveStable(DqnArray<T> *array, u64 index)
} }
#endif // DQN_CPP_MODE #endif // DQN_CPP_MODE
////////////////////////////////////////////////////////////////////////////////
// DqnJobQueue - Multithreaded Job Queue
////////////////////////////////////////////////////////////////////////////////
// Function type for the work routine stored in a DqnJob. It is handed a queue pointer and the
// userData pointer. NOTE(review): presumably the queue the job was taken from and the job's own
// userData field - confirm against the queue implementation.
typedef void DqnJob_Callback(struct DqnJobQueue *const queue, void *const userData);
// One unit of work: a callback paired with an untyped pointer for it.
typedef struct DqnJob
{
DqnJob_Callback *callback;
void *userData;
} DqnJob;
// Book-keeping for a job queue shared between threads. The index/counter fields are declared
// volatile; the NOTEs below record which threads write to them.
typedef struct DqnJobQueue
{
// Pointer to the DqnJob storage. NOTE(review): size is presumably the number of DqnJob slots
// behind jobList - confirm.
DqnJob *volatile jobList;
u32 size;
// NOTE: Modified by main+worker threads
u32 volatile jobToExecuteIndex;
// NOTE(review): presumably a Win32 semaphore HANDLE stored untyped - confirm.
void *win32Semaphore;
u32 volatile numJobsToComplete;
// NOTE: Modified by main thread ONLY
u32 volatile jobInsertIndex;
} DqnJobQueue;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Math // Math
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -817,14 +851,20 @@ DQN_FILE_SCOPE i32 DqnRnd_PCGRange(DqnRandPCGState *pcg, i32 min, i32 max);
DQN_FILE_SCOPE bool DqnWin32_UTF8ToWChar (const char *const in, wchar_t *const out, const i32 outLen); DQN_FILE_SCOPE bool DqnWin32_UTF8ToWChar (const char *const in, wchar_t *const out, const i32 outLen);
DQN_FILE_SCOPE bool DqnWin32_WCharToUTF8 (const wchar_t *const in, char *const out, const i32 outLen); DQN_FILE_SCOPE bool DqnWin32_WCharToUTF8 (const wchar_t *const in, char *const out, const i32 outLen);
DQN_FILE_SCOPE void DqnWin32_GetClientDim (const HWND window, LONG *width, LONG *height); DQN_FILE_SCOPE void DqnWin32_GetClientDim (const HWND window, LONG *width, LONG *height);
DQN_FILE_SCOPE void DqnWin32_GetRectDim (RECT rect, LONG *width, LONG *height); DQN_FILE_SCOPE void DqnWin32_GetRectDim (RECT rect, LONG *width, LONG *height);
DQN_FILE_SCOPE void DqnWin32_DisplayLastError(const char *const errorPrefix); DQN_FILE_SCOPE void DqnWin32_DisplayLastError (const char *const errorPrefix);
DQN_FILE_SCOPE void DqnWin32_DisplayErrorCode(const DWORD error, const char *const errorPrefix); DQN_FILE_SCOPE void DqnWin32_DisplayErrorCode (const DWORD error, const char *const errorPrefix);
DQN_FILE_SCOPE void DqnWin32_OutputDebugString(const char *const formatStr, ...); DQN_FILE_SCOPE void DqnWin32_OutputDebugString(const char *const formatStr, ...);
#endif /* DQN_WIN32_IMPLEMENTATION */ // buf: Filled with the path to the executable file.
// Returns the offset to the last backslash, -1 if bufLen was not large enough or buf is null.
DQN_FILE_SCOPE i32 DqnWin32_GetEXEDirectory(char *const buf, const u32 bufLen);
// numCores: numThreadsPerCore: Can be NULL, the function will just skip it.
// Uses calloc and free for querying numCores.
DQN_FILE_SCOPE void DqnWin32_GetNumThreadsAndCores(i32 *const numCores, i32 *const numThreadsPerCore);
#endif /* DQN_WIN32_IMPLEMENTATION */
#ifndef DQN_INI_H #ifndef DQN_INI_H
#define DQN_INI_H #define DQN_INI_H
@ -1360,6 +1400,34 @@ STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char comma, char peri
// NOTE: DQN_INI_IMPLEMENTATION modified to be included when DQN_IMPLEMENTATION defined // NOTE: DQN_INI_IMPLEMENTATION modified to be included when DQN_IMPLEMENTATION defined
// #define DQN_INI_IMPLEMENTATION // #define DQN_INI_IMPLEMENTATION
#define DQN_INI_STRLEN(s) Dqn_strlen(s) #define DQN_INI_STRLEN(s) Dqn_strlen(s)
////////////////////////////////////////////////////////////////////////////////
// Dqn Error
////////////////////////////////////////////////////////////////////////////////
#if (defined(_WIN32) || defined(_WIN64)) && defined(DQN_WIN32_IMPLEMENTATION)
#else
#include <stdio.h>
#endif
// Backend of DQN_ASSERT/DQN_ASSERT_MSG. When result is false it prints a diagnostic naming the
// file, line and failed expression (plus msg when given) and then forces a break by writing to
// address 0.
// result:  The already evaluated assert expression, nothing is printed when true.
// file, lineNum, expr: Call site and stringified expression used in the diagnostic.
// msg:     Optional user message appended to the diagnostic, can be NULL.
// return:  result. On failure the null write is expected to stop the program before this
//          function returns.
DQN_FILE_SCOPE bool DqnAssertInternal(const bool result, const char *const file, const i32 lineNum,
                                      const char *const expr, const char *const msg)
{
    if (!result)
    {
        const char *const formatStrNoMsg   = "DqnAssert() failed: %s|%d| (%s)\n";
        const char *const formatStrWithMsg = "DqnAssert() failed: %s|%d| (%s): %s\n";
        const char *const formatStr        = (msg) ? formatStrWithMsg : formatStrNoMsg;

        // NOTE: msg is always passed, the no-message format simply has no specifier consuming it.
#if (defined(_WIN32) || defined(_WIN64)) && defined(DQN_WIN32_IMPLEMENTATION)
        DqnWin32_OutputDebugString(formatStr, file, lineNum, expr, msg);
#else
        printf(formatStr, file, lineNum, expr, msg);
        // NOTE: We crash on the next statement, flush now or the diagnostic can be left sitting
        // in the stdio buffer and never reach the console.
        fflush(stdout);
#endif

        // NOTE: volatile so the compiler has to emit the faulting write, a plain null store is
        // undefined behaviour that optimisers are allowed to drop.
        (*((volatile i32 *)0)) = 0;
    }
    return result;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// DqnMemory - Default Memory Routines // DqnMemory - Default Memory Routines
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -3328,12 +3396,91 @@ DQN_FILE_SCOPE void DqnWin32_OutputDebugString(const char *const formatStr, ...)
va_start(argList, formatStr); va_start(argList, formatStr);
{ {
i32 numCopied = Dqn_vsprintf(str, formatStr, argList); i32 numCopied = Dqn_vsprintf(str, formatStr, argList);
DQN_ASSERT(numCopied < DQN_ARRAY_COUNT(str)); DQN_ASSERT_HARD(numCopied < DQN_ARRAY_COUNT(str));
} }
va_end(argList); va_end(argList);
OutputDebugString(str); OutputDebugString(str);
} }
// Fills buf with the full path of the running executable and reports where its directory part
// ends.
// buf:    Destination for the path, bufLen is its capacity in chars.
// return: Offset of the last backslash in buf.
//         -1 if buf is NULL, bufLen is 0, GetModuleFileName() failed, or the path filled the
//         whole buffer (i.e. it was truncated).
//         0 if the path contains no backslash past index 0.
DQN_FILE_SCOPE i32 DqnWin32_GetEXEDirectory(char *const buf, const u32 bufLen)
{
    // NOTE: Must be -1 and not 0, callers only treat -1 as failure and would otherwise go on to
    // use a buffer that was never written.
    if (!buf || bufLen == 0) return -1;

    u32 copiedLen = GetModuleFileName(NULL, buf, bufLen);

    // NOTE: 0 means the call failed, bufLen means the text was cut to fit the buffer.
    if (copiedLen == 0 || copiedLen == bufLen) return -1;

    // NOTE: Should always work if GetModuleFileName works and we're running an
    // executable.
    i32 lastSlashIndex = 0;
    for (i32 i = (i32)copiedLen; i > 0; i--)
    {
        if (buf[i] == '\\')
        {
            lastSlashIndex = i;
            break;
        }
    }

    return lastSlashIndex;
}
DQN_FILE_SCOPE void DqnWin32_GetNumThreadsAndCores(i32 *const numCores, i32 *const numThreadsPerCore)
{
if (numThreadsPerCore)
{
SYSTEM_INFO systemInfo;
GetNativeSystemInfo(&systemInfo);
*numThreadsPerCore = systemInfo.dwNumberOfProcessors;
}
if (numCores)
{
*numCores = 0;
DWORD requiredSize = 0;
u8 insufficientBuffer = {0};
GetLogicalProcessorInformationEx(
RelationProcessorCore, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)insufficientBuffer,
&requiredSize);
u8 *rawProcInfoArray = (u8 *)DqnMem_Calloc(requiredSize);
if (!rawProcInfoArray)
{
DQN_WIN32_ERROR_BOX("DqnMem_Calloc() failed", NULL);
DQN_ASSERT(DQN_INVALID_CODE_PATH);
return;
}
if (GetLogicalProcessorInformationEx(
RelationProcessorCore, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)rawProcInfoArray,
&requiredSize))
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *logicalProcInfo =
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)rawProcInfoArray;
DWORD bytesRead = 0;
do
{
// NOTE: High efficiency value has greater performance and less efficiency.
PROCESSOR_RELATIONSHIP *procInfo = &logicalProcInfo->Processor;
u32 efficiency = procInfo->EfficiencyClass;
(*numCores)++;
DQN_ASSERT(logicalProcInfo->Relationship == RelationProcessorCore);
DQN_ASSERT(procInfo->GroupCount == 1);
bytesRead += logicalProcInfo->Size;
logicalProcInfo =
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((u8 *)logicalProcInfo +
logicalProcInfo->Size);
} while (bytesRead < requiredSize);
}
else
{
DqnWin32_DisplayLastError("GetLogicalProcessorInformationEx() failed");
}
DqnMem_Free(rawProcInfoArray);
}
}
#endif // DQN_WIN32_PLATFROM #endif // DQN_WIN32_PLATFROM
FILE_SCOPE bool DqnFile_OpenInternal(const wchar_t *const path, FILE_SCOPE bool DqnFile_OpenInternal(const wchar_t *const path,