From a03de5de808f90991679d807e1c62ce61b1bc727 Mon Sep 17 00:00:00 2001 From: Doyle Thai Date: Wed, 24 May 2017 00:04:18 +1000 Subject: [PATCH] Start zbuffering pass of pixels for triangles --- src/DTRenderer.cpp | 17 ++++++++----- src/DTRendererDebug.cpp | 33 ++++++++++++++++++++++-- src/DTRendererDebug.h | 44 ++++++++++++++++++++------------ src/DTRendererPlatform.h | 2 ++ src/DTRendererRender.cpp | 55 +++++++++++++++++++++++++++++++--------- src/Win32DTRenderer.cpp | 1 + src/dqn.h | 3 +++ 7 files changed, 118 insertions(+), 37 deletions(-) diff --git a/src/DTRenderer.cpp b/src/DTRenderer.cpp index 9e6d670..fbc7c26 100644 --- a/src/DTRenderer.cpp +++ b/src/DTRenderer.cpp @@ -1452,15 +1452,15 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer, DTRState *state = (DTRState *)memory->context; if (input->executableReloaded) { - DTR_DEBUG_PROFILE_END(); - DTR_DEBUG_PROFILE_START(); + DTR_DEBUG_EP_PROFILE_END(); + DTR_DEBUG_EP_PROFILE_START(); } - DTR_DEBUG_TIMED_FUNCTION(); + DTR_DEBUG_EP_TIMED_FUNCTION(); if (!memory->isInit) { TestStrToF32Converter(); - DTR_DEBUG_TIMED_BLOCK("DTR_Update Memory Initialisation"); + DTR_DEBUG_EP_TIMED_BLOCK("DTR_Update Memory Initialisation"); // NOTE(doyle): Do premultiply ourselves stbi_set_unpremultiply_on_load(true); stbi_set_flip_vertically_on_load(true); @@ -1492,9 +1492,12 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer, renderBuffer.height = platformRenderBuffer->height; renderBuffer.bytesPerPixel = platformRenderBuffer->bytesPerPixel; renderBuffer.memory = (u8 *)platformRenderBuffer->memory; - renderBuffer.zBuffer = (f32 *)DqnMemStack_Push( - &memory->transMemStack, - platformRenderBuffer->width * platformRenderBuffer->height * sizeof(*renderBuffer.zBuffer)); + + u32 zBufferSize = platformRenderBuffer->width * platformRenderBuffer->height; + renderBuffer.zBuffer = (f32 *)DqnMemStack_Push(&memory->transMemStack, + zBufferSize * sizeof(*renderBuffer.zBuffer)); + + for (u32 
i = 0; i < zBufferSize; i++) renderBuffer.zBuffer[i] = DQN_F32_MIN; //////////////////////////////////////////////////////////////////////////// // Update and Render diff --git a/src/DTRendererDebug.cpp b/src/DTRendererDebug.cpp index edca02e..e869e6d 100644 --- a/src/DTRendererDebug.cpp +++ b/src/DTRendererDebug.cpp @@ -28,8 +28,29 @@ void DTRDebug_PushText(const char *const formatStr, ...) } } -FILE_SCOPE void PushMemStackText(const char *const name, - const DqnMemStack *const stack) +void inline DTRDebug_BeginCycleCount(enum DTRDebugCycleCount tag) +{ + if (DTR_DEBUG_PROFILING) + { + if (globalDebug.input && globalDebug.input->canUseRdtsc) + { + globalDebug.cycleCount[tag] = __rdtsc(); + } + } +} + +void inline DTRDebug_EndCycleCount(enum DTRDebugCycleCount tag) +{ + if (DTR_DEBUG_PROFILING) + { + if (globalDebug.input && globalDebug.input->canUseRdtsc) + { + globalDebug.cycleCount[tag] = __rdtsc() - globalDebug.cycleCount[tag]; + } + } +} + +FILE_SCOPE void PushMemStackText(const char *const name, const DqnMemStack *const stack) { if (DTR_DEBUG) { @@ -72,6 +93,7 @@ void DTRDebug_Update(DTRState *const state, DTRDebug *const debug = &globalDebug; debug->renderBuffer = renderBuffer; + debug->input = input; debug->font = &state->font; debug->displayColor = DqnV4_4f(1, 1, 1, 1); if (debug->font->bitmap && debug->renderBuffer) @@ -88,6 +110,12 @@ void DTRDebug_Update(DTRState *const state, DTRDebug_PushText("TrianglesRendered: %'lld", debug->counter[DTRDebugCounter_RenderTriangle]); DTRDebug_PushText(""); + for (i32 i = 0; i < DQN_ARRAY_COUNT(debug->cycleCount); i++) + { + DTRDebug_PushText("%d: %'lld cycles", i, debug->cycleCount[i]); + } + DTRDebug_PushText(""); + // memory { PushMemStackText("PermBuffer", &memory->permMemStack); @@ -95,6 +123,7 @@ void DTRDebug_Update(DTRState *const state, } DTRDebug_PushText("SSE2Support: %s", (input->canUseSSE2) ? "true" : "false"); + DTRDebug_PushText("RdtscSupport: %s", (input->canUseRdtsc) ?
"true" : "false"); debug->displayP = DqnV2_2i(0, debug->renderBuffer->height + globalDebug.displayYOffset); diff --git a/src/DTRendererDebug.h b/src/DTRendererDebug.h index 1a446f6..daf5474 100644 --- a/src/DTRendererDebug.h +++ b/src/DTRendererDebug.h @@ -4,30 +4,31 @@ #include "dqn.h" #define DTR_DEBUG 1 #if DTR_DEBUG - #define DTR_DEBUG_RENDER 0 + #define DTR_DEBUG_RENDER 0 - #define DTR_DEBUG_PROFILING 0 - #if DTR_DEBUG_PROFILING + #define DTR_DEBUG_PROFILING_EASY_PROFILER 0 + #if DTR_DEBUG_PROFILING_EASY_PROFILER #define BUILD_WITH_EASY_PROFILER 1 #include "external/easy/profiler.h" - #define DTR_DEBUG_PROFILE_START() profiler::startListen() - #define DTR_DEBUG_PROFILE_END() profiler::stopListen() + #define DTR_DEBUG_EP_PROFILE_START() profiler::startListen() + #define DTR_DEBUG_EP_PROFILE_END() profiler::stopListen() - #define DTR_DEBUG_TIMED_BLOCK(name) EASY_BLOCK(name) - #define DTR_DEBUG_TIMED_NONSCOPED_BLOCK(name) EASY_NONSCOPED_BLOCK(name) - #define DTR_DEBUG_TIMED_END_BLOCK() EASY_END_BLOCK() - #define DTR_DEBUG_TIMED_FUNCTION() EASY_FUNCTION() + #define DTR_DEBUG_EP_TIMED_BLOCK(name) EASY_BLOCK(name) + #define DTR_DEBUG_EP_TIMED_NONSCOPED_BLOCK(name) EASY_NONSCOPED_BLOCK(name) + #define DTR_DEBUG_EP_TIMED_END_BLOCK() EASY_END_BLOCK() + #define DTR_DEBUG_EP_TIMED_FUNCTION() EASY_FUNCTION() #else - #define DTR_DEBUG_PROFILE_START() - #define DTR_DEBUG_PROFILE_END() + #define DTR_DEBUG_EP_PROFILE_START() + #define DTR_DEBUG_EP_PROFILE_END() - #define DTR_DEBUG_TIMED_BLOCK(name) - #define DTR_DEBUG_TIMED_NONSCOPED_BLOCK(name) - #define DTR_DEBUG_TIMED_END_BLOCK() - #define DTR_DEBUG_TIMED_FUNCTION() + #define DTR_DEBUG_EP_TIMED_BLOCK(name) + #define DTR_DEBUG_EP_TIMED_NONSCOPED_BLOCK(name) + #define DTR_DEBUG_EP_TIMED_END_BLOCK() + #define DTR_DEBUG_EP_TIMED_FUNCTION() #endif + #define DTR_DEBUG_PROFILING 1 #endif typedef struct DTRRenderBuffer DTRRenderBuffer; @@ -43,16 +44,24 @@ enum DTRDebugCounter DTRDebugCounter_Count, }; +enum DTRDebugCycleCount +{ + 
DTRDebugCycleCount_RenderTriangle_Rasterise, + DTRDebugCycleCount_Count, +}; + typedef struct DTRDebug { DTRFont *font; DTRRenderBuffer *renderBuffer; + PlatformInput *input; DqnV4 displayColor; DqnV2 displayP; i32 displayYOffset; - u64 counter[DTRDebugCounter_Count]; + u64 cycleCount[DTRDebugCycleCount_Count]; + u64 counter [DTRDebugCounter_Count]; u64 totalSetPixels; } DTRDebug; @@ -63,6 +72,9 @@ void DTRDebug_Update(DTRState *const state, DTRRenderBuffer *const renderBuffer, PlatformInput *const input, PlatformMemory *const memory); +void inline DTRDebug_BeginCycleCount(enum DTRDebugCycleCount tag); +void inline DTRDebug_EndCycleCount (enum DTRDebugCycleCount tag); + void inline DTRDebug_CounterIncrement(enum DTRDebugCounter tag) { if (DTR_DEBUG) diff --git a/src/DTRendererPlatform.h b/src/DTRendererPlatform.h index c676ccb..65ff57b 100644 --- a/src/DTRendererPlatform.h +++ b/src/DTRendererPlatform.h @@ -2,6 +2,7 @@ #define DRENDERER_PLATFORM_H #include "dqn.h" +#include <intrin.h> enum PlatformFilePermissionFlag { @@ -73,6 +74,7 @@ typedef struct PlatformInput f64 timeNowInS; bool executableReloaded; bool canUseSSE2; + bool canUseRdtsc; PlatformAPI api; union { diff --git a/src/DTRendererRender.cpp b/src/DTRendererRender.cpp index a5b7169..04b2698 100644 --- a/src/DTRendererRender.cpp +++ b/src/DTRendererRender.cpp @@ -105,7 +105,7 @@ FILE_SCOPE inline void SetPixel(DTRRenderBuffer *const renderBuffer, const i32 x if (!renderBuffer) return; if (x < 0 || x > (renderBuffer->width - 1)) return; if (y < 0 || y > (renderBuffer->height - 1)) return; - DTR_DEBUG_TIMED_FUNCTION(); + DTR_DEBUG_EP_TIMED_FUNCTION(); u32 *const bitmapPtr = (u32 *)renderBuffer->memory; const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4; @@ -175,7 +175,7 @@ void DTRRender_Text(DTRRenderBuffer *const renderBuffer, { if (!text) return; if (!font.bitmap || !font.atlas || !renderBuffer) return; - DTR_DEBUG_TIMED_FUNCTION(); + DTR_DEBUG_EP_TIMED_FUNCTION(); if (len == -1) len =
Dqn_strlen(text); @@ -253,7 +253,7 @@ FILE_SCOPE void TransformPoints(const DqnV2 origin, DqnV2 *const pList, const f32 rotation) { if (!pList || numP == 0) return; - DTR_DEBUG_TIMED_FUNCTION(); + DTR_DEBUG_EP_TIMED_FUNCTION(); DqnV2 xAxis = (DqnV2_2f(cosf(rotation), sinf(rotation))); DqnV2 yAxis = DqnV2_2f(-xAxis.y, xAxis.x); @@ -271,7 +271,7 @@ void DTRRender_Line(DTRRenderBuffer *const renderBuffer, DqnV2i a, DqnV2i b, DqnV4 color) { if (!renderBuffer) return; - DTR_DEBUG_TIMED_FUNCTION(); + DTR_DEBUG_EP_TIMED_FUNCTION(); color = DTRRender_SRGB1ToLinearSpaceV4(color); color = PreMultiplyAlpha1(color); @@ -390,7 +390,7 @@ FILE_SCOPE DqnRect GetBoundingBox(const DqnV2 *const pList, const i32 numP) void DTRRender_Rectangle(DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max, DqnV4 color, const DTRRenderTransform transform) { - DTR_DEBUG_TIMED_FUNCTION(); + DTR_DEBUG_EP_TIMED_FUNCTION(); //////////////////////////////////////////////////////////////////////////// // Transform vertexes //////////////////////////////////////////////////////////////////////////// @@ -487,7 +487,7 @@ void DTRRender_Rectangle(DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 m void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV4 color, const DTRRenderTransform transform) { - DTR_DEBUG_TIMED_FUNCTION(); + DTR_DEBUG_EP_TIMED_FUNCTION(); //////////////////////////////////////////////////////////////////////////// // Transform vertexes @@ -528,6 +528,10 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, determine whether a point lies on the line, or is to the left or right of a the line. + We can do this using the PerpDotProduct conceptually known as the cross + product in 2D. This can be expressed using the determinant and is the + method we are using. 
+ First forming a 3x3 matrix of our terms with a, b being from the triangle and test point c, we can derive a 2x2 matrix by subtracting the 1st column from the 2nd and 1st column from the third. @@ -646,15 +650,18 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, f32 signedArea3DeltaX = c.y - a.y; f32 signedArea3DeltaY = a.x - c.x; - f32 invSignedAreaParallelogram = 1 / ((b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x)); + f32 signedAreaParallelogram = ((b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x)); + if (signedAreaParallelogram == 0) return; + f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram; + DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); //////////////////////////////////////////////////////////////////////////// // Scan and Render //////////////////////////////////////////////////////////////////////////// - color.rgb *= 0.1f; + const u32 zBufferPitch = renderBuffer->width; + const f32 BARYCENTRIC_EPSILON = 0.1f; for (i32 bufferY = min.y; bufferY < max.y; bufferY++) { - f32 signedArea1Row = signedArea1; f32 signedArea2Row = signedArea2; f32 signedArea3Row = signedArea3; @@ -663,7 +670,30 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, { if (signedArea1Row >= 0 && signedArea2Row >= 0 && signedArea3Row >= 0) { +#if 1 + f32 barycentricA = signedArea2Row * invSignedAreaParallelogram; + f32 barycentricB = signedArea3Row * invSignedAreaParallelogram; + f32 barycentricC = signedArea1Row * invSignedAreaParallelogram; + + if (DTR_DEBUG) + { + f32 barycentricSum = barycentricA + barycentricB + barycentricC; + DQN_ASSERT((1.0f - barycentricSum) < BARYCENTRIC_EPSILON && (barycentricSum - 1.0f) < BARYCENTRIC_EPSILON); + } + + f32 pixelZValue = + (a.z * barycentricA) + (b.z * barycentricB) + (c.z * barycentricC); + + i32 zBufferIndex = bufferX + (bufferY * zBufferPitch); + f32 currZValue = renderBuffer->zBuffer[zBufferIndex]; + if (pixelZValue > currZValue) + { + renderBuffer->zBuffer[zBufferIndex] =
pixelZValue; + SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear); + } +#else SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear); +#endif } signedArea1Row += signedArea1DeltaX; @@ -675,6 +705,7 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, signedArea2 += signedArea2DeltaY; signedArea3 += signedArea3DeltaY; } + DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); //////////////////////////////////////////////////////////////////////////// // Debug @@ -718,7 +749,7 @@ void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer, const DTRRenderTransform transform, DqnV4 color) { if (!bitmap || !bitmap->memory || !renderBuffer) return; - DTR_DEBUG_TIMED_FUNCTION(); + DTR_DEBUG_EP_TIMED_FUNCTION(); //////////////////////////////////////////////////////////////////////////// // Transform vertexes @@ -785,7 +816,7 @@ void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer, if (bufXYIsInside) { - DTR_DEBUG_TIMED_BLOCK("DTRRender_Bitmap TexelCalculation"); + DTR_DEBUG_EP_TIMED_BLOCK("DTRRender_Bitmap TexelCalculation"); DqnV2 bufPRelToBasis = DqnV2_2i(bufferX, bufferY) - rectBasis; f32 u = DqnV2_Dot(bufPRelToBasis, xAxisRelToBasis) * invXAxisLenSq; @@ -816,7 +847,7 @@ void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer, i32 texel4Y = DQN_MIN((texelY + 1), bitmap->dim.h - 1); { - DTR_DEBUG_TIMED_BLOCK("DTRRender_Bitmap TexelBilinearInterpolation"); + DTR_DEBUG_EP_TIMED_BLOCK("DTRRender_Bitmap TexelBilinearInterpolation"); u32 texel1 = *(u32 *)(bitmapPtr + ((texel1X * bitmap->bytesPerPixel) + (texel1Y * pitch))); u32 texel2 = *(u32 *)(bitmapPtr + ((texel2X * bitmap->bytesPerPixel) + (texel2Y * pitch))); u32 texel3 = *(u32 *)(bitmapPtr + ((texel3X * bitmap->bytesPerPixel) + (texel3Y * pitch))); diff --git a/src/Win32DTRenderer.cpp b/src/Win32DTRenderer.cpp index 985df9f..d332088 100644 --- a/src/Win32DTRenderer.cpp +++ b/src/Win32DTRenderer.cpp @@ -513,6 +513,7 @@ int WINAPI 
wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PlatformInput platformInput = {}; platformInput.canUseSSE2 = IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); + platformInput.canUseRdtsc = IsProcessorFeaturePresent(PF_RDTSC_INSTRUCTION_AVAILABLE); platformInput.api = platformAPI; //////////////////////////////////////////////////////////////////////////// diff --git a/src/dqn.h b/src/dqn.h index 38965dc..3e70f3a 100644 --- a/src/dqn.h +++ b/src/dqn.h @@ -20,6 +20,7 @@ #endif #include <stdint.h> // For standard types +#include <float.h> #define LOCAL_PERSIST static #define FILE_SCOPE static @@ -35,6 +36,8 @@ typedef int16_t i16; typedef double f64; typedef float f32; +#define DQN_F32_MIN (-FLT_MAX) + #define DQN_TERABYTE(val) (DQN_GIGABYTE(val) * 1024LL) #define DQN_GIGABYTE(val) (DQN_MEGABYTE(val) * 1024LL) #define DQN_MEGABYTE(val) (DQN_KILOBYTE(val) * 1024LL)