Start zbuffering pass of pixels for triangles

This commit is contained in:
Doyle Thai 2017-05-24 00:04:18 +10:00
parent d42d6ef5bf
commit a03de5de80
7 changed files with 118 additions and 37 deletions

View File

@ -1452,15 +1452,15 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
DTRState *state = (DTRState *)memory->context;
if (input->executableReloaded)
{
DTR_DEBUG_PROFILE_END();
DTR_DEBUG_PROFILE_START();
DTR_DEBUG_EP_PROFILE_END();
DTR_DEBUG_EP_PROFILE_START();
}
DTR_DEBUG_TIMED_FUNCTION();
DTR_DEBUG_EP_TIMED_FUNCTION();
if (!memory->isInit)
{
TestStrToF32Converter();
DTR_DEBUG_TIMED_BLOCK("DTR_Update Memory Initialisation");
DTR_DEBUG_EP_TIMED_BLOCK("DTR_Update Memory Initialisation");
// NOTE(doyle): Do premultiply ourselves
stbi_set_unpremultiply_on_load(true);
stbi_set_flip_vertically_on_load(true);
@ -1492,9 +1492,12 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
renderBuffer.height = platformRenderBuffer->height;
renderBuffer.bytesPerPixel = platformRenderBuffer->bytesPerPixel;
renderBuffer.memory = (u8 *)platformRenderBuffer->memory;
renderBuffer.zBuffer = (f32 *)DqnMemStack_Push(
&memory->transMemStack,
platformRenderBuffer->width * platformRenderBuffer->height * sizeof(*renderBuffer.zBuffer));
u32 zBufferSize = platformRenderBuffer->width * platformRenderBuffer->height;
renderBuffer.zBuffer = (f32 *)DqnMemStack_Push(&memory->transMemStack,
zBufferSize * sizeof(*renderBuffer.zBuffer));
for (u32 i = 0; i < zBufferSize; i++) renderBuffer.zBuffer[i] = DQN_F32_MIN;
////////////////////////////////////////////////////////////////////////////
// Update and Render

View File

@ -28,8 +28,29 @@ void DTRDebug_PushText(const char *const formatStr, ...)
}
}
FILE_SCOPE void PushMemStackText(const char *const name,
const DqnMemStack *const stack)
// Start a cycle measurement for `tag` by storing the current __rdtsc() value
// into globalDebug.cycleCount[tag].
//
// Does nothing when DTR_DEBUG_PROFILING is off, when globalDebug.input has not
// been set yet, or when the platform reported that rdtsc is unavailable.
void inline DTRDebug_BeginCycleCount(enum DTRDebugCycleCount tag)
{
	if (!DTR_DEBUG_PROFILING) return;
	if (!globalDebug.input || !globalDebug.input->canUseRdtsc) return;

	globalDebug.cycleCount[tag] = __rdtsc();
}
// Finish a cycle measurement for `tag`: the value currently stored in
// globalDebug.cycleCount[tag] is subtracted from the current __rdtsc() value
// and the difference is written back to the same slot.
//
// NOTE(review): the stored value is presumably the stamp left by a matching
// DTRDebug_BeginCycleCount(tag) call; nothing here checks that one happened.
//
// Does nothing when DTR_DEBUG_PROFILING is off, when globalDebug.input has not
// been set yet, or when the platform reported that rdtsc is unavailable.
void inline DTRDebug_EndCycleCount(enum DTRDebugCycleCount tag)
{
	if (!DTR_DEBUG_PROFILING) return;
	if (!globalDebug.input || !globalDebug.input->canUseRdtsc) return;

	globalDebug.cycleCount[tag] = __rdtsc() - globalDebug.cycleCount[tag];
}
FILE_SCOPE void PushMemStackText(const char *const name, const DqnMemStack *const stack)
{
if (DTR_DEBUG)
{
@ -72,6 +93,7 @@ void DTRDebug_Update(DTRState *const state,
DTRDebug *const debug = &globalDebug;
debug->renderBuffer = renderBuffer;
debug->input = input;
debug->font = &state->font;
debug->displayColor = DqnV4_4f(1, 1, 1, 1);
if (debug->font->bitmap && debug->renderBuffer)
@ -88,6 +110,12 @@ void DTRDebug_Update(DTRState *const state,
DTRDebug_PushText("TrianglesRendered: %'lld", debug->counter[DTRDebugCounter_RenderTriangle]);
DTRDebug_PushText("");
for (i32 i = 0; i < DQN_ARRAY_COUNT(debug->cycleCount); i++)
{
DTRDebug_PushText("%d: %'lld cycles", i, debug->cycleCount[i]);
}
DTRDebug_PushText("");
// memory
{
PushMemStackText("PermBuffer", &memory->permMemStack);
@ -95,6 +123,7 @@ void DTRDebug_Update(DTRState *const state,
}
DTRDebug_PushText("SSE2Support: %s", (input->canUseSSE2) ? "true" : "false");
DTRDebug_PushText("SSE2Support: %s", (input->canUseRdtsc) ? "true" : "false");
debug->displayP =
DqnV2_2i(0, debug->renderBuffer->height + globalDebug.displayYOffset);

View File

@ -4,30 +4,31 @@
#include "dqn.h"
#define DTR_DEBUG 1
#if DTR_DEBUG
#define DTR_DEBUG_RENDER 0
#define DTR_DEBUG_RENDER 0
#define DTR_DEBUG_PROFILING 0
#if DTR_DEBUG_PROFILING
#define DTR_DEBUG_PROFILING_EASY_PROFILER 0
#if DTR_DEBUG_PROFILING_EASY_PROFILER
#define BUILD_WITH_EASY_PROFILER 1
#include "external/easy/profiler.h"
#define DTR_DEBUG_PROFILE_START() profiler::startListen()
#define DTR_DEBUG_PROFILE_END() profiler::stopListen()
#define DTR_DEBUG_EP_PROFILE_START() profiler::startListen()
#define DTR_DEBUG_EP_PROFILE_END() profiler::stopListen()
#define DTR_DEBUG_TIMED_BLOCK(name) EASY_BLOCK(name)
#define DTR_DEBUG_TIMED_NONSCOPED_BLOCK(name) EASY_NONSCOPED_BLOCK(name)
#define DTR_DEBUG_TIMED_END_BLOCK() EASY_END_BLOCK()
#define DTR_DEBUG_TIMED_FUNCTION() EASY_FUNCTION()
#define DTR_DEBUG_EP_TIMED_BLOCK(name) EASY_BLOCK(name)
#define DTR_DEBUG_EP_TIMED_NONSCOPED_BLOCK(name) EASY_NONSCOPED_BLOCK(name)
#define DTR_DEBUG_EP_TIMED_END_BLOCK() EASY_END_BLOCK()
#define DTR_DEBUG_EP_TIMED_FUNCTION() EASY_FUNCTION()
#else
#define DTR_DEBUG_PROFILE_START()
#define DTR_DEBUG_PROFILE_END()
#define DTR_DEBUG_EP_PROFILE_START()
#define DTR_DEBUG_EP_PROFILE_END()
#define DTR_DEBUG_TIMED_BLOCK(name)
#define DTR_DEBUG_TIMED_NONSCOPED_BLOCK(name)
#define DTR_DEBUG_TIMED_END_BLOCK()
#define DTR_DEBUG_TIMED_FUNCTION()
#define DTR_DEBUG_EP_TIMED_BLOCK(name)
#define DTR_DEBUG_EP_TIMED_NONSCOPED_BLOCK(name)
#define DTR_DEBUG_EP_TIMED_END_BLOCK()
#define DTR_DEBUG_EP_TIMED_FUNCTION()
#endif
#define DTR_DEBUG_PROFILING 1
#endif
typedef struct DTRRenderBuffer DTRRenderBuffer;
@ -43,16 +44,24 @@ enum DTRDebugCounter
DTRDebugCounter_Count,
};
// Tags naming the slots of DTRDebug.cycleCount. Pass one of these to
// DTRDebug_BeginCycleCount()/DTRDebug_EndCycleCount().
enum DTRDebugCycleCount
{
	// Wraps the "Scan and Render" loop of DTRRender_Triangle
	DTRDebugCycleCount_RenderTriangle_Rasterise = 0,

	// Number of tags above; used as the length of DTRDebug.cycleCount
	DTRDebugCycleCount_Count,
};
// State backing the on-screen debug overlay: where and how debug text is
// drawn, plus the per-tag counters that DTRDebug_Update prints.
typedef struct DTRDebug
{
// Font used for debug text; DTRDebug_Update points this at state->font
DTRFont *font;
// Target buffer for debug text; assigned in DTRDebug_Update
DTRRenderBuffer *renderBuffer;
// Platform input; assigned in DTRDebug_Update and read by the cycle count
// helpers to check canUseRdtsc
PlatformInput *input;
// Text colour; DTRDebug_Update sets it to (1, 1, 1, 1)
DqnV4 displayColor;
// Text position; DTRDebug_Update sets it to
// (0, renderBuffer->height + displayYOffset)
DqnV2 displayP;
// Added to the render buffer height when computing displayP
i32 displayYOffset;
// NOTE(review): `counter` appears twice in this view (the line below and the
// one after cycleCount); they look like the before/after lines of the same
// change, so only one declaration should exist in the real header - confirm.
u64 counter[DTRDebugCounter_Count];
// One slot per DTRDebugCycleCount tag, written by
// DTRDebug_BeginCycleCount/DTRDebug_EndCycleCount and printed as "cycles"
u64 cycleCount[DTRDebugCycleCount_Count];
// One slot per DTRDebugCounter tag
u64 counter [DTRDebugCounter_Count];
// NOTE(review): presumably a running count of pixels written - no use is
// visible here, confirm against the renderer
u64 totalSetPixels;
} DTRDebug;
@ -63,6 +72,9 @@ void DTRDebug_Update(DTRState *const state,
DTRRenderBuffer *const renderBuffer,
PlatformInput *const input, PlatformMemory *const memory);
void inline DTRDebug_BeginCycleCount(enum DTRDebugCycleCount tag);
void inline DTRDebug_EndCycleCount (enum DTRDebugCycleCount tag);
void inline DTRDebug_CounterIncrement(enum DTRDebugCounter tag)
{
if (DTR_DEBUG)

View File

@ -2,6 +2,7 @@
#define DRENDERER_PLATFORM_H
#include "dqn.h"
#include <intrin.h>
enum PlatformFilePermissionFlag
{
@ -73,6 +74,7 @@ typedef struct PlatformInput
f64 timeNowInS;
bool executableReloaded;
bool canUseSSE2;
bool canUseRdtsc;
PlatformAPI api;
union {

View File

@ -105,7 +105,7 @@ FILE_SCOPE inline void SetPixel(DTRRenderBuffer *const renderBuffer, const i32 x
if (!renderBuffer) return;
if (x < 0 || x > (renderBuffer->width - 1)) return;
if (y < 0 || y > (renderBuffer->height - 1)) return;
DTR_DEBUG_TIMED_FUNCTION();
DTR_DEBUG_EP_TIMED_FUNCTION();
u32 *const bitmapPtr = (u32 *)renderBuffer->memory;
const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4;
@ -175,7 +175,7 @@ void DTRRender_Text(DTRRenderBuffer *const renderBuffer,
{
if (!text) return;
if (!font.bitmap || !font.atlas || !renderBuffer) return;
DTR_DEBUG_TIMED_FUNCTION();
DTR_DEBUG_EP_TIMED_FUNCTION();
if (len == -1) len = Dqn_strlen(text);
@ -253,7 +253,7 @@ FILE_SCOPE void TransformPoints(const DqnV2 origin, DqnV2 *const pList,
const f32 rotation)
{
if (!pList || numP == 0) return;
DTR_DEBUG_TIMED_FUNCTION();
DTR_DEBUG_EP_TIMED_FUNCTION();
DqnV2 xAxis = (DqnV2_2f(cosf(rotation), sinf(rotation)));
DqnV2 yAxis = DqnV2_2f(-xAxis.y, xAxis.x);
@ -271,7 +271,7 @@ void DTRRender_Line(DTRRenderBuffer *const renderBuffer, DqnV2i a,
DqnV2i b, DqnV4 color)
{
if (!renderBuffer) return;
DTR_DEBUG_TIMED_FUNCTION();
DTR_DEBUG_EP_TIMED_FUNCTION();
color = DTRRender_SRGB1ToLinearSpaceV4(color);
color = PreMultiplyAlpha1(color);
@ -390,7 +390,7 @@ FILE_SCOPE DqnRect GetBoundingBox(const DqnV2 *const pList, const i32 numP)
void DTRRender_Rectangle(DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max,
DqnV4 color, const DTRRenderTransform transform)
{
DTR_DEBUG_TIMED_FUNCTION();
DTR_DEBUG_EP_TIMED_FUNCTION();
////////////////////////////////////////////////////////////////////////////
// Transform vertexes
////////////////////////////////////////////////////////////////////////////
@ -487,7 +487,7 @@ void DTRRender_Rectangle(DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 m
void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3,
DqnV4 color, const DTRRenderTransform transform)
{
DTR_DEBUG_TIMED_FUNCTION();
DTR_DEBUG_EP_TIMED_FUNCTION();
////////////////////////////////////////////////////////////////////////////
// Transform vertexes
@ -528,6 +528,10 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
determine whether a point lies on the line, or is to the left or right of
the line.
We can do this using the PerpDotProduct conceptually known as the cross
product in 2D. This can be expressed using the determinant and is the
method we are using.
First forming a 3x3 matrix of our terms with a, b being from the triangle
and test point c, we can derive a 2x2 matrix by subtracting the 1st
column from the 2nd and 1st column from the third.
@ -646,15 +650,18 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
f32 signedArea3DeltaX = c.y - a.y;
f32 signedArea3DeltaY = a.x - c.x;
f32 invSignedAreaParallelogram = 1 / ((b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x));
f32 signedAreaParallelogram = ((b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x));
if (signedAreaParallelogram == 0) return;
f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram;
DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
////////////////////////////////////////////////////////////////////////////
// Scan and Render
////////////////////////////////////////////////////////////////////////////
color.rgb *= 0.1f;
const u32 zBufferPitch = renderBuffer->width;
const f32 BARYCENTRIC_EPSILON = 0.1f;
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
{
f32 signedArea1Row = signedArea1;
f32 signedArea2Row = signedArea2;
f32 signedArea3Row = signedArea3;
@ -663,7 +670,30 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
{
if (signedArea1Row >= 0 && signedArea2Row >= 0 && signedArea3Row >= 0)
{
#if 1
f32 barycentricA = signedArea2Row * invSignedAreaParallelogram;
f32 barycentricB = signedArea3Row * invSignedAreaParallelogram;
f32 barycentricC = signedArea1Row * invSignedAreaParallelogram;
if (DTR_DEBUG)
{
f32 barycentricSum = barycentricA + barycentricB + barycentricC;
DQN_ASSERT((1.0f - barycentricSum) < BARYCENTRIC_EPSILON);
}
f32 pixelZValue =
(a.z * barycentricA) + (b.z * barycentricB) + (c.z * barycentricC);
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
}
#else
SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
#endif
}
signedArea1Row += signedArea1DeltaX;
@ -675,6 +705,7 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
signedArea2 += signedArea2DeltaY;
signedArea3 += signedArea3DeltaY;
}
DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
////////////////////////////////////////////////////////////////////////////
// Debug
@ -718,7 +749,7 @@ void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer,
const DTRRenderTransform transform, DqnV4 color)
{
if (!bitmap || !bitmap->memory || !renderBuffer) return;
DTR_DEBUG_TIMED_FUNCTION();
DTR_DEBUG_EP_TIMED_FUNCTION();
////////////////////////////////////////////////////////////////////////////
// Transform vertexes
@ -785,7 +816,7 @@ void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer,
if (bufXYIsInside)
{
DTR_DEBUG_TIMED_BLOCK("DTRRender_Bitmap TexelCalculation");
DTR_DEBUG_EP_TIMED_BLOCK("DTRRender_Bitmap TexelCalculation");
DqnV2 bufPRelToBasis = DqnV2_2i(bufferX, bufferY) - rectBasis;
f32 u = DqnV2_Dot(bufPRelToBasis, xAxisRelToBasis) * invXAxisLenSq;
@ -816,7 +847,7 @@ void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer,
i32 texel4Y = DQN_MIN((texelY + 1), bitmap->dim.h - 1);
{
DTR_DEBUG_TIMED_BLOCK("DTRRender_Bitmap TexelBilinearInterpolation");
DTR_DEBUG_EP_TIMED_BLOCK("DTRRender_Bitmap TexelBilinearInterpolation");
u32 texel1 = *(u32 *)(bitmapPtr + ((texel1X * bitmap->bytesPerPixel) + (texel1Y * pitch)));
u32 texel2 = *(u32 *)(bitmapPtr + ((texel2X * bitmap->bytesPerPixel) + (texel2Y * pitch)));
u32 texel3 = *(u32 *)(bitmapPtr + ((texel3X * bitmap->bytesPerPixel) + (texel3Y * pitch)));

View File

@ -513,6 +513,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
PlatformInput platformInput = {};
platformInput.canUseSSE2 = IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE);
platformInput.canUseRdtsc = IsProcessorFeaturePresent(PF_RDTSC_INSTRUCTION_AVAILABLE);
platformInput.api = platformAPI;
////////////////////////////////////////////////////////////////////////////

View File

@ -20,6 +20,7 @@
#endif
#include <stdint.h> // For standard types
#include <float.h>
#define LOCAL_PERSIST static
#define FILE_SCOPE static
@ -35,6 +36,8 @@ typedef int16_t i16;
typedef double f64;
typedef float f32;
// Lowest (most negative) finite f32 value. This is deliberately NOT FLT_MIN:
// FLT_MIN is the smallest *positive* normalised float (~1.17e-38), so using it
// as a "less than everything" sentinel (e.g. clearing a z-buffer that is tested
// with `z > current`) would reject every value that is zero or negative.
#define DQN_F32_MIN (-FLT_MAX)
#define DQN_TERABYTE(val) (DQN_GIGABYTE(val) * 1024LL)
#define DQN_GIGABYTE(val) (DQN_MEGABYTE(val) * 1024LL)
#define DQN_MEGABYTE(val) (DQN_KILOBYTE(val) * 1024LL)