Make tri rasterisation into SIMD/non-SIMD paths

This commit is contained in:
Doyle Thai 2017-05-31 16:40:13 +10:00
parent bb5fc03bda
commit 47d606e297
7 changed files with 478 additions and 430 deletions

View File

@ -8,6 +8,8 @@
#include "dqn.h" #include "dqn.h"
#include <math.h> #include <math.h>
PlatformFlags globalDTRPlatformFlags;
// #include <algorithm> // #include <algorithm>
void CompAssignment(DTRRenderBuffer *const renderBuffer, PlatformInput *const input, void CompAssignment(DTRRenderBuffer *const renderBuffer, PlatformInput *const input,
PlatformMemory *const memory) PlatformMemory *const memory)
@ -946,8 +948,9 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Initialisation // Initialisation
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
DTRState *state = (DTRState *)memory->context; DTRState *state = (DTRState *)memory->context;
if (input->executableReloaded) globalDTRPlatformFlags = input->flags;
if (globalDTRPlatformFlags.executableReloaded)
{ {
DTR_DEBUG_EP_PROFILE_END(); DTR_DEBUG_EP_PROFILE_END();
DTR_DEBUG_EP_PROFILE_START(); DTR_DEBUG_EP_PROFILE_START();
@ -1046,77 +1049,7 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
DTRMesh *const mesh = &state->mesh; DTRMesh *const mesh = &state->mesh;
DqnV3 modelP = DqnV3_3f(renderBuffer.width * 0.5f, renderBuffer.height * 0.5f, 0); DqnV3 modelP = DqnV3_3f(renderBuffer.width * 0.5f, renderBuffer.height * 0.5f, 0);
for (u32 i = 0; i < mesh->numFaces; i++) DTRRender_Mesh(&renderBuffer, mesh, modelP, MODEL_SCALE, LIGHT);
{
DTRMeshFace face = mesh->faces[i];
DQN_ASSERT(face.numVertexIndex == 3);
i32 vertAIndex = face.vertexIndex[0];
i32 vertBIndex = face.vertexIndex[1];
i32 vertCIndex = face.vertexIndex[2];
DqnV4 vertA = mesh->vertexes[vertAIndex];
DqnV4 vertB = mesh->vertexes[vertBIndex];
DqnV4 vertC = mesh->vertexes[vertCIndex];
// TODO(doyle): Some models have -ve indexes to refer to relative
// vertices. We should resolve that to positive indexes at run time.
DQN_ASSERT(vertAIndex < (i32)mesh->numVertexes);
DQN_ASSERT(vertBIndex < (i32)mesh->numVertexes);
DQN_ASSERT(vertCIndex < (i32)mesh->numVertexes);
DqnV4 vertAB = vertB - vertA;
DqnV4 vertAC = vertC - vertA;
DqnV3 normal = DqnV3_Cross(vertAC.xyz, vertAB.xyz);
f32 intensity = DqnV3_Dot(DqnV3_Normalise(normal), LIGHT);
if (intensity < 0) continue;
DqnV4 modelCol = DqnV4_4f(1, 1, 1, 1);
modelCol.rgb *= DQN_ABS(intensity);
DqnV3 screenVA = (vertA.xyz * MODEL_SCALE) + modelP;
DqnV3 screenVB = (vertB.xyz * MODEL_SCALE) + modelP;
DqnV3 screenVC = (vertC.xyz * MODEL_SCALE) + modelP;
// TODO(doyle): Why do we need rounding here? Maybe it's because
// I don't do any interpolation in the triangle routine for jagged
// edges.
screenVA.x = (f32)(i32)(screenVA.x + 0.5f);
screenVA.y = (f32)(i32)(screenVA.y + 0.5f);
screenVB.x = (f32)(i32)(screenVB.x + 0.5f);
screenVB.y = (f32)(i32)(screenVB.y + 0.5f);
screenVC.x = (f32)(i32)(screenVC.x + 0.5f);
screenVC.y = (f32)(i32)(screenVC.y + 0.5f);
i32 textureAIndex = face.texIndex[0];
i32 textureBIndex = face.texIndex[1];
i32 textureCIndex = face.texIndex[2];
DqnV2 texA = mesh->texUV[textureAIndex].xy;
DqnV2 texB = mesh->texUV[textureBIndex].xy;
DqnV2 texC = mesh->texUV[textureCIndex].xy;
DQN_ASSERT(textureAIndex < (i32)mesh->numTexUV);
DQN_ASSERT(textureBIndex < (i32)mesh->numTexUV);
DQN_ASSERT(textureCIndex < (i32)mesh->numTexUV);
bool DEBUG_SIMPLE_MODE = false;
if (DTR_DEBUG && DEBUG_SIMPLE_MODE)
{
DTRRender_Triangle(&renderBuffer, screenVA, screenVB, screenVC, modelCol);
}
else
{
DTRRender_TexturedTriangle(input, &renderBuffer, screenVA, screenVB, screenVC, texA,
texB, texC, &state->mesh.tex, modelCol);
}
bool DEBUG_WIREFRAME = false;
if (DTR_DEBUG && DEBUG_WIREFRAME)
{
DqnV4 wireColor = DqnV4_4f(1.0f, 1.0f, 1.0f, 0.01f);
DTRRender_Line(&renderBuffer, DqnV2i_V2(screenVA.xy), DqnV2i_V2(screenVB.xy), wireColor);
DTRRender_Line(&renderBuffer, DqnV2i_V2(screenVB.xy), DqnV2i_V2(screenVC.xy), wireColor);
DTRRender_Line(&renderBuffer, DqnV2i_V2(screenVC.xy), DqnV2i_V2(screenVA.xy), wireColor);
}
}
} }
// Rect drawing // Rect drawing

View File

@ -14,4 +14,6 @@ typedef struct DTRState
DTRBitmap bitmap; DTRBitmap bitmap;
DTRMesh mesh; DTRMesh mesh;
} DTRState; } DTRState;
extern PlatformFlags globalDTRPlatformFlags;
#endif #endif

View File

@ -119,7 +119,7 @@ void inline DTRDebug_BeginCycleCount(enum DTRDebugCycleCount tag)
{ {
if (DTR_DEBUG_PROFILING) if (DTR_DEBUG_PROFILING)
{ {
if (globalDebug.input && globalDebug.input->canUseRdtsc) if (globalDTRPlatformFlags.canUseRdtsc)
{ {
DTRDebugCycles *const cycles = &globalDebug.cycles[tag]; DTRDebugCycles *const cycles = &globalDebug.cycles[tag];
cycles->tmpStartCycles = __rdtsc(); cycles->tmpStartCycles = __rdtsc();
@ -132,7 +132,7 @@ void inline DTRDebug_EndCycleCount(enum DTRDebugCycleCount tag)
{ {
if (DTR_DEBUG_PROFILING) if (DTR_DEBUG_PROFILING)
{ {
if (globalDebug.input && globalDebug.input->canUseRdtsc) if (globalDTRPlatformFlags.canUseRdtsc)
{ {
DTRDebugCycles *const cycles = &globalDebug.cycles[tag]; DTRDebugCycles *const cycles = &globalDebug.cycles[tag];
cycles->totalCycles += __rdtsc() - cycles->tmpStartCycles; cycles->totalCycles += __rdtsc() - cycles->tmpStartCycles;
@ -208,8 +208,8 @@ void DTRDebug_Update(DTRState *const state,
DTRDebug_PushText("MouseRBtn: %s", (input->mouse.rightBtn.endedDown) ? "true" : "false"); DTRDebug_PushText("MouseRBtn: %s", (input->mouse.rightBtn.endedDown) ? "true" : "false");
DTRDebug_PushText(""); DTRDebug_PushText("");
DTRDebug_PushText("SSE2Support: %s", (input->canUseSSE2) ? "true" : "false"); DTRDebug_PushText("SSE2Support: %s", (globalDTRPlatformFlags.canUseSSE2) ? "true" : "false");
DTRDebug_PushText("RDTSCSupport: %s", (input->canUseRdtsc) ? "true" : "false"); DTRDebug_PushText("RDTSCSupport: %s", (globalDTRPlatformFlags.canUseRdtsc) ? "true" : "false");
DTRDebug_PushText(""); DTRDebug_PushText("");
DTRDebug_PushText("TotalSetPixels: %'lld", debug->totalSetPixels); DTRDebug_PushText("TotalSetPixels: %'lld", debug->totalSetPixels);
@ -226,7 +226,7 @@ void DTRDebug_Update(DTRState *const state,
u64 avgCycles = cycles->totalCycles / invocations; u64 avgCycles = cycles->totalCycles / invocations;
DTRDebug_PushText("%d: %'lld avg cycles", i, avgCycles); DTRDebug_PushText("%d: %'lld avg cycles", i, avgCycles);
*cycles = emptyDebugCycles; // *cycles = emptyDebugCycles;
} }
DTRDebug_PushText(""); DTRDebug_PushText("");

View File

@ -83,13 +83,18 @@ typedef struct PlatformMouse
KeyState rightBtn; KeyState rightBtn;
} PlatformMouse; } PlatformMouse;
typedef struct PlatformFlags
{
bool executableReloaded;
bool canUseRdtsc;
bool canUseSSE2;
} PlatformFlags;
typedef struct PlatformInput typedef struct PlatformInput
{ {
f32 deltaForFrame; f32 deltaForFrame;
f64 timeNowInS; f64 timeNowInS;
bool executableReloaded; PlatformFlags flags;
bool canUseSSE2;
bool canUseRdtsc;
PlatformAPI api; PlatformAPI api;
PlatformMouse mouse; PlatformMouse mouse;

View File

@ -554,8 +554,351 @@ FILE_SCOPE void DebugBarycentricInternal(DqnV2 p, DqnV2 a, DqnV2 b, DqnV2 c, f32
*u = 1.0f - *v - *w; *u = 1.0f - *v - *w;
} }
void DTRRender_TexturedTriangle(PlatformInput *const input, inline void RasteriseTexturedTriangle(DTRRenderBuffer *const renderBuffer, const DqnV3 p1,
DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, const DqnV3 p2, const DqnV3 p3, const DqnV2 uv1,
const DqnV2 uv2, const DqnV2 uv3, DTRBitmap *const texture,
const DqnV4 color)
{
DqnV2i max = DqnV2i_2f(DQN_MAX(DQN_MAX(p1.x, p2.x), p3.x), DQN_MAX(DQN_MAX(p1.y, p2.y), p3.y));
DqnV2i min = DqnV2i_2f(DQN_MIN(DQN_MIN(p1.x, p2.x), p3.x), DQN_MIN(DQN_MIN(p1.y, p2.y), p3.y));
min.x = DQN_MAX(min.x, 0);
min.y = DQN_MAX(min.y, 0);
max.x = DQN_MIN(max.x, renderBuffer->width - 1);
max.y = DQN_MIN(max.y, renderBuffer->height - 1);
const u32 zBufferPitch = renderBuffer->width;
const DqnV3 a = p1;
const DqnV3 b = p2;
const DqnV3 c = p3;
DqnV2i startP = min;
f32 signedArea1 = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x));
f32 signedArea1DeltaX = a.y - b.y;
f32 signedArea1DeltaY = b.x - a.x;
f32 signedArea2 = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x));
f32 signedArea2DeltaX = b.y - c.y;
f32 signedArea2DeltaY = c.x - b.x;
f32 signedArea3 = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x));
f32 signedArea3DeltaX = c.y - a.y;
f32 signedArea3DeltaY = a.x - c.x;
f32 signedAreaParallelogram = signedArea1 + signedArea2 + signedArea3;
if (signedAreaParallelogram == 0) return;
f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram;
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
{
f32 signedArea1Row = signedArea1;
f32 signedArea2Row = signedArea2;
f32 signedArea3Row = signedArea3;
for (i32 bufferX = min.x; bufferX < max.x; bufferX++)
{
if (signedArea1Row >= 0 && signedArea2Row >= 0 && signedArea3Row >= 0)
{
f32 barycentricB = signedArea3Row * invSignedAreaParallelogram;
f32 barycentricC = signedArea1Row * invSignedAreaParallelogram;
if (DTR_DEBUG)
{
const f32 EPSILON = 0.1f;
f32 debugSignedArea1 = ((b.x - a.x) * (bufferY - a.y)) - ((b.y - a.y) * (bufferX - a.x));
f32 debugSignedArea2 = ((c.x - b.x) * (bufferY - b.y)) - ((c.y - b.y) * (bufferX - b.x));
f32 debugSignedArea3 = ((a.x - c.x) * (bufferY - c.y)) - ((a.y - c.y) * (bufferX - c.x));
f32 deltaSignedArea1 = DQN_ABS(debugSignedArea1 - signedArea1Row);
f32 deltaSignedArea2 = DQN_ABS(debugSignedArea2 - signedArea2Row);
f32 deltaSignedArea3 = DQN_ABS(debugSignedArea3 - signedArea3Row);
DQN_ASSERT(deltaSignedArea1 < EPSILON && deltaSignedArea2 < EPSILON &&
deltaSignedArea3 < EPSILON)
f32 debugBarycentricA, debugBarycentricB, debugBarycentricC;
DebugBarycentricInternal(DqnV2_2i(bufferX, bufferY), a.xy, b.xy, c.xy,
&debugBarycentricA, &debugBarycentricB,
&debugBarycentricC);
f32 deltaBaryB = DQN_ABS(barycentricB - debugBarycentricB);
f32 deltaBaryC = DQN_ABS(barycentricC - debugBarycentricC);
DQN_ASSERT(deltaBaryB < EPSILON && deltaBaryC < EPSILON)
}
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
f32 pixelZValue = a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z));
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
if (texture)
{
u8 *texturePtr = texture->memory;
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
DqnV2 uv =
uv1 + ((uv2 - uv1) * barycentricB) + ((uv3 - uv1) * barycentricC);
const f32 EPSILON = 0.1f;
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
f32 texelXf = uv.x * texture->dim.w;
f32 texelYf = uv.y * texture->dim.h;
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
i32 texelX = (i32)texelXf;
i32 texelY = (i32)texelYf;
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) +
(texelY * texturePitch));
DqnV4 color1;
color1.a = (f32)(texel1 >> 24);
color1.b = (f32)((texel1 >> 16) & 0xFF);
color1.g = (f32)((texel1 >> 8) & 0xFF);
color1.r = (f32)((texel1 >> 0) & 0xFF);
color1 *= DTRRENDER_INV_255;
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
DqnV4 blend = color * color1;
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
}
else
{
SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
}
}
}
signedArea1Row += signedArea1DeltaX;
signedArea2Row += signedArea2DeltaX;
signedArea3Row += signedArea3DeltaX;
}
signedArea1 += signedArea1DeltaY;
signedArea2 += signedArea2DeltaY;
signedArea3 += signedArea3DeltaY;
}
}
inline void SIMDRasteriseTexturedTriangle(DTRRenderBuffer *const renderBuffer, const DqnV3 p1,
const DqnV3 p2, const DqnV3 p3, const DqnV2 uv1,
const DqnV2 uv2, const DqnV2 uv3,
DTRBitmap *const texture, const DqnV4 color)
{
////////////////////////////////////////////////////////////////////////////
// Calculate Bounding Box
////////////////////////////////////////////////////////////////////////////
DqnV2i max = DqnV2i_2f(DQN_MAX(DQN_MAX(p1.x, p2.x), p3.x),
DQN_MAX(DQN_MAX(p1.y, p2.y), p3.y));
DqnV2i min = DqnV2i_2f(DQN_MIN(DQN_MIN(p1.x, p2.x), p3.x),
DQN_MIN(DQN_MIN(p1.y, p2.y), p3.y));
min.x = DQN_MAX(min.x, 0);
min.y = DQN_MAX(min.y, 0);
max.x = DQN_MIN(max.x, renderBuffer->width - 1);
max.y = DQN_MIN(max.y, renderBuffer->height - 1);
const u32 zBufferPitch = renderBuffer->width;
const DqnV3 a = p1;
const DqnV3 b = p2;
const DqnV3 c = p3;
DqnV2i startP = min;
f32 signedAreaC = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x));
f32 signedAreaCDeltaX = a.y - b.y;
f32 signedAreaCDeltaY = b.x - a.x;
f32 signedAreaA = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x));
f32 signedAreaADeltaX = b.y - c.y;
f32 signedAreaADeltaY = c.x - b.x;
f32 signedAreaB = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x));
f32 signedAreaBDeltaX = c.y - a.y;
f32 signedAreaBDeltaY = a.x - c.x;
f32 signedAreaParallelogram = signedAreaC + signedAreaA + signedAreaB;
if (signedAreaParallelogram == 0) return;
f32 invSignedAreaParallelogram = 1.0f / signedAreaParallelogram;
__m128 invSignedAreaParallelogram_4x = _mm_set_ps1(invSignedAreaParallelogram);
// NOTE: Order is important here!
__m128 triangleZ = _mm_set_ps(0, b.z, a.z, c.z);
__m128 signedAreaPixelDeltaX = _mm_set_ps(0, signedAreaBDeltaX, signedAreaADeltaX, signedAreaCDeltaX);
__m128 signedAreaPixelDeltaY = _mm_set_ps(0, signedAreaBDeltaY, signedAreaADeltaY, signedAreaCDeltaY);
__m128 signedAreaPixel1 = _mm_set_ps(0, signedAreaB, signedAreaA, signedAreaC);
__m128 signedAreaPixel2 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaX);
const __m128 INV255_4X = _mm_set_ps1(1.0f / 255.0f);
const __m128 ZERO_4X = _mm_set_ps1(0.0f);
const __m128 TWO_4X = _mm_set_ps1(2.0f);
const u32 IS_GREATER_MASK = 0xF;
// NOTE: Step size of 2 pixels across X
signedAreaPixelDeltaX = _mm_mul_ps(signedAreaPixelDeltaX, TWO_4X);
const DqnV2 uv2SubUv1 = uv2 - uv1;
const DqnV2 uv3SubUv1 = uv3 - uv1;
const __m128 colorModulate = _mm_set_ps(color.a, color.b, color.g, color.r);
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
{
__m128 signedArea1 = signedAreaPixel1;
__m128 signedArea2 = signedAreaPixel2;
#define PROCESS_COLOR_NO_SIMD 0
for (i32 bufferX = min.x; bufferX < max.x; bufferX += 2)
{
__m128 isGreater1 = _mm_cmpge_ps(signedArea1, ZERO_4X);
i32 isGreaterResult1 = _mm_movemask_ps(isGreater1);
if ((isGreaterResult1 & IS_GREATER_MASK) == IS_GREATER_MASK)
{
__m128 barycentric = _mm_mul_ps(signedArea1, invSignedAreaParallelogram_4x);
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
((f32 *)&barycentricZ)[1] +
((f32 *)&barycentricZ)[2];
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
u8 *texturePtr = texture->memory;
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
f32 barycentricB = ((f32 *)&barycentric)[2];
f32 barycentricC = ((f32 *)&barycentric)[0];
DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC);
const f32 EPSILON = 0.1f;
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
f32 texelXf = uv.x * texture->dim.w;
f32 texelYf = uv.y * texture->dim.h;
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
i32 texelX = (i32)texelXf;
i32 texelY = (i32)texelYf;
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) +
(texelY * texturePitch));
#if PROCESS_COLOR_NO_SIMD
DqnV4 color1;
color1.a = (f32)(texel1 >> 24);
color1.b = (f32)((texel1 >> 16) & 0xFF);
color1.g = (f32)((texel1 >> 8) & 0xFF);
color1.r = (f32)((texel1 >> 0) & 0xFF);
color1 *= DTRRENDER_INV_255;
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
DqnV4 blend = color * color1;
#else
__m128 color1 =
_mm_set_ps((f32)(texel1 >> 24), (f32)((texel1 >> 16) & 0xFF),
(f32)((texel1 >> 8) & 0xFF), (f32)((texel1 >> 0) & 0xFF));
color1 = _mm_mul_ps(color1, INV255_4X);
color1 = _mm_mul_ps(color1, color1); // to linear space
color1 = _mm_mul_ps(color1, colorModulate);
DqnV4 blend = {};
blend.r = ((f32 *)&color1)[0];
blend.g = ((f32 *)&color1)[1];
blend.b = ((f32 *)&color1)[2];
blend.a = ((f32 *)&color1)[3];
#endif
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
}
}
__m128 isGreater2 = _mm_cmpge_ps(signedArea2, ZERO_4X);
i32 isGreaterResult2 = _mm_movemask_ps(isGreater2);
i32 bufferX1 = bufferX + 1;
if ((isGreaterResult2 & IS_GREATER_MASK) == IS_GREATER_MASK && bufferX1 < max.x)
{
__m128 barycentric = _mm_mul_ps(signedArea2, invSignedAreaParallelogram_4x);
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
i32 zBufferIndex = bufferX1 + (bufferY * zBufferPitch);
f32 pixelZValue = ((f32 *)&barycentricZ)[0] + ((f32 *)&barycentricZ)[1] +
((f32 *)&barycentricZ)[2];
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
u8 *texturePtr = texture->memory;
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
f32 barycentricB = ((f32 *)&barycentric)[2];
f32 barycentricC = ((f32 *)&barycentric)[0];
DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC);
const f32 EPSILON = 0.1f;
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
f32 texelXf = uv.x * texture->dim.w;
f32 texelYf = uv.y * texture->dim.h;
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
i32 texelX = (i32)texelXf;
i32 texelY = (i32)texelYf;
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) +
(texelY * texturePitch));
#if PROCESS_COLOR_NO_SIMD
DqnV4 color1;
color1.a = (f32)(texel1 >> 24);
color1.b = (f32)((texel1 >> 16) & 0xFF);
color1.g = (f32)((texel1 >> 8) & 0xFF);
color1.r = (f32)((texel1 >> 0) & 0xFF);
color1 *= DTRRENDER_INV_255;
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
DqnV4 blend = color * color1;
#else
__m128 color1 =
_mm_set_ps((f32)(texel1 >> 24), (f32)((texel1 >> 16) & 0xFF),
(f32)((texel1 >> 8) & 0xFF), (f32)((texel1 >> 0) & 0xFF));
color1 = _mm_mul_ps(color1, INV255_4X);
color1 = _mm_mul_ps(color1, color1); // to linear space
color1 = _mm_mul_ps(color1, colorModulate);
DqnV4 blend = {};
blend.r = ((f32 *)&color1)[0];
blend.g = ((f32 *)&color1)[1];
blend.b = ((f32 *)&color1)[2];
blend.a = ((f32 *)&color1)[3];
#endif
SetPixel(renderBuffer, bufferX1, bufferY, blend, ColorSpace_Linear);
}
}
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
}
signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY);
signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY);
}
}
void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3,
DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture,
DqnV4 color, const DTRRenderTransform transform) DqnV4 color, const DTRRenderTransform transform)
{ {
@ -579,18 +922,6 @@ void DTRRender_TexturedTriangle(PlatformInput *const input,
color = DTRRender_SRGB1ToLinearSpaceV4(color); color = DTRRender_SRGB1ToLinearSpaceV4(color);
color = PreMultiplyAlpha1(color); color = PreMultiplyAlpha1(color);
////////////////////////////////////////////////////////////////////////////
// Calculate Bounding Box
////////////////////////////////////////////////////////////////////////////
DqnV2i max = DqnV2i_2f(DQN_MAX(DQN_MAX(p1.x, p2.x), p3.x),
DQN_MAX(DQN_MAX(p1.y, p2.y), p3.y));
DqnV2i min = DqnV2i_2f(DQN_MIN(DQN_MIN(p1.x, p2.x), p3.x),
DQN_MIN(DQN_MIN(p1.y, p2.y), p3.y));
min.x = DQN_MAX(min.x, 0);
min.y = DQN_MAX(min.y, 0);
max.x = DQN_MIN(max.x, renderBuffer->width - 1);
max.y = DQN_MIN(max.y, renderBuffer->height - 1);
f32 area2Times = ((p2.x - p1.x) * (p2.y + p1.y)) + f32 area2Times = ((p2.x - p1.x) * (p2.y + p1.y)) +
((p3.x - p2.x) * (p3.y + p2.y)) + ((p3.x - p2.x) * (p3.y + p2.y)) +
((p1.x - p3.x) * (p1.y + p3.y)); ((p1.x - p3.x) * (p1.y + p3.y));
@ -600,329 +931,17 @@ void DTRRender_TexturedTriangle(PlatformInput *const input,
DQN_SWAP(DqnV3, p2, p3); DQN_SWAP(DqnV3, p2, p3);
} }
////////////////////////////////////////////////////////////////////////////
// Signed Area - See Render_Triangle for explanation
////////////////////////////////////////////////////////////////////////////
const DqnV3 a = p1;
const DqnV3 b = p2;
const DqnV3 c = p3;
DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Scan and Render // Scan and Render
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
const u32 zBufferPitch = renderBuffer->width; if (globalDTRPlatformFlags.canUseSSE2)
if (input->canUseSSE2)
{ {
DqnV2i startP = min; SIMDRasteriseTexturedTriangle(renderBuffer, p1, p2, p3, uv1, uv2, uv3, texture, color);
f32 edge1SignedAreaPixel1 = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x));
f32 edge1SignedAreaPixel1DeltaX = a.y - b.y;
f32 edge1SignedAreaPixel1DeltaY = b.x - a.x;
f32 edge2SignedAreaPixel1 = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x));
f32 edge2SignedAreaPixel1DeltaX = b.y - c.y;
f32 edge2SignedAreaPixel1DeltaY = c.x - b.x;
f32 edge3SignedAreaPixel1 = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x));
f32 edge3SignedAreaPixel1DeltaX = c.y - a.y;
f32 edge3SignedAreaPixel1DeltaY = a.x - c.x;
f32 signedAreaParallelogramPixel1 = edge1SignedAreaPixel1 + edge2SignedAreaPixel1 + edge3SignedAreaPixel1;
if (signedAreaParallelogramPixel1 == 0) return;
f32 invSignedAreaParallelogramPixel1 = 1 / signedAreaParallelogramPixel1;
__m128 inv255_4x = _mm_set_ps1(DTRRENDER_INV_255);
__m128 zero_4x = _mm_set_ps1(0.0f);
__m128 two_4x = _mm_set_ps1(2.0f);
__m128 invSignedAreaParallelogram4x = _mm_set_ps1(invSignedAreaParallelogramPixel1);
__m128 triangleZ = _mm_set_ps(0, b.z, a.z, c.z);
__m128 signedAreaPixelDeltaX = _mm_set_ps(0, edge3SignedAreaPixel1DeltaX, edge2SignedAreaPixel1DeltaX, edge1SignedAreaPixel1DeltaX);
__m128 signedAreaPixelDeltaY = _mm_set_ps(0, edge3SignedAreaPixel1DeltaY, edge2SignedAreaPixel1DeltaY, edge1SignedAreaPixel1DeltaY);
__m128 signedAreaPixel1 = _mm_set_ps(0, edge3SignedAreaPixel1, edge2SignedAreaPixel1, edge1SignedAreaPixel1);
__m128 signedAreaPixel2 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaX);
// NOTE: Step size of 2 pixels across X
signedAreaPixelDeltaX = _mm_mul_ps(signedAreaPixelDeltaX, two_4x);
const DqnV2 uv2SubUv1 = uv2 - uv1;
const DqnV2 uv3SubUv1 = uv3 - uv1;
const __m128 colorModulate = _mm_set_ps(color.a, color.b, color.g, color.r);
const u32 IS_GREATER_MASK = 0xF;
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
{
__m128 signedArea1 = signedAreaPixel1;
__m128 signedArea2 = signedAreaPixel2;
#define PROCESS_COLOR_NO_SIMD 0
for (i32 bufferX = min.x; bufferX < max.x; bufferX += 2)
{
__m128 isGreater1 = _mm_cmpge_ps(signedArea1, zero_4x);
i32 isGreaterResult1 = _mm_movemask_ps(isGreater1);
if ((isGreaterResult1 & IS_GREATER_MASK) == IS_GREATER_MASK)
{
__m128 barycentric = _mm_mul_ps(signedArea1, invSignedAreaParallelogram4x);
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
((f32 *)&barycentricZ)[1] +
((f32 *)&barycentricZ)[2];
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
u8 *texturePtr = texture->memory;
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
f32 barycentricB = ((f32 *)&barycentric)[2];
f32 barycentricC = ((f32 *)&barycentric)[0];
DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC);
const f32 EPSILON = 0.1f;
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
f32 texelXf = uv.x * texture->dim.w;
f32 texelYf = uv.y * texture->dim.h;
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
i32 texelX = (i32)texelXf;
i32 texelY = (i32)texelYf;
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) +
(texelY * texturePitch));
#if PROCESS_COLOR_NO_SIMD
DqnV4 color1;
color1.a = (f32)(texel1 >> 24);
color1.b = (f32)((texel1 >> 16) & 0xFF);
color1.g = (f32)((texel1 >> 8) & 0xFF);
color1.r = (f32)((texel1 >> 0) & 0xFF);
color1 *= DTRRENDER_INV_255;
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
DqnV4 blend = color * color1;
#else
__m128 color1 = _mm_set_ps((f32)(texel1 >> 24),
(f32)((texel1 >> 16) & 0xFF),
(f32)((texel1 >> 8) & 0xFF),
(f32)((texel1 >> 0) & 0xFF));
color1 = _mm_mul_ps(color1, inv255_4x);
color1 = _mm_mul_ps(color1, color1); // to linear space
color1 = _mm_mul_ps(color1, colorModulate);
DqnV4 blend = {};
blend.r = ((f32 *)&color1)[0];
blend.g = ((f32 *)&color1)[1];
blend.b = ((f32 *)&color1)[2];
blend.a = ((f32 *)&color1)[3];
#endif
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
}
}
__m128 isGreater2 = _mm_cmpge_ps(signedArea2, zero_4x);
i32 isGreaterResult2 = _mm_movemask_ps(isGreater2);
i32 bufferX1 = bufferX + 1;
if ((isGreaterResult2 & IS_GREATER_MASK) == IS_GREATER_MASK && bufferX1 < max.x)
{
__m128 barycentric = _mm_mul_ps(signedArea2, invSignedAreaParallelogram4x);
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
i32 zBufferIndex = bufferX1 + (bufferY * zBufferPitch);
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
((f32 *)&barycentricZ)[1] +
((f32 *)&barycentricZ)[2];
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
u8 *texturePtr = texture->memory;
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
f32 barycentricB = ((f32 *)&barycentric)[2];
f32 barycentricC = ((f32 *)&barycentric)[0];
DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC);
const f32 EPSILON = 0.1f;
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
f32 texelXf = uv.x * texture->dim.w;
f32 texelYf = uv.y * texture->dim.h;
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
i32 texelX = (i32)texelXf;
i32 texelY = (i32)texelYf;
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) +
(texelY * texturePitch));
#if PROCESS_COLOR_NO_SIMD
DqnV4 color1;
color1.a = (f32)(texel1 >> 24);
color1.b = (f32)((texel1 >> 16) & 0xFF);
color1.g = (f32)((texel1 >> 8) & 0xFF);
color1.r = (f32)((texel1 >> 0) & 0xFF);
color1 *= DTRRENDER_INV_255;
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
DqnV4 blend = color * color1;
#else
__m128 color1 = _mm_set_ps((f32)(texel1 >> 24),
(f32)((texel1 >> 16) & 0xFF),
(f32)((texel1 >> 8) & 0xFF),
(f32)((texel1 >> 0) & 0xFF));
color1 = _mm_mul_ps(color1, inv255_4x);
color1 = _mm_mul_ps(color1, color1); // to linear space
color1 = _mm_mul_ps(color1, colorModulate);
DqnV4 blend = {};
blend.r = ((f32 *)&color1)[0];
blend.g = ((f32 *)&color1)[1];
blend.b = ((f32 *)&color1)[2];
blend.a = ((f32 *)&color1)[3];
#endif
SetPixel(renderBuffer, bufferX1, bufferY, blend, ColorSpace_Linear);
}
}
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
}
signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY);
signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY);
}
} }
else else
{ {
DqnV2i startP = min; RasteriseTexturedTriangle(renderBuffer, p1, p2, p3, uv1, uv2, uv3, texture, color);
f32 signedArea1 = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x));
f32 signedArea1DeltaX = a.y - b.y;
f32 signedArea1DeltaY = b.x - a.x;
f32 signedArea2 = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x));
f32 signedArea2DeltaX = b.y - c.y;
f32 signedArea2DeltaY = c.x - b.x;
f32 signedArea3 = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x));
f32 signedArea3DeltaX = c.y - a.y;
f32 signedArea3DeltaY = a.x - c.x;
f32 signedAreaParallelogram = signedArea1 + signedArea2 + signedArea3;
if (signedAreaParallelogram == 0) return;
f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram;
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
{
f32 signedArea1Row = signedArea1;
f32 signedArea2Row = signedArea2;
f32 signedArea3Row = signedArea3;
for (i32 bufferX = min.x; bufferX < max.x; bufferX++)
{
if (signedArea1Row >= 0 && signedArea2Row >= 0 && signedArea3Row >= 0)
{
f32 barycentricB = signedArea3Row * invSignedAreaParallelogram;
f32 barycentricC = signedArea1Row * invSignedAreaParallelogram;
if (DTR_DEBUG)
{
const f32 EPSILON = 0.1f;
f32 debugSignedArea1 = ((b.x - a.x) * (bufferY - a.y)) - ((b.y - a.y) * (bufferX - a.x));
f32 debugSignedArea2 = ((c.x - b.x) * (bufferY - b.y)) - ((c.y - b.y) * (bufferX - b.x));
f32 debugSignedArea3 = ((a.x - c.x) * (bufferY - c.y)) - ((a.y - c.y) * (bufferX - c.x));
f32 deltaSignedArea1 = DQN_ABS(debugSignedArea1 - signedArea1Row);
f32 deltaSignedArea2 = DQN_ABS(debugSignedArea2 - signedArea2Row);
f32 deltaSignedArea3 = DQN_ABS(debugSignedArea3 - signedArea3Row);
DQN_ASSERT(deltaSignedArea1 < EPSILON && deltaSignedArea2 < EPSILON &&
deltaSignedArea3 < EPSILON)
f32 debugBarycentricA, debugBarycentricB, debugBarycentricC;
DebugBarycentricInternal(DqnV2_2i(bufferX, bufferY), a.xy, b.xy, c.xy,
&debugBarycentricA, &debugBarycentricB,
&debugBarycentricC);
f32 deltaBaryB = DQN_ABS(barycentricB - debugBarycentricB);
f32 deltaBaryC = DQN_ABS(barycentricC - debugBarycentricC);
DQN_ASSERT(deltaBaryB < EPSILON && deltaBaryC < EPSILON)
}
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
f32 pixelZValue =
a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z));
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
if (texture)
{
u8 *texturePtr = texture->memory;
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
DqnV2 uv =
uv1 + ((uv2 - uv1) * barycentricB) + ((uv3 - uv1) * barycentricC);
const f32 EPSILON = 0.1f;
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
f32 texelXf = uv.x * texture->dim.w;
f32 texelYf = uv.y * texture->dim.h;
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
i32 texelX = (i32)texelXf;
i32 texelY = (i32)texelYf;
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) +
(texelY * texturePitch));
DqnV4 color1;
color1.a = (f32)(texel1 >> 24);
color1.b = (f32)((texel1 >> 16) & 0xFF);
color1.g = (f32)((texel1 >> 8) & 0xFF);
color1.r = (f32)((texel1 >> 0) & 0xFF);
color1 *= DTRRENDER_INV_255;
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
DqnV4 blend = color * color1;
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
}
else
{
SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
}
}
}
signedArea1Row += signedArea1DeltaX;
signedArea2Row += signedArea2DeltaX;
signedArea3Row += signedArea3DeltaX;
}
signedArea1 += signedArea1DeltaY;
signedArea2 += signedArea2DeltaY;
signedArea3 += signedArea3DeltaY;
}
} }
DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
@ -932,6 +951,15 @@ void DTRRender_TexturedTriangle(PlatformInput *const input,
DTRDebug_CounterIncrement(DTRDebugCounter_RenderTriangle); DTRDebug_CounterIncrement(DTRDebugCounter_RenderTriangle);
if (DTR_DEBUG_RENDER) if (DTR_DEBUG_RENDER)
{ {
DqnV2i max =
DqnV2i_2f(DQN_MAX(DQN_MAX(p1.x, p2.x), p3.x), DQN_MAX(DQN_MAX(p1.y, p2.y), p3.y));
DqnV2i min =
DqnV2i_2f(DQN_MIN(DQN_MIN(p1.x, p2.x), p3.x), DQN_MIN(DQN_MIN(p1.y, p2.y), p3.y));
min.x = DQN_MAX(min.x, 0);
min.y = DQN_MAX(min.y, 0);
max.x = DQN_MIN(max.x, renderBuffer->width - 1);
max.y = DQN_MIN(max.y, renderBuffer->height - 1);
// Draw Bounding box // Draw Bounding box
if (0) if (0)
{ {
@ -966,6 +994,87 @@ void DTRRender_TexturedTriangle(PlatformInput *const input,
} }
} }
void DTRRender_Mesh(DTRRenderBuffer *const renderBuffer, DTRMesh *const mesh, const DqnV3 pos,
const f32 scale, const DqnV3 lightVector)
{
if (!mesh) return;
for (u32 i = 0; i < mesh->numFaces; i++)
{
DTRMeshFace face = mesh->faces[i];
DQN_ASSERT(face.numVertexIndex == 3);
i32 vertAIndex = face.vertexIndex[0];
i32 vertBIndex = face.vertexIndex[1];
i32 vertCIndex = face.vertexIndex[2];
DqnV4 vertA = mesh->vertexes[vertAIndex];
DqnV4 vertB = mesh->vertexes[vertBIndex];
DqnV4 vertC = mesh->vertexes[vertCIndex];
// TODO(doyle): Some models have -ve indexes to refer to relative
// vertices. We should resolve that to positive indexes at run time.
DQN_ASSERT(vertAIndex < (i32)mesh->numVertexes);
DQN_ASSERT(vertBIndex < (i32)mesh->numVertexes);
DQN_ASSERT(vertCIndex < (i32)mesh->numVertexes);
DqnV4 vertAB = vertB - vertA;
DqnV4 vertAC = vertC - vertA;
DqnV3 normal = DqnV3_Cross(vertAC.xyz, vertAB.xyz);
f32 intensity = DqnV3_Dot(DqnV3_Normalise(normal), lightVector);
if (intensity < 0) continue;
DqnV4 modelCol = DqnV4_4f(1, 1, 1, 1);
modelCol.rgb *= DQN_ABS(intensity);
DqnV3 screenVA = (vertA.xyz * scale) + pos;
DqnV3 screenVB = (vertB.xyz * scale) + pos;
DqnV3 screenVC = (vertC.xyz * scale) + pos;
// TODO(doyle): Why do we need rounding here? Maybe it's because
// I don't do any interpolation in the triangle routine for jagged
// edges.
screenVA.x = (f32)(i32)(screenVA.x + 0.5f);
screenVA.y = (f32)(i32)(screenVA.y + 0.5f);
screenVB.x = (f32)(i32)(screenVB.x + 0.5f);
screenVB.y = (f32)(i32)(screenVB.y + 0.5f);
screenVC.x = (f32)(i32)(screenVC.x + 0.5f);
screenVC.y = (f32)(i32)(screenVC.y + 0.5f);
i32 textureAIndex = face.texIndex[0];
i32 textureBIndex = face.texIndex[1];
i32 textureCIndex = face.texIndex[2];
DqnV2 texA = mesh->texUV[textureAIndex].xy;
DqnV2 texB = mesh->texUV[textureBIndex].xy;
DqnV2 texC = mesh->texUV[textureCIndex].xy;
DQN_ASSERT(textureAIndex < (i32)mesh->numTexUV);
DQN_ASSERT(textureBIndex < (i32)mesh->numTexUV);
DQN_ASSERT(textureCIndex < (i32)mesh->numTexUV);
bool DEBUG_SIMPLE_MODE = false;
if (DTR_DEBUG && DEBUG_SIMPLE_MODE)
{
DTRRender_Triangle(renderBuffer, screenVA, screenVB, screenVC, modelCol);
}
else
{
DTRRender_TexturedTriangle(renderBuffer, screenVA, screenVB, screenVC, texA, texB,
texC, &mesh->tex, modelCol);
}
bool DEBUG_WIREFRAME = false;
if (DTR_DEBUG && DEBUG_WIREFRAME)
{
DqnV4 wireColor = DqnV4_4f(1.0f, 1.0f, 1.0f, 0.01f);
DTRRender_Line(renderBuffer, DqnV2i_V2(screenVA.xy), DqnV2i_V2(screenVB.xy),
wireColor);
DTRRender_Line(renderBuffer, DqnV2i_V2(screenVB.xy), DqnV2i_V2(screenVC.xy),
wireColor);
DTRRender_Line(renderBuffer, DqnV2i_V2(screenVC.xy), DqnV2i_V2(screenVA.xy),
wireColor);
}
}
}
void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3,
DqnV4 color, const DTRRenderTransform transform) DqnV4 color, const DTRRenderTransform transform)
{ {

View File

@ -62,8 +62,9 @@ inline DqnV4 DTRRender_PreMultiplyAlphaSRGB1WithLinearConversion(DqnV4 color);
void DTRRender_Text (DTRRenderBuffer *const renderBuffer, const DTRFont font, DqnV2 pos, const char *const text, DqnV4 color = DqnV4_1f(1), i32 len = -1); void DTRRender_Text (DTRRenderBuffer *const renderBuffer, const DTRFont font, DqnV2 pos, const char *const text, DqnV4 color = DqnV4_1f(1), i32 len = -1);
void DTRRender_Line (DTRRenderBuffer *const renderBuffer, DqnV2i a, DqnV2i b, DqnV4 color); void DTRRender_Line (DTRRenderBuffer *const renderBuffer, DqnV2i a, DqnV2i b, DqnV4 color);
void DTRRender_Rectangle (DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTransform()); void DTRRender_Rectangle (DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTransform());
void DTRRender_Mesh (DTRRenderBuffer *const renderBuffer, DTRMesh *const mesh, const DqnV3 pos, const f32 scale, const DqnV3 lightVector);
void DTRRender_Triangle (DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform()); void DTRRender_Triangle (DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform());
void DTRRender_TexturedTriangle(PlatformInput *const input, DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform()); void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform());
void DTRRender_Bitmap (DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos, const DTRRenderTransform transform = DTRRender_DefaultTransform(), DqnV4 color = DqnV4_4f(1, 1, 1, 1)); void DTRRender_Bitmap (DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos, const DTRRenderTransform transform = DTRRender_DefaultTransform(), DqnV4 color = DqnV4_4f(1, 1, 1, 1));
void DTRRender_Clear (DTRRenderBuffer *const renderBuffer, DqnV3 color); void DTRRender_Clear (DTRRenderBuffer *const renderBuffer, DqnV3 color);

View File

@ -273,23 +273,21 @@ FILE_SCOPE void Win32HandleMenuMessages(HWND window, MSG msg,
case Win32Menu_FileFlushMemory: case Win32Menu_FileFlushMemory:
{ {
DqnMemStack memStacks[DQN_ARRAY_COUNT(globalPlatformMemory.stacks)] = {};
for (i32 i = 0; i < DQN_ARRAY_COUNT(globalPlatformMemory.stacks); i++) for (i32 i = 0; i < DQN_ARRAY_COUNT(globalPlatformMemory.stacks); i++)
{ {
while (globalPlatformMemory.stacks[i].block->prevBlock) DqnMemStack *stack = &globalPlatformMemory.stacks[i];
DqnMemStack_FreeLastBlock(&globalPlatformMemory.stacks[i]); while (stack->block->prevBlock)
DqnMemStack_FreeLastBlock(stack);
DqnMemStack_ClearCurrBlock(&globalPlatformMemory.stacks[i], true); DqnMemStack_ClearCurrBlock(stack, true);
memStacks[i] = *stack;
} }
DqnMemStack mainStack = globalPlatformMemory.mainStack; PlatformMemory empty = {};
DqnMemStack assetStack = globalPlatformMemory.assetStack; globalPlatformMemory = empty;
DqnMemStack tempStack = globalPlatformMemory.tempStack; for (i32 i = 0; i < DQN_ARRAY_COUNT(globalPlatformMemory.stacks); i++)
globalPlatformMemory.stacks[i] = memStacks[i];
PlatformMemory empty = {};
globalPlatformMemory = empty;
globalPlatformMemory.mainStack = mainStack;
globalPlatformMemory.assetStack = assetStack;
globalPlatformMemory.tempStack = tempStack;
} }
break; break;
@ -560,10 +558,10 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
platformAPI.FileClose = Platform_FileClose; platformAPI.FileClose = Platform_FileClose;
platformAPI.Print = Platform_Print; platformAPI.Print = Platform_Print;
PlatformInput platformInput = {}; PlatformInput platformInput = {};
platformInput.canUseSSE2 = IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE); platformInput.api = platformAPI;
platformInput.canUseRdtsc = IsProcessorFeaturePresent(PF_RDTSC_INSTRUCTION_AVAILABLE); platformInput.flags.canUseSSE2 = IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE);
platformInput.api = platformAPI; platformInput.flags.canUseRdtsc = IsProcessorFeaturePresent(PF_RDTSC_INSTRUCTION_AVAILABLE);
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Update Loop // Update Loop
@ -585,7 +583,7 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
{ {
Win32UnloadExternalDLL(&dllCode); Win32UnloadExternalDLL(&dllCode);
dllCode = Win32LoadExternalDLL(dllPath, dllTmpPath, lastWriteTime); dllCode = Win32LoadExternalDLL(dllPath, dllTmpPath, lastWriteTime);
platformInput.executableReloaded = true; platformInput.flags.executableReloaded = true;
} }
{ {