Accelerate textured triangle rendering using SIMD
This commit is contained in:
parent
4d2a7a7c06
commit
49270a2826
@ -1104,8 +1104,8 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DTRRender_TexturedTriangle(&renderBuffer, screenVA, screenVB, screenVC, texA, texB,
|
DTRRender_TexturedTriangle(input, &renderBuffer, screenVA, screenVB, screenVC, texA,
|
||||||
texC, &state->mesh.tex, modelCol);
|
texB, texC, &state->mesh.tex, modelCol);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DEBUG_WIREFRAME = false;
|
bool DEBUG_WIREFRAME = false;
|
||||||
|
@ -17,14 +17,23 @@ void DTRAsset_InitGlobalState()
|
|||||||
stbi_set_flip_vertically_on_load(true);
|
stbi_set_flip_vertically_on_load(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE_SCOPE void MemcopyInternal(u8 *dest, u8 *src, size_t numBytes)
|
FILE_SCOPE void MemcopyInternal(u8 *const dest, u8 *const src, size_t numBytes)
|
||||||
{
|
{
|
||||||
if (!dest || !src || numBytes == 0) return;
|
if (!dest || !src || numBytes == 0) return;
|
||||||
for (size_t i = 0; i < numBytes; i++)
|
for (size_t i = 0; i < numBytes; i++)
|
||||||
dest[i] = src[i];
|
dest[i] = src[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE_SCOPE void AssetDqnArrayMemAPICallback(DqnMemAPICallbackInfo info, DqnMemAPICallbackResult *result)
|
// NOTE: Dynamic array allocations just request space at the first option they
|
||||||
|
// can take. Realloc will reallocate in place if there's space. Otherwise
|
||||||
|
// it'll create a new block and reallocate there by copying the old data over.
|
||||||
|
|
||||||
|
// So this does waste space. But is a quick way to reroute allocations into
|
||||||
|
// a MemStack. Its main intended purpose is for one-shot loading data that you
|
||||||
|
// don't know how much space you need in your DArray. After filling out
|
||||||
|
// the dynamic array you then compact the data manually using memcopys into
|
||||||
|
// a new block and discard the old data.
|
||||||
|
FILE_SCOPE void DumbDynamicArrayMemAPICallback(DqnMemAPICallbackInfo info, DqnMemAPICallbackResult *result)
|
||||||
{
|
{
|
||||||
DQN_ASSERT(info.type != DqnMemAPICallbackType_Invalid);
|
DQN_ASSERT(info.type != DqnMemAPICallbackType_Invalid);
|
||||||
DqnMemStack *stack = static_cast<DqnMemStack *>(info.userContext);
|
DqnMemStack *stack = static_cast<DqnMemStack *>(info.userContext);
|
||||||
@ -40,27 +49,7 @@ FILE_SCOPE void AssetDqnArrayMemAPICallback(DqnMemAPICallbackInfo info, DqnMemAP
|
|||||||
|
|
||||||
case DqnMemAPICallbackType_Free:
|
case DqnMemAPICallbackType_Free:
|
||||||
{
|
{
|
||||||
DqnMemStackBlock **blockPtr = &stack->block;
|
DQN_ASSERT(DQN_INVALID_CODE_PATH);
|
||||||
while (*blockPtr && (*blockPtr)->memory != info.ptrToFree)
|
|
||||||
{
|
|
||||||
// NOTE(doyle): Ensure that the base ptr of each block is always
|
|
||||||
// actually aligned so we don't ever miss finding the block if
|
|
||||||
// the allocator had to realign the pointer from the base
|
|
||||||
// address.
|
|
||||||
if (DTR_DEBUG)
|
|
||||||
{
|
|
||||||
size_t memBaseAddr = (size_t)((*blockPtr)->memory);
|
|
||||||
DQN_ASSERT(DQN_ALIGN_POW_N(memBaseAddr, stack->byteAlign) ==
|
|
||||||
memBaseAddr);
|
|
||||||
}
|
|
||||||
blockPtr = &((*blockPtr)->prevBlock);
|
|
||||||
}
|
|
||||||
|
|
||||||
DQN_ASSERT(*blockPtr && (*blockPtr)->memory == info.ptrToFree);
|
|
||||||
DqnMemStackBlock *blockToFree = *blockPtr;
|
|
||||||
*blockPtr = blockToFree->prevBlock;
|
|
||||||
DqnMem_Free(blockToFree);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -189,7 +178,7 @@ bool DTRAsset_LoadWavefrontObj(const PlatformAPI api, DqnMemStack *const memStac
|
|||||||
size_t fileSize = file.size;
|
size_t fileSize = file.size;
|
||||||
|
|
||||||
DqnMemAPI memAPI = {};
|
DqnMemAPI memAPI = {};
|
||||||
memAPI.callback = AssetDqnArrayMemAPICallback;
|
memAPI.callback = DumbDynamicArrayMemAPICallback;
|
||||||
memAPI.userContext = memStack;
|
memAPI.userContext = memStack;
|
||||||
|
|
||||||
enum WavefVertexType {
|
enum WavefVertexType {
|
||||||
@ -199,6 +188,23 @@ bool DTRAsset_LoadWavefrontObj(const PlatformAPI api, DqnMemStack *const memStac
|
|||||||
WavefVertexType_Normal,
|
WavefVertexType_Normal,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// TODO(doyle): We should profile, reading it out to WavefModel format and
|
||||||
|
// then copying it over, versus just reading the file twice. First pass is
|
||||||
|
// to count the number of vertexes etc. for each section we need. Then the
|
||||||
|
// second pass we can allocate directly the number we need and reparse it.
|
||||||
|
// I have a feeling that, in general, that's a better idea; at least it gets
|
||||||
|
// rid of a lot of stupid copying code and memstack juggling.
|
||||||
|
|
||||||
|
// NOTE(doyle): We pre-process the data into an intermediate format that
|
||||||
|
// more accurately represents the file format. Since there's no metadata
|
||||||
|
// inside Wavefront objects, we don't know how many vertexes/texUV/normals
|
||||||
|
// there are, which makes it hard to allocate "nicely" out of our memory
|
||||||
|
// stack.
|
||||||
|
|
||||||
|
// So we preprocess. Then once we know the final amount, copy over the data
|
||||||
|
// to a new memstack block such that all the data is compacted together in
|
||||||
|
// memory for locality. Then just throw away the intermediate
|
||||||
|
// representation.
|
||||||
WavefModel dummy_ = {};
|
WavefModel dummy_ = {};
|
||||||
WavefModel *obj = &dummy_;
|
WavefModel *obj = &dummy_;
|
||||||
|
|
||||||
@ -410,11 +416,17 @@ bool DTRAsset_LoadWavefrontObj(const PlatformAPI api, DqnMemStack *const memStac
|
|||||||
DQN_ASSERT(obj->groupNameIndex + 1 < DQN_ARRAY_COUNT(obj->groupName));
|
DQN_ASSERT(obj->groupNameIndex + 1 < DQN_ARRAY_COUNT(obj->groupName));
|
||||||
|
|
||||||
DQN_ASSERT(!obj->groupName[obj->groupNameIndex]);
|
DQN_ASSERT(!obj->groupName[obj->groupNameIndex]);
|
||||||
|
// TODO(doyle): Broken since I don't "copy" it over to our
|
||||||
|
// final DTRMesh. Below I copy over the data so that all the
|
||||||
|
// allocations are compacted together but don't copy this
|
||||||
|
// yet. Which means the name gets trashed atm.
|
||||||
|
#if 0
|
||||||
obj->groupName[obj->groupNameIndex++] =
|
obj->groupName[obj->groupNameIndex++] =
|
||||||
(char *)DqnMemStack_Push(memStack, (nameLen + 1) * sizeof(char));
|
(char *)DqnMemStack_Push(memStack, (nameLen + 1) * sizeof(char));
|
||||||
|
|
||||||
for (i32 i = 0; i < nameLen; i++)
|
for (i32 i = 0; i < nameLen; i++)
|
||||||
obj->groupName[obj->groupNameIndex - 1][i] = namePtr[i];
|
obj->groupName[obj->groupNameIndex - 1][i] = namePtr[i];
|
||||||
|
#endif
|
||||||
|
|
||||||
while (scan && (*scan == ' ' || *scan == '\n'))
|
while (scan && (*scan == ' ' || *scan == '\n'))
|
||||||
scan++;
|
scan++;
|
||||||
|
@ -121,7 +121,9 @@ void inline DTRDebug_BeginCycleCount(enum DTRDebugCycleCount tag)
|
|||||||
{
|
{
|
||||||
if (globalDebug.input && globalDebug.input->canUseRdtsc)
|
if (globalDebug.input && globalDebug.input->canUseRdtsc)
|
||||||
{
|
{
|
||||||
globalDebug.cycleCount[tag] = __rdtsc();
|
DTRDebugCycles *const cycles = &globalDebug.cycles[tag];
|
||||||
|
cycles->tmpStartCycles = __rdtsc();
|
||||||
|
cycles->numInvokes++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -132,7 +134,8 @@ void inline DTRDebug_EndCycleCount(enum DTRDebugCycleCount tag)
|
|||||||
{
|
{
|
||||||
if (globalDebug.input && globalDebug.input->canUseRdtsc)
|
if (globalDebug.input && globalDebug.input->canUseRdtsc)
|
||||||
{
|
{
|
||||||
globalDebug.cycleCount[tag] = __rdtsc() - globalDebug.cycleCount[tag];
|
DTRDebugCycles *const cycles = &globalDebug.cycles[tag];
|
||||||
|
cycles->totalCycles += __rdtsc() - cycles->tmpStartCycles;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -214,9 +217,16 @@ void DTRDebug_Update(DTRState *const state,
|
|||||||
DTRDebug_PushText("TrianglesRendered: %'lld", debug->counter[DTRDebugCounter_RenderTriangle]);
|
DTRDebug_PushText("TrianglesRendered: %'lld", debug->counter[DTRDebugCounter_RenderTriangle]);
|
||||||
DTRDebug_PushText("");
|
DTRDebug_PushText("");
|
||||||
|
|
||||||
for (i32 i = 0; i < DQN_ARRAY_COUNT(debug->cycleCount); i++)
|
DTRDebugCycles emptyDebugCycles = {};
|
||||||
|
for (i32 i = 0; i < DQN_ARRAY_COUNT(debug->cycles); i++)
|
||||||
{
|
{
|
||||||
DTRDebug_PushText("%d: %'lld cycles", i, debug->cycleCount[i]);
|
DTRDebugCycles *const cycles = &globalDebug.cycles[i];
|
||||||
|
|
||||||
|
u64 invocations = (cycles->numInvokes == 0) ? 1 : cycles->numInvokes;
|
||||||
|
u64 avgCycles = cycles->totalCycles / invocations;
|
||||||
|
DTRDebug_PushText("%d: %'lld avg cycles", i, avgCycles);
|
||||||
|
|
||||||
|
*cycles = emptyDebugCycles;
|
||||||
}
|
}
|
||||||
DTRDebug_PushText("");
|
DTRDebug_PushText("");
|
||||||
|
|
||||||
|
@ -47,6 +47,14 @@ enum DTRDebugCycleCount
|
|||||||
DTRDebugCycleCount_Count,
|
DTRDebugCycleCount_Count,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef struct DTRDebugCycles
|
||||||
|
{
|
||||||
|
u64 totalCycles;
|
||||||
|
u64 numInvokes;
|
||||||
|
|
||||||
|
u64 tmpStartCycles; // Used to calculate the number of cycles elapsed
|
||||||
|
} DTRDebugCycles;
|
||||||
|
|
||||||
typedef struct DTRDebug
|
typedef struct DTRDebug
|
||||||
{
|
{
|
||||||
struct DTRFont *font;
|
struct DTRFont *font;
|
||||||
@ -57,7 +65,7 @@ typedef struct DTRDebug
|
|||||||
DqnV2 displayP;
|
DqnV2 displayP;
|
||||||
i32 displayYOffset;
|
i32 displayYOffset;
|
||||||
|
|
||||||
u64 cycleCount[DTRDebugCycleCount_Count];
|
DTRDebugCycles cycles [DTRDebugCycleCount_Count];
|
||||||
u64 counter[DTRDebugCounter_Count];
|
u64 counter[DTRDebugCounter_Count];
|
||||||
u64 totalSetPixels;
|
u64 totalSetPixels;
|
||||||
} DTRDebug;
|
} DTRDebug;
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
#include "external/stb_rect_pack.h"
|
#include "external/stb_rect_pack.h"
|
||||||
#include "external/stb_truetype.h"
|
#include "external/stb_truetype.h"
|
||||||
|
|
||||||
|
#include <intrin.h>
|
||||||
|
|
||||||
FILE_SCOPE const f32 COLOR_EPSILON = 0.9f;
|
FILE_SCOPE const f32 COLOR_EPSILON = 0.9f;
|
||||||
|
|
||||||
FILE_SCOPE inline DqnV4 PreMultiplyAlpha1(const DqnV4 color)
|
FILE_SCOPE inline DqnV4 PreMultiplyAlpha1(const DqnV4 color)
|
||||||
@ -497,7 +499,8 @@ FILE_SCOPE void DebugBarycentricInternal(DqnV2 p, DqnV2 a, DqnV2 b, DqnV2 c, f32
|
|||||||
*u = 1.0f - *v - *w;
|
*u = 1.0f - *v - *w;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3,
|
void DTRRender_TexturedTriangle(PlatformInput *const input,
|
||||||
|
DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3,
|
||||||
DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture,
|
DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture,
|
||||||
DqnV4 color, const DTRRenderTransform transform)
|
DqnV4 color, const DTRRenderTransform transform)
|
||||||
{
|
{
|
||||||
@ -549,11 +552,171 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, D
|
|||||||
const DqnV3 b = p2;
|
const DqnV3 b = p2;
|
||||||
const DqnV3 c = p3;
|
const DqnV3 c = p3;
|
||||||
|
|
||||||
|
DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
|
||||||
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Scan and Render
|
||||||
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
const u32 zBufferPitch = renderBuffer->width;
|
||||||
|
if (input->canUseSSE2)
|
||||||
|
{
|
||||||
DqnV2i startP = min;
|
DqnV2i startP = min;
|
||||||
f32 oldSignedArea1 = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x));
|
f32 edge1SignedAreaPixel1 = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x));
|
||||||
f32 oldSignedArea2 = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x));
|
f32 edge1SignedAreaPixel1DeltaX = a.y - b.y;
|
||||||
f32 oldSignedArea3 = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x));
|
f32 edge1SignedAreaPixel1DeltaY = b.x - a.x;
|
||||||
|
|
||||||
|
f32 edge2SignedAreaPixel1 = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x));
|
||||||
|
f32 edge2SignedAreaPixel1DeltaX = b.y - c.y;
|
||||||
|
f32 edge2SignedAreaPixel1DeltaY = c.x - b.x;
|
||||||
|
|
||||||
|
f32 edge3SignedAreaPixel1 = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x));
|
||||||
|
f32 edge3SignedAreaPixel1DeltaX = c.y - a.y;
|
||||||
|
f32 edge3SignedAreaPixel1DeltaY = a.x - c.x;
|
||||||
|
|
||||||
|
f32 signedAreaParallelogramPixel1 = edge1SignedAreaPixel1 + edge2SignedAreaPixel1 + edge3SignedAreaPixel1;
|
||||||
|
if (signedAreaParallelogramPixel1 == 0) return;
|
||||||
|
f32 invSignedAreaParallelogramPixel1 = 1 / signedAreaParallelogramPixel1;
|
||||||
|
|
||||||
|
__m128 zero_4x = _mm_set_ps1(0.0f);
|
||||||
|
__m128 two_4x = _mm_set_ps1(2.0f);
|
||||||
|
__m128 invSignedAreaParallelogram4x = _mm_set_ps1(invSignedAreaParallelogramPixel1);
|
||||||
|
__m128 triangleZ = _mm_set_ps(0, b.z, a.z, c.z);
|
||||||
|
|
||||||
|
__m128 signedAreaPixelDeltaX = _mm_set_ps(0, edge3SignedAreaPixel1DeltaX, edge2SignedAreaPixel1DeltaX, edge1SignedAreaPixel1DeltaX);
|
||||||
|
__m128 signedAreaPixelDeltaY = _mm_set_ps(0, edge3SignedAreaPixel1DeltaY, edge2SignedAreaPixel1DeltaY, edge1SignedAreaPixel1DeltaY);
|
||||||
|
|
||||||
|
__m128 signedAreaPixel1 = _mm_set_ps(0, edge3SignedAreaPixel1, edge2SignedAreaPixel1, edge1SignedAreaPixel1);
|
||||||
|
__m128 signedAreaPixel2 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaX);
|
||||||
|
|
||||||
|
// NOTE: Step size of 2 pixels across X
|
||||||
|
signedAreaPixelDeltaX = _mm_mul_ps(signedAreaPixelDeltaX, two_4x);
|
||||||
|
|
||||||
|
const DqnV2 uv2SubUv1 = uv2 - uv1;
|
||||||
|
const DqnV2 uv3SubUv1 = uv3 - uv1;
|
||||||
|
|
||||||
|
const u32 IS_GREATER_MASK = 0xF;
|
||||||
|
|
||||||
|
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
|
||||||
|
{
|
||||||
|
__m128 signedArea1 = signedAreaPixel1;
|
||||||
|
__m128 signedArea2 = signedAreaPixel2;
|
||||||
|
|
||||||
|
for (i32 bufferX = min.x; bufferX < max.x; bufferX += 2)
|
||||||
|
{
|
||||||
|
__m128 isGreater1 = _mm_cmpge_ps(signedArea1, zero_4x);
|
||||||
|
i32 isGreaterResult1 = _mm_movemask_ps(isGreater1);
|
||||||
|
if ((isGreaterResult1 & IS_GREATER_MASK) == IS_GREATER_MASK)
|
||||||
|
{
|
||||||
|
__m128 barycentric = _mm_mul_ps(signedArea1, invSignedAreaParallelogram4x);
|
||||||
|
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
|
||||||
|
|
||||||
|
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
|
||||||
|
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
|
||||||
|
((f32 *)&barycentricZ)[1] +
|
||||||
|
((f32 *)&barycentricZ)[2];
|
||||||
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
||||||
|
if (pixelZValue > currZValue)
|
||||||
|
{
|
||||||
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
||||||
|
u8 *texturePtr = texture->memory;
|
||||||
|
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
|
||||||
|
|
||||||
|
f32 barycentricB = ((f32 *)&barycentric)[2];
|
||||||
|
f32 barycentricC = ((f32 *)&barycentric)[0];
|
||||||
|
DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC);
|
||||||
|
|
||||||
|
const f32 EPSILON = 0.1f;
|
||||||
|
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
|
||||||
|
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
|
||||||
|
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
|
||||||
|
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
|
||||||
|
|
||||||
|
f32 texelXf = uv.x * texture->dim.w;
|
||||||
|
f32 texelYf = uv.y * texture->dim.h;
|
||||||
|
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
|
||||||
|
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
|
||||||
|
|
||||||
|
i32 texelX = (i32)texelXf;
|
||||||
|
i32 texelY = (i32)texelYf;
|
||||||
|
|
||||||
|
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) +
|
||||||
|
(texelY * texturePitch));
|
||||||
|
|
||||||
|
DqnV4 color1;
|
||||||
|
color1.a = (f32)(texel1 >> 24);
|
||||||
|
color1.b = (f32)((texel1 >> 16) & 0xFF);
|
||||||
|
color1.g = (f32)((texel1 >> 8) & 0xFF);
|
||||||
|
color1.r = (f32)((texel1 >> 0) & 0xFF);
|
||||||
|
color1 *= DTRRENDER_INV_255;
|
||||||
|
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
|
||||||
|
DqnV4 blend = color * color1;
|
||||||
|
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
__m128 isGreater2 = _mm_cmpge_ps(signedArea2, zero_4x);
|
||||||
|
i32 isGreaterResult2 = _mm_movemask_ps(isGreater2);
|
||||||
|
i32 bufferX1 = bufferX + 1;
|
||||||
|
if ((isGreaterResult2 & IS_GREATER_MASK) == IS_GREATER_MASK && bufferX1 < max.x)
|
||||||
|
{
|
||||||
|
__m128 barycentric = _mm_mul_ps(signedArea2, invSignedAreaParallelogram4x);
|
||||||
|
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
|
||||||
|
|
||||||
|
i32 zBufferIndex = bufferX1 + (bufferY * zBufferPitch);
|
||||||
|
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
|
||||||
|
((f32 *)&barycentricZ)[1] +
|
||||||
|
((f32 *)&barycentricZ)[2];
|
||||||
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
||||||
|
if (pixelZValue > currZValue)
|
||||||
|
{
|
||||||
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
||||||
|
u8 *texturePtr = texture->memory;
|
||||||
|
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
|
||||||
|
|
||||||
|
f32 barycentricB = ((f32 *)&barycentric)[2];
|
||||||
|
f32 barycentricC = ((f32 *)&barycentric)[0];
|
||||||
|
DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC);
|
||||||
|
|
||||||
|
const f32 EPSILON = 0.1f;
|
||||||
|
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
|
||||||
|
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
|
||||||
|
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
|
||||||
|
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
|
||||||
|
|
||||||
|
f32 texelXf = uv.x * texture->dim.w;
|
||||||
|
f32 texelYf = uv.y * texture->dim.h;
|
||||||
|
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
|
||||||
|
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
|
||||||
|
|
||||||
|
i32 texelX = (i32)texelXf;
|
||||||
|
i32 texelY = (i32)texelYf;
|
||||||
|
|
||||||
|
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) +
|
||||||
|
(texelY * texturePitch));
|
||||||
|
|
||||||
|
DqnV4 color1;
|
||||||
|
color1.a = (f32)(texel1 >> 24);
|
||||||
|
color1.b = (f32)((texel1 >> 16) & 0xFF);
|
||||||
|
color1.g = (f32)((texel1 >> 8) & 0xFF);
|
||||||
|
color1.r = (f32)((texel1 >> 0) & 0xFF);
|
||||||
|
color1 *= DTRRENDER_INV_255;
|
||||||
|
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
|
||||||
|
DqnV4 blend = color * color1;
|
||||||
|
SetPixel(renderBuffer, bufferX1, bufferY, blend, ColorSpace_Linear);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
|
||||||
|
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
|
||||||
|
}
|
||||||
|
|
||||||
|
signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY);
|
||||||
|
signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
DqnV2i startP = min;
|
||||||
f32 signedArea1 = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x));
|
f32 signedArea1 = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x));
|
||||||
f32 signedArea1DeltaX = a.y - b.y;
|
f32 signedArea1DeltaX = a.y - b.y;
|
||||||
f32 signedArea1DeltaY = b.x - a.x;
|
f32 signedArea1DeltaY = b.x - a.x;
|
||||||
@ -570,15 +733,6 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, D
|
|||||||
if (signedAreaParallelogram == 0) return;
|
if (signedAreaParallelogram == 0) return;
|
||||||
f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram;
|
f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram;
|
||||||
|
|
||||||
DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Scan and Render
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
|
||||||
const u32 zBufferPitch = renderBuffer->width;
|
|
||||||
const f32 BARYCENTRIC_EPSILON = 0.1f;
|
|
||||||
|
|
||||||
u8 *texturePtr = texture->memory;
|
|
||||||
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
|
|
||||||
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
|
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
|
||||||
{
|
{
|
||||||
f32 signedArea1Row = signedArea1;
|
f32 signedArea1Row = signedArea1;
|
||||||
@ -600,9 +754,9 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, D
|
|||||||
f32 debugSignedArea2 = ((c.x - b.x) * (bufferY - b.y)) - ((c.y - b.y) * (bufferX - b.x));
|
f32 debugSignedArea2 = ((c.x - b.x) * (bufferY - b.y)) - ((c.y - b.y) * (bufferX - b.x));
|
||||||
f32 debugSignedArea3 = ((a.x - c.x) * (bufferY - c.y)) - ((a.y - c.y) * (bufferX - c.x));
|
f32 debugSignedArea3 = ((a.x - c.x) * (bufferY - c.y)) - ((a.y - c.y) * (bufferX - c.x));
|
||||||
|
|
||||||
f32 deltaSignedArea1 = debugSignedArea1 - signedArea1Row;
|
f32 deltaSignedArea1 = DQN_ABS(debugSignedArea1 - signedArea1Row);
|
||||||
f32 deltaSignedArea2 = debugSignedArea2 - signedArea2Row;
|
f32 deltaSignedArea2 = DQN_ABS(debugSignedArea2 - signedArea2Row);
|
||||||
f32 deltaSignedArea3 = debugSignedArea3 - signedArea3Row;
|
f32 deltaSignedArea3 = DQN_ABS(debugSignedArea3 - signedArea3Row);
|
||||||
DQN_ASSERT(deltaSignedArea1 < EPSILON && deltaSignedArea2 < EPSILON &&
|
DQN_ASSERT(deltaSignedArea1 < EPSILON && deltaSignedArea2 < EPSILON &&
|
||||||
deltaSignedArea3 < EPSILON)
|
deltaSignedArea3 < EPSILON)
|
||||||
|
|
||||||
@ -611,7 +765,6 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, D
|
|||||||
&debugBarycentricA, &debugBarycentricB,
|
&debugBarycentricA, &debugBarycentricB,
|
||||||
&debugBarycentricC);
|
&debugBarycentricC);
|
||||||
|
|
||||||
|
|
||||||
f32 deltaBaryB = DQN_ABS(barycentricB - debugBarycentricB);
|
f32 deltaBaryB = DQN_ABS(barycentricB - debugBarycentricB);
|
||||||
f32 deltaBaryC = DQN_ABS(barycentricC - debugBarycentricC);
|
f32 deltaBaryC = DQN_ABS(barycentricC - debugBarycentricC);
|
||||||
|
|
||||||
@ -619,15 +772,21 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, D
|
|||||||
}
|
}
|
||||||
|
|
||||||
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
|
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
|
||||||
f32 pixelZValue = a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z));
|
f32 pixelZValue =
|
||||||
|
a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z));
|
||||||
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
||||||
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
||||||
|
|
||||||
if (pixelZValue > currZValue)
|
if (pixelZValue > currZValue)
|
||||||
{
|
{
|
||||||
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
||||||
const bool DEBUG_SAMPLE_TEXTURE = true;
|
if (texture)
|
||||||
DqnV2 uv = uv1 + ((uv2 - uv1) * barycentricB) + ((uv3 - uv1) * barycentricC);
|
{
|
||||||
|
u8 *texturePtr = texture->memory;
|
||||||
|
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
|
||||||
|
|
||||||
|
DqnV2 uv =
|
||||||
|
uv1 + ((uv2 - uv1) * barycentricB) + ((uv3 - uv1) * barycentricC);
|
||||||
|
|
||||||
const f32 EPSILON = 0.1f;
|
const f32 EPSILON = 0.1f;
|
||||||
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
|
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
|
||||||
@ -658,6 +817,11 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, D
|
|||||||
DqnV4 blend = color * color1;
|
DqnV4 blend = color * color1;
|
||||||
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
|
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
signedArea1Row += signedArea1DeltaX;
|
signedArea1Row += signedArea1DeltaX;
|
||||||
@ -669,6 +833,7 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, D
|
|||||||
signedArea2 += signedArea2DeltaY;
|
signedArea2 += signedArea2DeltaY;
|
||||||
signedArea3 += signedArea3DeltaY;
|
signedArea3 += signedArea3DeltaY;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
@ -881,12 +1046,10 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
|
|||||||
if (signedAreaParallelogram == 0) return;
|
if (signedAreaParallelogram == 0) return;
|
||||||
f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram;
|
f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram;
|
||||||
|
|
||||||
DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// Scan and Render
|
// Scan and Render
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
const u32 zBufferPitch = renderBuffer->width;
|
const u32 zBufferPitch = renderBuffer->width;
|
||||||
const f32 BARYCENTRIC_EPSILON = 0.1f;
|
|
||||||
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
|
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
|
||||||
{
|
{
|
||||||
f32 signedArea1Row = signedArea1;
|
f32 signedArea1Row = signedArea1;
|
||||||
@ -903,6 +1066,7 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
|
|||||||
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
|
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
|
||||||
f32 pixelZValue = a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z));
|
f32 pixelZValue = a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z));
|
||||||
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
||||||
|
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
||||||
if (pixelZValue > currZValue)
|
if (pixelZValue > currZValue)
|
||||||
{
|
{
|
||||||
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
||||||
@ -919,7 +1083,6 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
|
|||||||
signedArea2 += signedArea2DeltaY;
|
signedArea2 += signedArea2DeltaY;
|
||||||
signedArea3 += signedArea3DeltaY;
|
signedArea3 += signedArea3DeltaY;
|
||||||
}
|
}
|
||||||
DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// Debug
|
// Debug
|
||||||
@ -958,8 +1121,7 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer,
|
void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos,
|
||||||
DTRBitmap *const bitmap, DqnV2 pos,
|
|
||||||
const DTRRenderTransform transform, DqnV4 color)
|
const DTRRenderTransform transform, DqnV4 color)
|
||||||
{
|
{
|
||||||
if (!bitmap || !bitmap->memory || !renderBuffer) return;
|
if (!bitmap || !bitmap->memory || !renderBuffer) return;
|
||||||
|
@ -63,7 +63,7 @@ void DTRRender_Text (DTRRenderBuffer *const renderBuffer, const DTRFo
|
|||||||
void DTRRender_Line (DTRRenderBuffer *const renderBuffer, DqnV2i a, DqnV2i b, DqnV4 color);
|
void DTRRender_Line (DTRRenderBuffer *const renderBuffer, DqnV2i a, DqnV2i b, DqnV4 color);
|
||||||
void DTRRender_Rectangle (DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTransform());
|
void DTRRender_Rectangle (DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTransform());
|
||||||
void DTRRender_Triangle (DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform());
|
void DTRRender_Triangle (DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform());
|
||||||
void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform());
|
void DTRRender_TexturedTriangle(PlatformInput *const input, DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform());
|
||||||
void DTRRender_Bitmap (DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos, const DTRRenderTransform transform = DTRRender_DefaultTransform(), DqnV4 color = DqnV4_4f(1, 1, 1, 1));
|
void DTRRender_Bitmap (DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos, const DTRRenderTransform transform = DTRRender_DefaultTransform(), DqnV4 color = DqnV4_4f(1, 1, 1, 1));
|
||||||
void DTRRender_Clear (DTRRenderBuffer *const renderBuffer, DqnV3 color);
|
void DTRRender_Clear (DTRRenderBuffer *const renderBuffer, DqnV3 color);
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ REM wd4100 unused argument parameters
|
|||||||
REM wd4201 nonstandard extension used: nameless struct/union
|
REM wd4201 nonstandard extension used: nameless struct/union
|
||||||
REM wd4189 local variable is initialised but not referenced
|
REM wd4189 local variable is initialised but not referenced
|
||||||
REM wd4505 unreferenced local function not used will be removed
|
REM wd4505 unreferenced local function not used will be removed
|
||||||
set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -Od -FAsc /I..\src\external\
|
set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -O2 -FAsc /I..\src\external\
|
||||||
set DLLFlags=/Fm%ProjectName% /Fo%ProjectName% /Fa%ProjectName% /Fe%ProjectName%
|
set DLLFlags=/Fm%ProjectName% /Fo%ProjectName% /Fa%ProjectName% /Fe%ProjectName%
|
||||||
set Win32Flags=/FmWin32DTRenderer /FeWin32DTRenderer
|
set Win32Flags=/FmWin32DTRenderer /FeWin32DTRenderer
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user