1793 lines
65 KiB
C++
1793 lines
65 KiB
C++
#include "DTRendererRender.h"
|
|
#include "DTRendererDebug.h"
|
|
#include "DTRendererPlatform.h"
|
|
|
|
#define STB_RECT_PACK_IMPLEMENTATION
|
|
#define STB_TRUETYPE_IMPLEMENTATION
|
|
#include "external/stb_rect_pack.h"
|
|
#include "external/stb_truetype.h"
|
|
|
|
#include <intrin.h>
|
|
|
|
FILE_SCOPE const f32 COLOR_EPSILON = 0.9f;
|
|
|
|
inline void Make3PointsClockwise(DqnV3 *p1, DqnV3 *p2, DqnV3 *p3)
|
|
{
|
|
f32 area2Times = ((p2->x - p1->x) * (p2->y + p1->y)) +
|
|
((p3->x - p2->x) * (p3->y + p2->y)) +
|
|
((p1->x - p3->x) * (p1->y + p3->y));
|
|
if (area2Times > 0)
|
|
{
|
|
// Clockwise swap any point to make it clockwise
|
|
DQN_SWAP(DqnV3, *p2, *p3);
|
|
}
|
|
}
|
|
|
|
FILE_SCOPE inline DqnV4 PreMultiplyAlpha1(const DqnV4 color)
|
|
{
|
|
DQN_ASSERT(color.a >= 0.0f && color.a <= 1.0f);
|
|
DqnV4 result;
|
|
result.r = color.r * color.a;
|
|
result.g = color.g * color.a;
|
|
result.b = color.b * color.a;
|
|
result.a = color.a;
|
|
|
|
DQN_ASSERT(result.r >= 0.0f && result.r <= 1.0f);
|
|
DQN_ASSERT(result.g >= 0.0f && result.g <= 1.0f);
|
|
DQN_ASSERT(result.b >= 0.0f && result.b <= 1.0f);
|
|
|
|
DQN_ASSERT(result.a >= result.r);
|
|
DQN_ASSERT(result.a >= result.g);
|
|
DQN_ASSERT(result.a >= result.b);
|
|
return result;
|
|
}
|
|
|
|
FILE_SCOPE inline DqnV4 PreMultiplyAlpha255(const DqnV4 color)
|
|
{
|
|
DqnV4 result;
|
|
f32 normA = color.a * DTRRENDER_INV_255;
|
|
DQN_ASSERT(normA >= 0.0f && normA <= 1.0f + COLOR_EPSILON);
|
|
result.r = color.r * normA;
|
|
result.g = color.g * normA;
|
|
result.b = color.b * normA;
|
|
result.a = color.a;
|
|
|
|
return result;
|
|
}
|
|
|
|
enum ColorSpace
|
|
{
|
|
ColorSpace_SRGB,
|
|
ColorSpace_Linear,
|
|
};
|
|
|
|
// NOTE(doyle): We are approximating the actual gamma correct value 2.2 to 2 as
|
|
// a compromise.
|
|
inline f32 DTRRender_SRGB1ToLinearSpacef(f32 val)
|
|
{
|
|
DQN_ASSERT(val >= 0.0f && val <= 1.0f + COLOR_EPSILON);
|
|
f32 result = DQN_SQUARED(val);
|
|
return result;
|
|
}
|
|
|
|
inline DqnV4 DTRRender_SRGB1ToLinearSpaceV4(DqnV4 color)
|
|
{
|
|
DqnV4 result;
|
|
result.r = DTRRender_SRGB1ToLinearSpacef(color.r);
|
|
result.g = DTRRender_SRGB1ToLinearSpacef(color.g);
|
|
result.b = DTRRender_SRGB1ToLinearSpacef(color.b);
|
|
result.a = color.a;
|
|
|
|
return result;
|
|
}
|
|
|
|
inline f32 DTRRender_LinearToSRGB1Spacef(f32 val)
|
|
{
|
|
DQN_ASSERT(val >= 0.0f && val <= 1.0f + COLOR_EPSILON);
|
|
if (val == 0) return 0;
|
|
f32 result = DqnMath_Sqrtf(val);
|
|
return result;
|
|
}
|
|
|
|
inline DqnV4 DTRRender_LinearToSRGB1SpaceV4(DqnV4 color)
|
|
{
|
|
DqnV4 result;
|
|
result.r = DTRRender_LinearToSRGB1Spacef(color.r);
|
|
result.g = DTRRender_LinearToSRGB1Spacef(color.g);
|
|
result.b = DTRRender_LinearToSRGB1Spacef(color.b);
|
|
result.a = color.a;
|
|
|
|
return result;
|
|
}
|
|
|
|
inline DqnV4 DTRRender_PreMultiplyAlphaSRGB1WithLinearConversion(DqnV4 color)
|
|
{
|
|
DqnV4 result = color;
|
|
result = DTRRender_SRGB1ToLinearSpaceV4(result);
|
|
result = PreMultiplyAlpha1(result);
|
|
result = DTRRender_LinearToSRGB1SpaceV4(result);
|
|
|
|
return result;
|
|
}
|
|
|
|
// IMPORTANT(doyle): Color is expected to be premultiplied already
|
|
FILE_SCOPE inline void SetPixel(DTRRenderBuffer *const renderBuffer, const i32 x, const i32 y,
|
|
DqnV4 color, const enum ColorSpace colorSpace = ColorSpace_SRGB)
|
|
{
|
|
if (!renderBuffer) return;
|
|
if (x < 0 || x > (renderBuffer->width - 1)) return;
|
|
if (y < 0 || y > (renderBuffer->height - 1)) return;
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
|
|
u32 *const bitmapPtr = (u32 *)renderBuffer->memory;
|
|
const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4;
|
|
|
|
// If some alpha is involved, we need to apply gamma correction, but if the
|
|
// new pixel is totally opaque or invisible then we're just flat out
|
|
// overwriting/keeping the state of the pixel so we can save cycles by skipping.
|
|
bool needGammaFix = (color.a > 0.0f || color.a < 1.0f + COLOR_EPSILON) && (colorSpace == ColorSpace_SRGB);
|
|
if (needGammaFix) color = DTRRender_SRGB1ToLinearSpaceV4(color);
|
|
|
|
u32 src = bitmapPtr[x + (y * pitchInU32)];
|
|
f32 srcR = (f32)((src >> 16) & 0xFF) * DTRRENDER_INV_255;
|
|
f32 srcG = (f32)((src >> 8) & 0xFF) * DTRRENDER_INV_255;
|
|
f32 srcB = (f32)((src >> 0) & 0xFF) * DTRRENDER_INV_255;
|
|
|
|
srcR = DTRRender_SRGB1ToLinearSpacef(srcR);
|
|
srcG = DTRRender_SRGB1ToLinearSpacef(srcG);
|
|
srcB = DTRRender_SRGB1ToLinearSpacef(srcB);
|
|
|
|
// NOTE(doyle): AlphaBlend equations is (alpha * new) + (1 - alpha) * src.
|
|
// IMPORTANT(doyle): We pre-multiply so we can take out the (alpha * new)
|
|
f32 invANorm = 1 - color.a;
|
|
f32 destR = color.r + (invANorm * srcR);
|
|
f32 destG = color.g + (invANorm * srcG);
|
|
f32 destB = color.b + (invANorm * srcB);
|
|
|
|
destR = DTRRender_LinearToSRGB1Spacef(destR) * 255.0f;
|
|
destG = DTRRender_LinearToSRGB1Spacef(destG) * 255.0f;
|
|
destB = DTRRender_LinearToSRGB1Spacef(destB) * 255.0f;
|
|
|
|
if (DTR_DEBUG)
|
|
{
|
|
DQN_ASSERT((destR - 255.0f) < COLOR_EPSILON);
|
|
DQN_ASSERT((destG - 255.0f) < COLOR_EPSILON);
|
|
DQN_ASSERT((destB - 255.0f) < COLOR_EPSILON);
|
|
}
|
|
|
|
if (destR > 255.0f)
|
|
{
|
|
destR = 255;
|
|
}
|
|
|
|
if (destG > 255.0f)
|
|
{
|
|
destG = 255;
|
|
}
|
|
|
|
if (destB > 255.0f)
|
|
{
|
|
destB = 255;
|
|
}
|
|
|
|
u32 pixel = // ((u32)(destA) << 24 |
|
|
(u32)(destR) << 16 |
|
|
(u32)(destG) << 8 |
|
|
(u32)(destB) << 0;
|
|
bitmapPtr[x + (y * pitchInU32)] = pixel;
|
|
|
|
DTRDebug_CounterIncrement(DTRDebugCounter_SetPixels);
|
|
}
|
|
|
|
void DTRRender_Text(DTRRenderBuffer *const renderBuffer,
|
|
const DTRFont font, DqnV2 pos, const char *const text,
|
|
DqnV4 color, i32 len)
|
|
{
|
|
if (!text) return;
|
|
if (!font.bitmap || !font.atlas || !renderBuffer) return;
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
|
|
if (len == -1) len = Dqn_strlen(text);
|
|
|
|
i32 index = 0;
|
|
color = DTRRender_SRGB1ToLinearSpaceV4(color);
|
|
color = PreMultiplyAlpha1(color);
|
|
while (index < len)
|
|
{
|
|
if (text[index] < font.codepointRange.min &&
|
|
text[index] > font.codepointRange.max)
|
|
{
|
|
return;
|
|
}
|
|
|
|
i32 charIndex = text[index++] - (i32)font.codepointRange.min;
|
|
DQN_ASSERT(charIndex >= 0 &&
|
|
charIndex < (i32)(font.codepointRange.max -
|
|
font.codepointRange.min));
|
|
|
|
stbtt_aligned_quad alignedQuad = {};
|
|
stbtt_GetPackedQuad(font.atlas, font.bitmapDim.w, font.bitmapDim.h,
|
|
charIndex, &pos.x, &pos.y, &alignedQuad, true);
|
|
|
|
DqnRect fontRect = {};
|
|
fontRect.min = DqnV2_2f(alignedQuad.s0 * font.bitmapDim.w, alignedQuad.t1 * font.bitmapDim.h);
|
|
fontRect.max = DqnV2_2f(alignedQuad.s1 * font.bitmapDim.w, alignedQuad.t0 * font.bitmapDim.h);
|
|
|
|
DqnRect screenRect = {};
|
|
screenRect.min = DqnV2_2f(alignedQuad.x0, alignedQuad.y0);
|
|
screenRect.max = DqnV2_2f(alignedQuad.x1, alignedQuad.y1);
|
|
|
|
// TODO: Assumes 1bpp and pitch of font bitmap
|
|
const u32 fontPitch = font.bitmapDim.w;
|
|
u32 fontOffset = (u32)(fontRect.min.x + (fontRect.max.y * fontPitch));
|
|
u8 *fontPtr = font.bitmap + fontOffset;
|
|
|
|
DQN_ASSERT(sizeof(u32) == renderBuffer->bytesPerPixel);
|
|
|
|
// NOTE(doyle): This offset, yOffset and flipping t1, t0 is necessary
|
|
// for reversing the order of the font since its convention is 0,0 top
|
|
// left and -ve Y.
|
|
stbtt_packedchar *const charData = font.atlas + charIndex;
|
|
f32 fontHeightOffset = charData->yoff2 + charData->yoff;
|
|
|
|
u32 screenOffset = (u32)(screenRect.min.x + (screenRect.min.y - fontHeightOffset) * renderBuffer->width);
|
|
u32 *screenPtr = ((u32 *)renderBuffer->memory) + screenOffset;
|
|
|
|
i32 fontWidth = DQN_ABS((i32)(fontRect.min.x - fontRect.max.x));
|
|
i32 fontHeight = DQN_ABS((i32)(fontRect.min.y - fontRect.max.y));
|
|
for (i32 y = 0; y < fontHeight; y++)
|
|
{
|
|
for (i32 x = 0; x < fontWidth; x++)
|
|
{
|
|
i32 yOffset = fontHeight - y;
|
|
u8 srcA = fontPtr[x + (yOffset * fontPitch)];
|
|
if (srcA == 0) continue;
|
|
|
|
f32 srcANorm = srcA / 255.0f;
|
|
DqnV4 resultColor = {};
|
|
resultColor.r = color.r * srcANorm;
|
|
resultColor.g = color.g * srcANorm;
|
|
resultColor.b = color.b * srcANorm;
|
|
resultColor.a = color.a * srcANorm;
|
|
|
|
i32 actualX = (i32)(screenRect.min.x + x);
|
|
i32 actualY = (i32)(screenRect.min.y + y - fontHeightOffset);
|
|
SetPixel(renderBuffer, actualX, actualY, resultColor, ColorSpace_Linear);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
FILE_SCOPE void TransformPoints(const DqnV2 origin, DqnV2 *const pList,
|
|
const i32 numP, const DqnV2 scale,
|
|
const f32 rotation)
|
|
{
|
|
if (!pList || numP == 0) return;
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
|
|
DqnV2 xAxis = (DqnV2_2f(cosf(rotation), sinf(rotation)));
|
|
DqnV2 yAxis = DqnV2_2f(-xAxis.y, xAxis.x);
|
|
xAxis *= scale.x;
|
|
yAxis *= scale.y;
|
|
|
|
for (i32 i = 0; i < numP; i++)
|
|
{
|
|
DqnV2 p = pList[i];
|
|
pList[i] = origin + (xAxis * p.x) + (yAxis * p.y);
|
|
}
|
|
}
|
|
|
|
void DTRRender_Line(DTRRenderBuffer *const renderBuffer, DqnV2i a,
|
|
DqnV2i b, DqnV4 color)
|
|
{
|
|
if (!renderBuffer) return;
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
|
|
color = DTRRender_SRGB1ToLinearSpaceV4(color);
|
|
color = PreMultiplyAlpha1(color);
|
|
|
|
bool yTallerThanX = false;
|
|
if (DQN_ABS(a.x - b.x) < DQN_ABS(a.y - b.y))
|
|
{
|
|
// NOTE(doyle): Enforce that the X component is always longer than the
|
|
// Y component. When drawing this we just reverse the order back.
|
|
// This is to ensure that the gradient is always < 1, such that we can
|
|
// use the gradient to calculate the distance from the pixel origin, and
|
|
// at which point we want to increment the y.
|
|
yTallerThanX = true;
|
|
DQN_SWAP(i32, a.x, a.y);
|
|
DQN_SWAP(i32, b.x, b.y);
|
|
}
|
|
|
|
if (b.x < a.x) DQN_SWAP(DqnV2i, a, b);
|
|
|
|
i32 rise = b.y - a.y;
|
|
i32 run = b.x - a.x;
|
|
|
|
i32 delta = (b.y > a.y) ? 1 : -1;
|
|
i32 numIterations = b.x - a.x;
|
|
|
|
i32 distFromPixelOrigin = DQN_ABS(rise) * 2;
|
|
i32 distAccumulator = 0;
|
|
|
|
i32 newX = a.x;
|
|
i32 newY = a.y;
|
|
|
|
// Unflip the points if we did for plotting the pixels
|
|
i32 *plotX, *plotY;
|
|
if (yTallerThanX)
|
|
{
|
|
plotX = &newY;
|
|
plotY = &newX;
|
|
}
|
|
else
|
|
{
|
|
plotX = &newX;
|
|
plotY = &newY;
|
|
}
|
|
|
|
for (i32 iterateX = 0; iterateX < numIterations; iterateX++)
|
|
{
|
|
newX = a.x + iterateX;
|
|
SetPixel(renderBuffer, *plotX, *plotY, color, ColorSpace_Linear);
|
|
|
|
distAccumulator += distFromPixelOrigin;
|
|
if (distAccumulator > run)
|
|
{
|
|
newY += delta;
|
|
distAccumulator -= (run * 2);
|
|
}
|
|
}
|
|
}
|
|
|
|
// NOTE: This information is only particularly relevant for bitmaps so that
|
|
// after transformation, we can still programatically find the original
|
|
// coordinate system of the bitmap for texture mapping.
|
|
enum RectPointsIndex
|
|
{
|
|
RectPointsIndex_Basis = 0,
|
|
RectPointsIndex_XAxis,
|
|
RectPointsIndex_Point,
|
|
RectPointsIndex_YAxis,
|
|
RectPointsIndex_Count
|
|
};
|
|
|
|
typedef struct RectPoints
|
|
{
|
|
DqnV2 pList[RectPointsIndex_Count];
|
|
} RectPoints;
|
|
|
|
// Apply rotation and scale around the anchored point. This is a helper function that expands the
|
|
// min and max into the 4 vertexes of a rectangle then calls the normal transform routine.
|
|
// anchor: A normalised [0->1] value the points should be positioned from
|
|
FILE_SCOPE RectPoints TransformRectPoints(DqnV2 min, DqnV2 max, DTRRenderTransform transform)
|
|
{
|
|
DqnV2 dim = DqnV2_2f(max.x - min.x, max.y - min.y);
|
|
DqnV2 origin = DqnV2_2f(min.x + (transform.anchor.x * dim.w), min.y + (transform.anchor.y * dim.h));
|
|
DQN_ASSERT(dim.w > 0 && dim.h > 0);
|
|
|
|
RectPoints result = {};
|
|
result.pList[RectPointsIndex_Basis] = min - origin;
|
|
result.pList[RectPointsIndex_XAxis] = DqnV2_2f(max.x, min.y) - origin;
|
|
result.pList[RectPointsIndex_Point] = max - origin;
|
|
result.pList[RectPointsIndex_YAxis] = DqnV2_2f(min.x, max.y) - origin;
|
|
|
|
TransformPoints(origin, result.pList, DQN_ARRAY_COUNT(result.pList), transform.scale, transform.rotation);
|
|
|
|
return result;
|
|
}
|
|
|
|
FILE_SCOPE DqnRect GetBoundingBox(const DqnV2 *const pList, const i32 numP)
|
|
{
|
|
DqnRect result = {};
|
|
if (numP == 0 || !pList) return result;
|
|
|
|
result.min = pList[0];
|
|
result.max = pList[0];
|
|
for (i32 i = 1; i < numP; i++)
|
|
{
|
|
DqnV2 checkP = pList[i];
|
|
result.min.x = DQN_MIN(result.min.x, checkP.x);
|
|
result.min.y = DQN_MIN(result.min.y, checkP.y);
|
|
|
|
result.max.x = DQN_MAX(result.max.x, checkP.x);
|
|
result.max.y = DQN_MAX(result.max.y, checkP.y);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
void DTRRender_Rectangle(DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max,
|
|
DqnV4 color, const DTRRenderTransform transform)
|
|
{
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Transform vertexes
|
|
////////////////////////////////////////////////////////////////////////////
|
|
color = DTRRender_SRGB1ToLinearSpaceV4(color);
|
|
color = PreMultiplyAlpha1(color);
|
|
|
|
RectPoints rectPoints = TransformRectPoints(min, max, transform);
|
|
DqnV2 *const pList = &rectPoints.pList[0];
|
|
const i32 RECT_PLIST_SIZE = DQN_ARRAY_COUNT(rectPoints.pList);
|
|
|
|
DqnRect bounds = GetBoundingBox(pList, RECT_PLIST_SIZE);
|
|
min = bounds.min;
|
|
max = bounds.max;
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Clip Drawing Space
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DqnRect rect = DqnRect_4f(min.x, min.y, max.x, max.y);
|
|
DqnRect clip = DqnRect_4i(0, 0, renderBuffer->width, renderBuffer->height);
|
|
|
|
DqnRect clippedRect = DqnRect_ClipRect(rect, clip);
|
|
DqnV2 clippedSize = DqnRect_GetSizeV2(clippedRect);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Render
|
|
////////////////////////////////////////////////////////////////////////////
|
|
if (transform.rotation != 0)
|
|
{
|
|
for (i32 y = 0; y < clippedSize.w; y++)
|
|
{
|
|
i32 bufferY = (i32)clippedRect.min.y + y;
|
|
for (i32 x = 0; x < clippedSize.h; x++)
|
|
{
|
|
i32 bufferX = (i32)clippedRect.min.x + x;
|
|
bool pIsInside = true;
|
|
|
|
for (i32 pIndex = 0; pIndex < RECT_PLIST_SIZE; pIndex++)
|
|
{
|
|
DqnV2 origin = pList[pIndex];
|
|
DqnV2 line = pList[(pIndex + 1) % RECT_PLIST_SIZE] - origin;
|
|
DqnV2 axis = DqnV2_2i(bufferX, bufferY) - origin;
|
|
f32 dotResult = DqnV2_Dot(line, axis);
|
|
|
|
if (dotResult < 0)
|
|
{
|
|
pIsInside = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (pIsInside) SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (i32 y = 0; y < clippedSize.h; y++)
|
|
{
|
|
i32 bufferY = (i32)clippedRect.min.y + y;
|
|
for (i32 x = 0; x < clippedSize.w; x++)
|
|
{
|
|
i32 bufferX = (i32)clippedRect.min.x + x;
|
|
SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
|
|
}
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Debug
|
|
////////////////////////////////////////////////////////////////////////////
|
|
if (DTR_DEBUG_RENDER)
|
|
{
|
|
// Draw Bounding box
|
|
{
|
|
DTRRender_Line(renderBuffer, DqnV2i_2f(min.x, min.y), DqnV2i_2f(min.x, max.y), color);
|
|
DTRRender_Line(renderBuffer, DqnV2i_2f(min.x, max.y), DqnV2i_2f(max.x, max.y), color);
|
|
DTRRender_Line(renderBuffer, DqnV2i_2f(max.x, max.y), DqnV2i_2f(max.x, min.y), color);
|
|
DTRRender_Line(renderBuffer, DqnV2i_2f(max.x, min.y), DqnV2i_2f(min.x, min.y), color);
|
|
}
|
|
|
|
// Draw rotating outline
|
|
if (transform.rotation > 0)
|
|
{
|
|
DqnV4 green = DqnV4_4f(0, 1, 0, 1);
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(pList[0]), DqnV2i_V2(pList[1]), green);
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(pList[1]), DqnV2i_V2(pList[2]), green);
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(pList[2]), DqnV2i_V2(pList[3]), green);
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(pList[3]), DqnV2i_V2(pList[0]), green);
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
FILE_SCOPE void DebugBarycentricInternal(DqnV2 p, DqnV2 a, DqnV2 b, DqnV2 c, f32 *u, f32 *v, f32 *w)
|
|
{
|
|
DqnV2 v0 = b - a;
|
|
DqnV2 v1 = c - a;
|
|
DqnV2 v2 = p - a;
|
|
|
|
f32 d00 = DqnV2_Dot(v0, v0);
|
|
f32 d01 = DqnV2_Dot(v0, v1);
|
|
f32 d11 = DqnV2_Dot(v1, v1);
|
|
f32 d20 = DqnV2_Dot(v2, v0);
|
|
f32 d21 = DqnV2_Dot(v2, v1);
|
|
f32 denom = d00 * d11 - d01 * d01;
|
|
*v = (d11 * d20 - d01 * d21) / denom;
|
|
*w = (d00 * d21 - d01 * d20) / denom;
|
|
*u = 1.0f - *v - *w;
|
|
}
|
|
|
|
FILE_SCOPE inline f32 Triangle2TimesSignedArea(const DqnV2 a, const DqnV2 b, const DqnV2 c)
|
|
{
|
|
f32 result = ((b.x - a.x) * (c.y - a.y)) - ((b.y - a.y) * (c.x - a.x));
|
|
return result;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// SIMD
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// color: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
|
FILE_SCOPE inline void DebugSIMDAssertColorInRange(__m128 color, f32 min, f32 max)
|
|
{
|
|
if (DTR_DEBUG)
|
|
{
|
|
f32 r = ((f32 *)&color)[0];
|
|
f32 g = ((f32 *)&color)[1];
|
|
f32 b = ((f32 *)&color)[2];
|
|
f32 a = ((f32 *)&color)[3];
|
|
DQN_ASSERT(r >= min && r <= max);
|
|
DQN_ASSERT(g >= min && g <= max);
|
|
DQN_ASSERT(b >= min && b <= max);
|
|
DQN_ASSERT(a >= min && a <= max);
|
|
}
|
|
}
|
|
|
|
// color: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
|
FILE_SCOPE inline __m128 SIMDSRGB1ToLinearSpace(__m128 color)
|
|
{
|
|
DebugSIMDAssertColorInRange(color, 0.0f, 1.0f);
|
|
|
|
f32 preserveAlpha = ((f32 *)&color)[3];
|
|
__m128 result = _mm_mul_ps(color, color);
|
|
((f32 *)&result)[3] = preserveAlpha;
|
|
|
|
return result;
|
|
}
|
|
|
|
// color: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
|
FILE_SCOPE inline __m128 SIMDSRGB255ToLinearSpace1(__m128 color)
|
|
{
|
|
LOCAL_PERSIST const __m128 INV255_4X = _mm_set_ps1(DTRRENDER_INV_255);
|
|
color = _mm_mul_ps(color, INV255_4X);
|
|
|
|
f32 preserveAlpha = ((f32 *)&color)[3];
|
|
__m128 result = _mm_mul_ps(color, color);
|
|
((f32 *)&result)[3] = preserveAlpha;
|
|
|
|
return result;
|
|
}
|
|
|
|
// color: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
|
FILE_SCOPE inline __m128 SIMDLinearSpace1ToSRGB1(__m128 color)
|
|
{
|
|
DebugSIMDAssertColorInRange(color, 0.0f, 1.0f);
|
|
|
|
f32 preserveAlpha = ((f32 *)&color)[3];
|
|
__m128 result = _mm_sqrt_ps(color);
|
|
((f32 *)&result)[3] = preserveAlpha;
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
// color: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
|
FILE_SCOPE inline __m128 SIMDPreMultiplyAlpha1(__m128 color)
|
|
{
|
|
f32 alpha = ((f32 *)&color)[3];
|
|
__m128 simdAlpha = _mm_set_ps(1, alpha, alpha, alpha);
|
|
__m128 result = _mm_mul_ps(color, simdAlpha);
|
|
|
|
return result;
|
|
}
|
|
|
|
FILE_SCOPE inline DqnV2 Get2DOriginFromTransformAnchor(const DqnV2 p1, const DqnV2 p2,
|
|
const DqnV2 p3,
|
|
const DTRRenderTransform transform)
|
|
{
|
|
DqnV2 p1p2 = p2 - p1;
|
|
DqnV2 p1p3 = p3 - p1;
|
|
|
|
DqnV2 p1p2Anchored = p1p2 * transform.anchor;
|
|
DqnV2 p1p3Anchored = p1p3 * transform.anchor;
|
|
DqnV2 origin = p1 + p1p2Anchored + p1p3Anchored;
|
|
|
|
return origin;
|
|
}
|
|
|
|
// color: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
|
FILE_SCOPE inline void SIMDSetPixel(DTRRenderBuffer *const renderBuffer, const i32 x, const i32 y,
|
|
__m128 color,
|
|
const enum ColorSpace colorSpace = ColorSpace_SRGB)
|
|
{
|
|
if (!renderBuffer) return;
|
|
if (x < 0 || x > (renderBuffer->width - 1)) return;
|
|
if (y < 0 || y > (renderBuffer->height - 1)) return;
|
|
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
DebugSIMDAssertColorInRange(color, 0.0f, 1.0f);
|
|
|
|
u32 *const bitmapPtr = (u32 *)renderBuffer->memory;
|
|
const u32 pitchInU32 = (renderBuffer->width * renderBuffer->bytesPerPixel) / 4;
|
|
|
|
// If some alpha is involved, we need to apply gamma correction, but if the
|
|
// new pixel is totally opaque or invisible then we're just flat out
|
|
// overwriting/keeping the state of the pixel so we can save cycles by skipping.
|
|
f32 alpha = ((f32 *)&color)[3];
|
|
bool needGammaFix = (alpha > 0.0f || alpha < (1.0f + COLOR_EPSILON)) && (colorSpace == ColorSpace_SRGB);
|
|
if (needGammaFix) color = SIMDSRGB1ToLinearSpace(color);
|
|
|
|
// Format: u32 == (XX, RR, GG, BB)
|
|
u32 srcPixel = bitmapPtr[x + (y * pitchInU32)];
|
|
__m128 src = _mm_set_ps(0,
|
|
(f32)((srcPixel >> 0) & 0xFF),
|
|
(f32)((srcPixel >> 8) & 0xFF),
|
|
(f32)((srcPixel >> 16) & 0xFF));
|
|
src = SIMDSRGB255ToLinearSpace1(src);
|
|
|
|
f32 invA = 1 - alpha;
|
|
__m128 invA_4x = _mm_set_ps1(invA);
|
|
|
|
// PreAlphaMulColor + (1 - Alpha) * Src
|
|
__m128 oneMinusAlphaSrc = _mm_mul_ps(invA_4x, src);
|
|
__m128 dest = _mm_add_ps(color, oneMinusAlphaSrc);
|
|
dest = SIMDLinearSpace1ToSRGB1(dest);
|
|
dest = _mm_mul_ps(dest, _mm_set_ps1(255.0f)); // to 0->255 range
|
|
|
|
DebugSIMDAssertColorInRange(dest, 0.0f, 255.0f);
|
|
|
|
f32 destR = ((f32 *)&dest)[0];
|
|
f32 destG = ((f32 *)&dest)[1];
|
|
f32 destB = ((f32 *)&dest)[2];
|
|
|
|
u32 pixel = // ((u32)(destA) << 24 |
|
|
(u32)(destR) << 16 |
|
|
(u32)(destG) << 8 |
|
|
(u32)(destB) << 0;
|
|
bitmapPtr[x + (y * pitchInU32)] = pixel;
|
|
|
|
DTRDebug_CounterIncrement(DTRDebugCounter_SetPixels);
|
|
}
|
|
|
|
// colorModulate: _mm_set_ps(a, b, g, r) ie. 0=r, 1=g, 2=b, 3=a
|
|
// barycentric: _mm_set_ps(xx, p3, p2, p1) ie. 0=p1, 1=p2, 2=p3, 3=a
|
|
FILE_SCOPE __m128 SIMDSampleTextureForTriangle(DTRBitmap *const texture, const DqnV2 uv1,
|
|
const DqnV2 uv2SubUv1, const DqnV2 uv3SubUv1,
|
|
const __m128 barycentric)
|
|
{
|
|
DTRDebug_BeginCycleCount("SIMDTexturedTriangle_SampleTexture",
|
|
DTRDebugCycleCount_SIMDTexturedTriangle_SampleTexture);
|
|
|
|
LOCAL_PERSIST const __m128 INV255_4X = _mm_set_ps1(1.0f / 255.0f);
|
|
|
|
const f32 barycentricP2 = ((f32 *)&barycentric)[1];
|
|
const f32 barycentricP3 = ((f32 *)&barycentric)[2];
|
|
DqnV2 uv = uv1 + (uv2SubUv1 * barycentricP2) + (uv3SubUv1 * barycentricP3);
|
|
|
|
const f32 EPSILON = 0.1f;
|
|
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
|
|
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
|
|
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
|
|
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
|
|
|
|
f32 texelXf = uv.x * texture->dim.w;
|
|
f32 texelYf = uv.y * texture->dim.h;
|
|
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
|
|
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
|
|
|
|
i32 texelX = (i32)texelXf;
|
|
i32 texelY = (i32)texelYf;
|
|
|
|
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
|
|
const u8 *const texturePtr = texture->memory;
|
|
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) + (texelY * texturePitch));
|
|
|
|
__m128 color = _mm_set_ps((f32)(texel1 >> 24),
|
|
(f32)((texel1 >> 16) & 0xFF),
|
|
(f32)((texel1 >> 8) & 0xFF),
|
|
(f32)((texel1 >> 0) & 0xFF));
|
|
|
|
color = SIMDSRGB255ToLinearSpace1(color);
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SIMDTexturedTriangle_SampleTexture);
|
|
return color;
|
|
}
|
|
|
|
// IMPORTANT: Debug Markers can _NOT_ be used in primitive rendering functions,
|
|
// ie. any render function that is used in this call because it'll call into
|
|
// itself infinitely.
|
|
FILE_SCOPE void DebugRenderMarkers(DTRRenderBuffer *const renderBuffer, const DqnV2 *const pList,
|
|
const i32 pListSize, const DTRRenderTransform transform,
|
|
bool drawBoundingBox, bool drawBasis, bool drawVertexMarkers)
|
|
{
|
|
if (!DTR_DEBUG) return;
|
|
if (!DTR_DEBUG_RENDER) return;
|
|
|
|
DqnV4 green = DqnV4_4f(0, 1, 0, 1);
|
|
DqnV4 blue = DqnV4_4f(0, 0, 1, 1);
|
|
DqnV4 purple = DqnV4_4f(1, 0, 1, 1);
|
|
DqnV4 red = DqnV4_4f(1, 0, 0, 1);
|
|
|
|
// Draw Bounding box
|
|
if (drawBoundingBox)
|
|
{
|
|
DqnRect bounds = GetBoundingBox(pList, pListSize);
|
|
|
|
DTRRender_Line(renderBuffer, DqnV2i_2f(bounds.min.x, bounds.min.y), DqnV2i_2f(bounds.min.x, bounds.max.y), red);
|
|
DTRRender_Line(renderBuffer, DqnV2i_2f(bounds.min.x, bounds.max.y), DqnV2i_2f(bounds.max.x, bounds.max.y), red);
|
|
DTRRender_Line(renderBuffer, DqnV2i_2f(bounds.max.x, bounds.max.y), DqnV2i_2f(bounds.max.x, bounds.min.y), red);
|
|
DTRRender_Line(renderBuffer, DqnV2i_2f(bounds.max.x, bounds.min.y), DqnV2i_2f(bounds.min.x, bounds.min.y), red);
|
|
}
|
|
|
|
// Draw Coordinate Basis
|
|
if (drawBasis)
|
|
{
|
|
// TODO(doyle): Fixme
|
|
if (pListSize == 3)
|
|
{
|
|
DqnV2 origin = Get2DOriginFromTransformAnchor(pList[0], pList[1], pList[2], transform);
|
|
const f32 rotation = transform.rotation;
|
|
DqnV2 xAxis = DqnV2_2f(cosf(rotation), sinf(rotation)) * transform.scale.x;
|
|
DqnV2 yAxis = DqnV2_2f(-xAxis.y, xAxis.x) * transform.scale.y;
|
|
DqnV4 coordSysColor = DqnV4_4f(0, 1, 1, 1);
|
|
i32 axisLen = 50;
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(origin),
|
|
DqnV2i_V2(origin) + DqnV2i_V2(xAxis * axisLen), coordSysColor);
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(origin),
|
|
DqnV2i_V2(origin) + DqnV2i_V2(yAxis * axisLen), coordSysColor);
|
|
}
|
|
}
|
|
|
|
// Draw axis point
|
|
if (drawVertexMarkers)
|
|
{
|
|
DqnV4 colorList[] = {green, blue, purple, red};
|
|
for (i32 i = 0; i < pListSize; i++)
|
|
{
|
|
DqnV2 p = pList[i];
|
|
DTRRender_Rectangle(renderBuffer, p - DqnV2_1f(5), p + DqnV2_1f(5), colorList[i]);
|
|
}
|
|
}
|
|
}
|
|
|
|
FILE_SCOPE void SIMDTexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
|
|
DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3,
|
|
DTRBitmap *const texture, DqnV4 color)
|
|
|
|
{
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
DTRDebug_BeginCycleCount("SIMDTexturedTriangle", DTRDebugCycleCount_SIMDTexturedTriangle);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Convert color
|
|
////////////////////////////////////////////////////////////////////////////
|
|
__m128 simdColor = _mm_set_ps(color.a, color.b, color.g, color.r);
|
|
simdColor = SIMDSRGB1ToLinearSpace(simdColor);
|
|
simdColor = SIMDPreMultiplyAlpha1(simdColor);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Render Bounds
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DqnV2i max = DqnV2i_2f(DQN_MAX(DQN_MAX(p1.x, p2.x), p3.x), DQN_MAX(DQN_MAX(p1.y, p2.y), p3.y));
|
|
DqnV2i min = DqnV2i_2f(DQN_MIN(DQN_MIN(p1.x, p2.x), p3.x), DQN_MIN(DQN_MIN(p1.y, p2.y), p3.y));
|
|
min.x = DQN_MAX(min.x, 0);
|
|
min.y = DQN_MAX(min.y, 0);
|
|
max.x = DQN_MIN(max.x, renderBuffer->width - 1);
|
|
max.y = DQN_MIN(max.y, renderBuffer->height - 1);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Setup SIMD data
|
|
////////////////////////////////////////////////////////////////////////////
|
|
const u32 NUM_X_PIXELS_TO_SIMD = 2;
|
|
const u32 NUM_Y_PIXELS_TO_SIMD = 1;
|
|
|
|
const __m128 INV255_4X = _mm_set_ps1(1.0f / 255.0f);
|
|
const __m128 ZERO_4X = _mm_set_ps1(0.0f);
|
|
const u32 IS_GREATER_MASK = 0xF;
|
|
|
|
// SignedArea: _mm_set_ps(unused, p3, p2, p1) ie 0=p1, 1=p1, 2=p3, 3=unused
|
|
__m128 signedAreaPixel1;
|
|
__m128 signedAreaPixel2;
|
|
|
|
__m128 signedAreaPixelDeltaX;
|
|
__m128 signedAreaPixelDeltaY;
|
|
__m128 invSignedAreaParallelogram_4x;
|
|
|
|
__m128 triangleZ = _mm_set_ps(0, p3.z, p2.z, p1.z);
|
|
{
|
|
DqnV2i startP = min;
|
|
f32 signedArea1Start = Triangle2TimesSignedArea(p2.xy, p3.xy, DqnV2_V2i(startP));
|
|
f32 signedArea1DeltaX = p2.y - p3.y;
|
|
f32 signedArea1DeltaY = p3.x - p2.x;
|
|
|
|
f32 signedArea2Start = Triangle2TimesSignedArea(p3.xy, p1.xy, DqnV2_V2i(startP));
|
|
f32 signedArea2DeltaX = p3.y - p1.y;
|
|
f32 signedArea2DeltaY = p1.x - p3.x;
|
|
|
|
f32 signedArea3Start = Triangle2TimesSignedArea(p1.xy, p2.xy, DqnV2_V2i(startP));
|
|
f32 signedArea3DeltaX = p1.y - p2.y;
|
|
f32 signedArea3DeltaY = p2.x - p1.x;
|
|
|
|
f32 signedAreaParallelogram = signedArea1Start + signedArea2Start + signedArea3Start;
|
|
if (signedAreaParallelogram == 0) return;
|
|
|
|
f32 invSignedAreaParallelogram = 1.0f / signedAreaParallelogram;
|
|
invSignedAreaParallelogram_4x = _mm_set_ps1(invSignedAreaParallelogram);
|
|
|
|
// NOTE: Order is important here!
|
|
signedAreaPixelDeltaX = _mm_set_ps(0, signedArea3DeltaX, signedArea2DeltaX, signedArea1DeltaX);
|
|
signedAreaPixelDeltaY = _mm_set_ps(0, signedArea3DeltaY, signedArea2DeltaY, signedArea1DeltaY);
|
|
|
|
signedAreaPixel1 = _mm_set_ps(0, signedArea3Start, signedArea2Start, signedArea1Start);
|
|
signedAreaPixel2 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaX);
|
|
|
|
// NOTE: Increase step size to the number of pixels rasterised with SIMD
|
|
{
|
|
const __m128 STEP_X_4X = _mm_set_ps1((f32)NUM_X_PIXELS_TO_SIMD);
|
|
const __m128 STEP_Y_4X = _mm_set_ps1((f32)NUM_Y_PIXELS_TO_SIMD);
|
|
|
|
signedAreaPixelDeltaX = _mm_mul_ps(signedAreaPixelDeltaX, STEP_X_4X);
|
|
signedAreaPixelDeltaY = _mm_mul_ps(signedAreaPixelDeltaY, STEP_Y_4X);
|
|
}
|
|
}
|
|
|
|
const DqnV2 uv2SubUv1 = uv2 - uv1;
|
|
const DqnV2 uv3SubUv1 = uv3 - uv1;
|
|
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
|
|
const u8 *const texturePtr = texture->memory;
|
|
const u32 zBufferPitch = renderBuffer->width;
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Scan and Render
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DTRDebug_BeginCycleCount("SIMDTexturedTriangle_Rasterise", DTRDebugCycleCount_SIMDTexturedTriangle_Rasterise);
|
|
for (i32 bufferY = min.y; bufferY < max.y; bufferY += NUM_Y_PIXELS_TO_SIMD)
|
|
{
|
|
__m128 signedArea1 = signedAreaPixel1;
|
|
__m128 signedArea2 = signedAreaPixel2;
|
|
|
|
for (i32 bufferX = min.x; bufferX < max.x; bufferX += NUM_X_PIXELS_TO_SIMD)
|
|
{
|
|
|
|
DTRDebug_BeginCycleCount("SIMDTexturedTriangle_RasterisePixel",
|
|
DTRDebugCycleCount_SIMDTexturedTriangle_RasterisePixel);
|
|
// Rasterise buffer(X, Y) pixel
|
|
{
|
|
__m128 checkArea = signedArea1;
|
|
__m128 isGreater = _mm_cmpge_ps(checkArea, ZERO_4X);
|
|
i32 isGreaterResult = _mm_movemask_ps(isGreater);
|
|
i32 posX = bufferX;
|
|
i32 posY = bufferY;
|
|
|
|
if ((isGreaterResult & IS_GREATER_MASK) == IS_GREATER_MASK)
|
|
{
|
|
__m128 barycentric = _mm_mul_ps(checkArea, invSignedAreaParallelogram_4x);
|
|
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
|
|
|
|
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
|
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
|
|
((f32 *)&barycentricZ)[1] +
|
|
((f32 *)&barycentricZ)[2];
|
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
|
if (pixelZValue > currZValue)
|
|
{
|
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
|
__m128 texSampledColor = SIMDSampleTextureForTriangle(texture, uv1, uv2SubUv1, uv3SubUv1, barycentric);
|
|
__m128 finalColor = _mm_mul_ps(texSampledColor, simdColor);
|
|
SIMDSetPixel(renderBuffer, posX, posY, finalColor, ColorSpace_Linear);
|
|
}
|
|
}
|
|
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
|
|
}
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SIMDTexturedTriangle_RasterisePixel);
|
|
|
|
// Rasterise buffer(X + 1, Y) pixel
|
|
{
|
|
__m128 checkArea = signedArea2;
|
|
__m128 isGreater = _mm_cmpge_ps(checkArea, ZERO_4X);
|
|
i32 isGreaterResult = _mm_movemask_ps(isGreater);
|
|
i32 posX = bufferX + 1;
|
|
i32 posY = bufferY;
|
|
if ((isGreaterResult & IS_GREATER_MASK) == IS_GREATER_MASK && posX < max.x)
|
|
{
|
|
__m128 barycentric = _mm_mul_ps(checkArea, invSignedAreaParallelogram_4x);
|
|
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
|
|
|
|
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
|
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
|
|
((f32 *)&barycentricZ)[1] +
|
|
((f32 *)&barycentricZ)[2];
|
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
|
if (pixelZValue > currZValue)
|
|
{
|
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
|
__m128 texSampledColor = SIMDSampleTextureForTriangle(texture, uv1, uv2SubUv1, uv3SubUv1, barycentric);
|
|
__m128 finalColor = _mm_mul_ps(texSampledColor, simdColor);
|
|
SIMDSetPixel(renderBuffer, posX, posY, finalColor, ColorSpace_Linear);
|
|
}
|
|
}
|
|
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
|
|
}
|
|
|
|
}
|
|
|
|
signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY);
|
|
signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY);
|
|
}
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SIMDTexturedTriangle_Rasterise);
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SIMDTexturedTriangle);
|
|
}
|
|
|
|
FILE_SCOPE void SlowTexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2,
|
|
DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3,
|
|
DTRBitmap *const texture, DqnV4 color)
|
|
{
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Convert Color
|
|
////////////////////////////////////////////////////////////////////////////
|
|
color = DTRRender_SRGB1ToLinearSpaceV4(color);
|
|
color = PreMultiplyAlpha1(color);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Scan and Render
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DqnV2i max = DqnV2i_2f(DQN_MAX(DQN_MAX(p1.x, p2.x), p3.x), DQN_MAX(DQN_MAX(p1.y, p2.y), p3.y));
|
|
DqnV2i min = DqnV2i_2f(DQN_MIN(DQN_MIN(p1.x, p2.x), p3.x), DQN_MIN(DQN_MIN(p1.y, p2.y), p3.y));
|
|
min.x = DQN_MAX(min.x, 0);
|
|
min.y = DQN_MAX(min.y, 0);
|
|
max.x = DQN_MIN(max.x, renderBuffer->width - 1);
|
|
max.y = DQN_MIN(max.y, renderBuffer->height - 1);
|
|
|
|
DqnV2i startP = min;
|
|
f32 signedArea1Pixel = Triangle2TimesSignedArea(p2.xy, p3.xy, DqnV2_V2i(startP));
|
|
f32 signedArea1DeltaX = p2.y - p3.y;
|
|
f32 signedArea1DeltaY = p3.x - p2.x;
|
|
|
|
f32 signedArea2Pixel = Triangle2TimesSignedArea(p3.xy, p1.xy, DqnV2_V2i(startP));
|
|
f32 signedArea2DeltaX = p3.y - p1.y;
|
|
f32 signedArea2DeltaY = p1.x - p3.x;
|
|
|
|
f32 signedArea3Pixel = Triangle2TimesSignedArea(p1.xy, p2.xy, DqnV2_V2i(startP));
|
|
f32 signedArea3DeltaX = p1.y - p2.y;
|
|
f32 signedArea3DeltaY = p2.x - p1.x;
|
|
|
|
f32 signedAreaParallelogram = signedArea1Pixel + signedArea2Pixel + signedArea3Pixel;
|
|
if (signedAreaParallelogram == 0) return;
|
|
|
|
f32 invSignedAreaParallelogram = 1.0f / signedAreaParallelogram;
|
|
|
|
const DqnV3 p2SubP1 = p2 - p1;
|
|
const DqnV3 p3SubP1 = p3 - p1;
|
|
const DqnV2 uv2SubUv1 = uv2 - uv1;
|
|
const DqnV2 uv3SubUv1 = uv3 - uv1;
|
|
|
|
const u32 zBufferPitch = renderBuffer->width;
|
|
const u8 *const texturePtr = texture->memory;
|
|
const u32 texturePitch = texture->bytesPerPixel * texture->dim.w;
|
|
|
|
const f32 INV_255 = 1 / 255.0f;
|
|
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
|
|
{
|
|
f32 signedArea1 = signedArea1Pixel;
|
|
f32 signedArea2 = signedArea2Pixel;
|
|
f32 signedArea3 = signedArea3Pixel;
|
|
|
|
for (i32 bufferX = min.x; bufferX < max.x; bufferX++)
|
|
{
|
|
if (signedArea1 >= 0 && signedArea2 >= 0 && signedArea3 >= 0)
|
|
{
|
|
f32 barycentricB = signedArea3 * invSignedAreaParallelogram;
|
|
f32 barycentricC = signedArea1 * invSignedAreaParallelogram;
|
|
|
|
if (DTR_DEBUG)
|
|
{
|
|
const f32 EPSILON = 0.1f;
|
|
f32 debugSignedArea1 = ((p2.x - p1.x) * (bufferY - p1.y)) - ((p2.y - p1.y) * (bufferX - p1.x));
|
|
f32 debugSignedArea2 = ((p3.x - p2.x) * (bufferY - p2.y)) - ((p3.y - p2.y) * (bufferX - p2.x));
|
|
f32 debugSignedArea3 = ((p1.x - p3.x) * (bufferY - p3.y)) - ((p1.y - p3.y) * (bufferX - p3.x));
|
|
|
|
f32 deltaSignedArea1 = DQN_ABS(debugSignedArea1 - signedArea1);
|
|
f32 deltaSignedArea2 = DQN_ABS(debugSignedArea2 - signedArea2);
|
|
f32 deltaSignedArea3 = DQN_ABS(debugSignedArea3 - signedArea3);
|
|
DQN_ASSERT(deltaSignedArea1 < EPSILON && deltaSignedArea2 < EPSILON &&
|
|
deltaSignedArea3 < EPSILON)
|
|
|
|
f32 debugBarycentricA, debugBarycentricB, debugBarycentricC;
|
|
DebugBarycentricInternal(DqnV2_2i(bufferX, bufferY), p1.xy, p2.xy, p3.xy,
|
|
&debugBarycentricA, &debugBarycentricB,
|
|
&debugBarycentricC);
|
|
|
|
f32 deltaBaryB = DQN_ABS(barycentricB - debugBarycentricB);
|
|
f32 deltaBaryC = DQN_ABS(barycentricC - debugBarycentricC);
|
|
|
|
DQN_ASSERT(deltaBaryB < EPSILON && deltaBaryC < EPSILON)
|
|
}
|
|
|
|
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
|
|
f32 pixelZValue = p1.z + (barycentricB * (p2SubP1.z)) + (barycentricC * (p3SubP1.z));
|
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
|
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
|
|
|
if (pixelZValue > currZValue)
|
|
{
|
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
|
if (texture)
|
|
{
|
|
DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC);
|
|
|
|
const f32 EPSILON = 0.1f;
|
|
DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON);
|
|
DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON);
|
|
uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f);
|
|
uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f);
|
|
|
|
f32 texelXf = uv.x * texture->dim.w;
|
|
f32 texelYf = uv.y * texture->dim.h;
|
|
DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w);
|
|
DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h);
|
|
|
|
i32 texelX = (i32)texelXf;
|
|
i32 texelY = (i32)texelYf;
|
|
|
|
u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) + (texelY * texturePitch));
|
|
|
|
DqnV4 color1;
|
|
color1.a = (f32)(texel1 >> 24);
|
|
color1.b = (f32)((texel1 >> 16) & 0xFF);
|
|
color1.g = (f32)((texel1 >> 8) & 0xFF);
|
|
color1.r = (f32)((texel1 >> 0) & 0xFF);
|
|
|
|
color1 *= INV_255;
|
|
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
|
|
DqnV4 blend = color * color1;
|
|
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
|
|
}
|
|
else
|
|
{
|
|
SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
|
|
}
|
|
}
|
|
}
|
|
|
|
signedArea1 += signedArea1DeltaX;
|
|
signedArea2 += signedArea2DeltaX;
|
|
signedArea3 += signedArea3DeltaX;
|
|
}
|
|
|
|
signedArea1Pixel += signedArea1DeltaY;
|
|
signedArea2Pixel += signedArea2DeltaY;
|
|
signedArea3Pixel += signedArea3DeltaY;
|
|
}
|
|
}
|
|
|
|
void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3,
|
|
DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture,
|
|
DqnV4 color, const DTRRenderTransform transform)
|
|
{
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Transform vertexes p1, p2, p3 inplace
|
|
////////////////////////////////////////////////////////////////////////////
|
|
Make3PointsClockwise(&p1, &p2, &p3);
|
|
|
|
// TODO(doyle): Transform is only in 2d right now
|
|
DqnV2 origin = Get2DOriginFromTransformAnchor(p1.xy, p2.xy, p3.xy, transform);
|
|
DqnV2 pList[3] = {p1.xy - origin, p2.xy - origin, p3.xy - origin};
|
|
TransformPoints(origin, pList, DQN_ARRAY_COUNT(pList), transform.scale, transform.rotation);
|
|
|
|
p1.xy = pList[0];
|
|
p2.xy = pList[1];
|
|
p3.xy = pList[2];
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// SIMD/Slow Path
|
|
////////////////////////////////////////////////////////////////////////////
|
|
if (globalDTRPlatformFlags.canUseSSE2)
|
|
{
|
|
SIMDTexturedTriangle(renderBuffer, p1, p2, p3, uv1, uv2, uv3, texture, color);
|
|
}
|
|
else
|
|
{
|
|
SlowTexturedTriangle(renderBuffer, p1, p2, p3, uv1, uv2, uv3, texture, color);
|
|
}
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Debug
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DTRDebug_CounterIncrement(DTRDebugCounter_RenderTriangle);
|
|
{
|
|
bool drawBoundingBox = false;
|
|
bool drawBasis = false;
|
|
bool drawVertexMarkers = false;
|
|
|
|
DebugRenderMarkers(renderBuffer, pList, DQN_ARRAY_COUNT(pList), transform, drawBoundingBox,
|
|
drawBasis, drawVertexMarkers);
|
|
}
|
|
}
|
|
|
|
void DTRRender_Mesh(DTRRenderBuffer *const renderBuffer, DTRMesh *const mesh, const DqnV3 pos,
|
|
const f32 scale, const DqnV3 lightVector)
|
|
{
|
|
if (!mesh) return;
|
|
|
|
for (u32 i = 0; i < mesh->numFaces; i++)
|
|
{
|
|
DTRMeshFace face = mesh->faces[i];
|
|
DQN_ASSERT(face.numVertexIndex == 3);
|
|
i32 vertAIndex = face.vertexIndex[0];
|
|
i32 vertBIndex = face.vertexIndex[1];
|
|
i32 vertCIndex = face.vertexIndex[2];
|
|
|
|
DqnV4 vertA = mesh->vertexes[vertAIndex];
|
|
DqnV4 vertB = mesh->vertexes[vertBIndex];
|
|
DqnV4 vertC = mesh->vertexes[vertCIndex];
|
|
// TODO(doyle): Some models have -ve indexes to refer to relative
|
|
// vertices. We should resolve that to positive indexes at run time.
|
|
DQN_ASSERT(vertAIndex < (i32)mesh->numVertexes);
|
|
DQN_ASSERT(vertBIndex < (i32)mesh->numVertexes);
|
|
DQN_ASSERT(vertCIndex < (i32)mesh->numVertexes);
|
|
|
|
DqnV4 vertAB = vertB - vertA;
|
|
DqnV4 vertAC = vertC - vertA;
|
|
DqnV3 normal = DqnV3_Cross(vertAC.xyz, vertAB.xyz);
|
|
|
|
f32 intensity = DqnV3_Dot(DqnV3_Normalise(normal), lightVector);
|
|
if (intensity < 0) continue;
|
|
DqnV4 modelCol = DqnV4_4f(1, 1, 1, 1);
|
|
modelCol.rgb *= DQN_ABS(intensity);
|
|
|
|
DqnV3 screenVA = (vertA.xyz * scale) + pos;
|
|
DqnV3 screenVB = (vertB.xyz * scale) + pos;
|
|
DqnV3 screenVC = (vertC.xyz * scale) + pos;
|
|
|
|
// TODO(doyle): Why do we need rounding here? Maybe it's because
|
|
// I don't do any interpolation in the triangle routine for jagged
|
|
// edges.
|
|
#if 1
|
|
screenVA.x = (f32)(i32)(screenVA.x + 0.5f);
|
|
screenVA.y = (f32)(i32)(screenVA.y + 0.5f);
|
|
screenVB.x = (f32)(i32)(screenVB.x + 0.5f);
|
|
screenVB.y = (f32)(i32)(screenVB.y + 0.5f);
|
|
screenVC.x = (f32)(i32)(screenVC.x + 0.5f);
|
|
screenVC.y = (f32)(i32)(screenVC.y + 0.5f);
|
|
#endif
|
|
|
|
i32 textureAIndex = face.texIndex[0];
|
|
i32 textureBIndex = face.texIndex[1];
|
|
i32 textureCIndex = face.texIndex[2];
|
|
|
|
DqnV2 texA = mesh->texUV[textureAIndex].xy;
|
|
DqnV2 texB = mesh->texUV[textureBIndex].xy;
|
|
DqnV2 texC = mesh->texUV[textureCIndex].xy;
|
|
DQN_ASSERT(textureAIndex < (i32)mesh->numTexUV);
|
|
DQN_ASSERT(textureBIndex < (i32)mesh->numTexUV);
|
|
DQN_ASSERT(textureCIndex < (i32)mesh->numTexUV);
|
|
|
|
bool DEBUG_SIMPLE_MODE = false;
|
|
if (DTR_DEBUG && DEBUG_SIMPLE_MODE)
|
|
{
|
|
DTRRender_Triangle(renderBuffer, screenVA, screenVB, screenVC, modelCol);
|
|
}
|
|
else
|
|
{
|
|
DTRRender_TexturedTriangle(renderBuffer, screenVA, screenVB, screenVC, texA, texB,
|
|
texC, &mesh->tex, modelCol);
|
|
}
|
|
|
|
bool DEBUG_WIREFRAME = false;
|
|
if (DTR_DEBUG && DEBUG_WIREFRAME)
|
|
{
|
|
DqnV4 wireColor = DqnV4_4f(1.0f, 1.0f, 1.0f, 0.01f);
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(screenVA.xy), DqnV2i_V2(screenVB.xy),
|
|
wireColor);
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(screenVB.xy), DqnV2i_V2(screenVC.xy),
|
|
wireColor);
|
|
DTRRender_Line(renderBuffer, DqnV2i_V2(screenVC.xy), DqnV2i_V2(screenVA.xy),
|
|
wireColor);
|
|
}
|
|
}
|
|
}
|
|
|
|
FILE_SCOPE void SIMDTriangle(DTRRenderBuffer *const renderBuffer, const DqnV3 p1, const DqnV3 p2,
|
|
const DqnV3 p3, DqnV4 color)
|
|
{
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
DTRDebug_BeginCycleCount("SIMDTriangle", DTRDebugCycleCount_SIMDTriangle);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Convert color
|
|
////////////////////////////////////////////////////////////////////////////
|
|
__m128 simdColor = _mm_set_ps(color.a, color.b, color.g, color.r);
|
|
simdColor = SIMDSRGB1ToLinearSpace(simdColor);
|
|
simdColor = SIMDPreMultiplyAlpha1(simdColor);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Render Bounds
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DqnV2i max = DqnV2i_2f(DQN_MAX(DQN_MAX(p1.x, p2.x), p3.x), DQN_MAX(DQN_MAX(p1.y, p2.y), p3.y));
|
|
DqnV2i min = DqnV2i_2f(DQN_MIN(DQN_MIN(p1.x, p2.x), p3.x), DQN_MIN(DQN_MIN(p1.y, p2.y), p3.y));
|
|
min.x = DQN_MAX(min.x, 0);
|
|
min.y = DQN_MAX(min.y, 0);
|
|
max.x = DQN_MIN(max.x, renderBuffer->width - 1);
|
|
max.y = DQN_MIN(max.y, renderBuffer->height - 1);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Setup SIMD data
|
|
////////////////////////////////////////////////////////////////////////////
|
|
const u32 NUM_X_PIXELS_TO_SIMD = 2;
|
|
const u32 NUM_Y_PIXELS_TO_SIMD = 1;
|
|
|
|
const __m128 INV255_4X = _mm_set_ps1(1.0f / 255.0f);
|
|
const __m128 ZERO_4X = _mm_set_ps1(0.0f);
|
|
const u32 IS_GREATER_MASK = 0xF;
|
|
|
|
// SignedArea: _mm_set_ps(unused, p3, p2, p1) ie 0=p1, 1=p1, 2=p3, 3=unused
|
|
__m128 signedAreaPixel1;
|
|
__m128 signedAreaPixel2;
|
|
|
|
__m128 signedAreaPixelDeltaX;
|
|
__m128 signedAreaPixelDeltaY;
|
|
__m128 invSignedAreaParallelogram_4x;
|
|
|
|
__m128 triangleZ = _mm_set_ps(0, p3.z, p2.z, p1.z);
|
|
{
|
|
DqnV2i startP = min;
|
|
f32 signedArea1Start = Triangle2TimesSignedArea(p2.xy, p3.xy, DqnV2_V2i(startP));
|
|
f32 signedArea1DeltaX = p2.y - p3.y;
|
|
f32 signedArea1DeltaY = p3.x - p2.x;
|
|
|
|
f32 signedArea2Start = Triangle2TimesSignedArea(p3.xy, p1.xy, DqnV2_V2i(startP));
|
|
f32 signedArea2DeltaX = p3.y - p1.y;
|
|
f32 signedArea2DeltaY = p1.x - p3.x;
|
|
|
|
f32 signedArea3Start = Triangle2TimesSignedArea(p1.xy, p2.xy, DqnV2_V2i(startP));
|
|
f32 signedArea3DeltaX = p1.y - p2.y;
|
|
f32 signedArea3DeltaY = p2.x - p1.x;
|
|
|
|
f32 signedAreaParallelogram = signedArea1Start + signedArea2Start + signedArea3Start;
|
|
if (signedAreaParallelogram == 0) return;
|
|
|
|
f32 invSignedAreaParallelogram = 1.0f / signedAreaParallelogram;
|
|
invSignedAreaParallelogram_4x = _mm_set_ps1(invSignedAreaParallelogram);
|
|
|
|
// NOTE: Order is important here!
|
|
signedAreaPixelDeltaX = _mm_set_ps(0, signedArea3DeltaX, signedArea2DeltaX, signedArea1DeltaX);
|
|
signedAreaPixelDeltaY = _mm_set_ps(0, signedArea3DeltaY, signedArea2DeltaY, signedArea1DeltaY);
|
|
|
|
signedAreaPixel1 = _mm_set_ps(0, signedArea3Start, signedArea2Start, signedArea1Start);
|
|
signedAreaPixel2 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaX);
|
|
|
|
// NOTE: Increase step size to the number of pixels rasterised with SIMD
|
|
{
|
|
const __m128 STEP_X_4X = _mm_set_ps1((f32)NUM_X_PIXELS_TO_SIMD);
|
|
const __m128 STEP_Y_4X = _mm_set_ps1((f32)NUM_Y_PIXELS_TO_SIMD);
|
|
|
|
signedAreaPixelDeltaX = _mm_mul_ps(signedAreaPixelDeltaX, STEP_X_4X);
|
|
signedAreaPixelDeltaY = _mm_mul_ps(signedAreaPixelDeltaY, STEP_Y_4X);
|
|
}
|
|
}
|
|
|
|
const u32 zBufferPitch = renderBuffer->width;
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Scan and Render
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DTRDebug_BeginCycleCount("SIMDTriangle_Rasterise", DTRDebugCycleCount_SIMDTriangle_Rasterise);
|
|
for (i32 bufferY = min.y; bufferY < max.y; bufferY += NUM_Y_PIXELS_TO_SIMD)
|
|
{
|
|
__m128 signedArea1 = signedAreaPixel1;
|
|
__m128 signedArea2 = signedAreaPixel2;
|
|
|
|
for (i32 bufferX = min.x; bufferX < max.x; bufferX += NUM_X_PIXELS_TO_SIMD)
|
|
{
|
|
|
|
// Rasterise buffer(X, Y) pixel
|
|
{
|
|
__m128 checkArea = signedArea1;
|
|
__m128 isGreater = _mm_cmpge_ps(checkArea, ZERO_4X);
|
|
i32 isGreaterResult = _mm_movemask_ps(isGreater);
|
|
i32 posX = bufferX;
|
|
i32 posY = bufferY;
|
|
|
|
if ((isGreaterResult & IS_GREATER_MASK) == IS_GREATER_MASK)
|
|
{
|
|
DTRDebug_BeginCycleCount("SIMDTriangle_RasterisePixel",
|
|
DTRDebugCycleCount_SIMDTriangle_RasterisePixel);
|
|
__m128 barycentric = _mm_mul_ps(checkArea, invSignedAreaParallelogram_4x);
|
|
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
|
|
|
|
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
|
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
|
|
((f32 *)&barycentricZ)[1] +
|
|
((f32 *)&barycentricZ)[2];
|
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
|
if (pixelZValue > currZValue)
|
|
{
|
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
|
SIMDSetPixel(renderBuffer, posX, posY, simdColor, ColorSpace_Linear);
|
|
}
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SIMDTriangle_RasterisePixel);
|
|
}
|
|
signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX);
|
|
}
|
|
|
|
// Rasterise buffer(X + 1, Y) pixel
|
|
{
|
|
__m128 checkArea = signedArea2;
|
|
__m128 isGreater = _mm_cmpge_ps(checkArea, ZERO_4X);
|
|
i32 isGreaterResult = _mm_movemask_ps(isGreater);
|
|
i32 posX = bufferX + 1;
|
|
i32 posY = bufferY;
|
|
if ((isGreaterResult & IS_GREATER_MASK) == IS_GREATER_MASK && posX < max.x)
|
|
{
|
|
__m128 barycentric = _mm_mul_ps(checkArea, invSignedAreaParallelogram_4x);
|
|
__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric);
|
|
|
|
i32 zBufferIndex = posX + (posY * zBufferPitch);
|
|
f32 pixelZValue = ((f32 *)&barycentricZ)[0] +
|
|
((f32 *)&barycentricZ)[1] +
|
|
((f32 *)&barycentricZ)[2];
|
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
|
if (pixelZValue > currZValue)
|
|
{
|
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
|
SIMDSetPixel(renderBuffer, posX, posY, simdColor, ColorSpace_Linear);
|
|
}
|
|
}
|
|
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
|
|
}
|
|
|
|
}
|
|
|
|
signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY);
|
|
signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY);
|
|
}
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SIMDTriangle_Rasterise);
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SIMDTriangle);
|
|
}
|
|
|
|
FILE_SCOPE void SlowTriangle(DTRRenderBuffer *const renderBuffer, const DqnV3 p1, const DqnV3 p2,
|
|
const DqnV3 p3, DqnV4 color)
|
|
{
|
|
DTRDebug_BeginCycleCount("SlowTriangle", DTRDebugCycleCount_SlowTriangle);
|
|
color = DTRRender_SRGB1ToLinearSpaceV4(color);
|
|
color = PreMultiplyAlpha1(color);
|
|
|
|
DqnV2i max = DqnV2i_2f(DQN_MAX(DQN_MAX(p1.x, p2.x), p3.x), DQN_MAX(DQN_MAX(p1.y, p2.y), p3.y));
|
|
DqnV2i min = DqnV2i_2f(DQN_MIN(DQN_MIN(p1.x, p2.x), p3.x), DQN_MIN(DQN_MIN(p1.y, p2.y), p3.y));
|
|
min.x = DQN_MAX(min.x, 0);
|
|
min.y = DQN_MAX(min.y, 0);
|
|
max.x = DQN_MIN(max.x, renderBuffer->width - 1);
|
|
max.y = DQN_MIN(max.y, renderBuffer->height - 1);
|
|
|
|
/*
|
|
/////////////////////////////////////////////////////////////////////////
|
|
// Rearranging the Determinant
|
|
/////////////////////////////////////////////////////////////////////////
|
|
Given two points that form a line and an extra point to test, we can
|
|
determine whether a point lies on the line, or is to the left or right of
|
|
a the line.
|
|
|
|
We can do this using the PerpDotProduct conceptually known as the cross
|
|
product in 2D. This can be expressed using the determinant and is the
|
|
method we are using.
|
|
|
|
First forming a 3x3 matrix of our terms with a, b being from the triangle
|
|
and test point c, we can derive a 2x2 matrix by subtracting the 1st
|
|
column from the 2nd and 1st column from the third.
|
|
|
|
| ax bx cx | | (bx - ax) (cx - ax) |
|
|
m = | ay by cy | ==> | (by - ay) (cy - ay) |
|
|
| 1 1 1 |
|
|
|
|
From our 2x2 representation we can calculate the determinant which gives
|
|
us the signed area of the triangle extended into a parallelogram.
|
|
|
|
det(m) = (bx - ax)(cy - ay) - (by - ay)(cx - ax)
|
|
|
|
Depending on the order of the vertices supplied, if it's
|
|
- CCW and c(x,y) is outside the line (triangle), the signed area is negative
|
|
- CCW and c(x,y) is inside the line (triangle), the signed area is positive
|
|
- CW and c(x,y) is outside the line (triangle), the signed area is positive
|
|
- CW and c(x,y) is inside the line (triangle), the signed area is negative
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
// Optimising the Determinant Calculation
|
|
/////////////////////////////////////////////////////////////////////////
|
|
The det(m) can be rearranged if expanded to be
|
|
SignedArea(cx, cy) = (ay - by)cx + (bx - ay)cy + (ax*by - ay*bx)
|
|
|
|
When we scan to fill our triangle we go pixel by pixel, left to right,
|
|
bottom to top, notice that this translates to +1 for x and +1 for y, i.e.
|
|
|
|
The first pixel's signed area is cx, then cx+1, cx+2 .. etc
|
|
SignedArea(cx, cy) = (ay - by)cx + (bx - ax)cy + (ax*by - ay*bx)
|
|
SignedArea(cx+1, cy) = (ay - by)cx+1 + (bx - ax)cy + (ax*by - ay*bx)
|
|
|
|
Then
|
|
SignedArea(cx+1, cy) - SignedArea(cx, cy) =
|
|
(ay - by)cx+1 + (bx - ax)cy + (ax*by - ay*bx)
|
|
- (ay - by)cx + (bx - ax)cy + (ax*by - ay*bx)
|
|
= (ay - by)cx+1 - (ay - by)cx
|
|
= (ay - by)(cx+1 - cx)
|
|
= (ay - by)(1) = (ay - by)
|
|
|
|
Similarly when progressing in y
|
|
SignedArea(cx, cy) = (ay - by)cx + (bx - ay)cy + (ax*by - ay*bx)
|
|
SignedArea(cx, cy+1) = (ay - by)cx + (bx - ay)cy+1 + (ax*by - ay*bx)
|
|
|
|
Then
|
|
SignedArea(cx, cy+1) - SignedArea(cx, cy) =
|
|
(ay - by)cx + (bx - ax)cy+1 + (ax*by - ay*bx)
|
|
- (ay - by)cx + (bx - ax)cy + (ax*by - ay*bx)
|
|
= (bx - ax)cy+1 - (bx - ax)cy
|
|
= (bx - ax)(cy+1 - cy)
|
|
= (bx - ax)(1) = (bx - ax)
|
|
|
|
Then we can see that when we progress along x, we only need to change by
|
|
the value of SignedArea by (ay - by) and similarly for y, (bx - ax)
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
// Barycentric Coordinates
|
|
/////////////////////////////////////////////////////////////////////////
|
|
At this point we have an equation that can be used to calculate the
|
|
2x the signed area of a triangle, or the signed area of a parallelogram,
|
|
the two of which are equivalent.
|
|
|
|
det(m) = (bx - ax)(cy - ay) - (by - ay)(cx - ax)
|
|
SignedArea(cx, cy) = (ay - by)cx + (bx - ay)cy + (ax*by - ay*bx)
|
|
|
|
A barycentric coordinate is some coefficient on A, B, C that allows us to
|
|
specify an arbitrary point in the triangle as a linear combination of the
|
|
three usually with some coefficient [0, 1].
|
|
|
|
The SignedArea turns out to be actually the barycentric coord for c(x, y)
|
|
normalised to the sum of the parallelogram area. For example a triangle
|
|
with points, A, B, C and an arbitrary point P inside the triangle. Then
|
|
|
|
SignedArea(P) with vertex A and B = Barycentric Coordinate for C
|
|
SignedArea(P) with vertex B and C = Barycentric Coordinate for A
|
|
SignedArea(P) with vertex C and A = Barycentric Coordinate for B
|
|
|
|
B
|
|
/ \
|
|
/ \
|
|
/ P \
|
|
/_______\
|
|
A C
|
|
|
|
This is normalised to the area's sum, but we can trivially turn this into
|
|
a normalised version by dividing the area of the parallelogram, i.e.
|
|
|
|
BaryCentricC(P) = (SignedArea(P) with vertex A and B)/SignedArea(with the orig triangle vertex)
|
|
BaryCentricA(P) = (SignedArea(P) with vertex B and C)/SignedArea(with the orig triangle vertex)
|
|
BaryCentricB(P) = (SignedArea(P) with vertex C and A)/SignedArea(with the orig triangle vertex)
|
|
*/
|
|
|
|
DqnV2i startP = min;
|
|
f32 signedArea1Start = Triangle2TimesSignedArea(p2.xy, p3.xy, DqnV2_V2i(startP));
|
|
f32 signedArea1DeltaX = p2.y - p3.y;
|
|
f32 signedArea1DeltaY = p3.x - p2.x;
|
|
|
|
f32 signedArea2Start = Triangle2TimesSignedArea(p3.xy, p1.xy, DqnV2_V2i(startP));
|
|
f32 signedArea2DeltaX = p3.y - p1.y;
|
|
f32 signedArea2DeltaY = p1.x - p3.x;
|
|
|
|
f32 signedArea3Start = Triangle2TimesSignedArea(p1.xy, p2.xy, DqnV2_V2i(startP));
|
|
f32 signedArea3DeltaX = p1.y - p2.y;
|
|
f32 signedArea3DeltaY = p2.x - p1.x;
|
|
|
|
f32 signedAreaParallelogram = signedArea1Start + signedArea2Start + signedArea3Start;
|
|
if (signedAreaParallelogram == 0) return;
|
|
|
|
f32 invSignedAreaParallelogram = 1.0f / signedAreaParallelogram;
|
|
|
|
const DqnV3 p2SubP1 = p2 - p1;
|
|
const DqnV3 p3SubP1 = p3 - p1;
|
|
const u32 zBufferPitch = renderBuffer->width;
|
|
DTRDebug_BeginCycleCount("SlowTriangle_Rasterise", DTRDebugCycleCount_SlowTriangle_Rasterise);
|
|
for (i32 bufferY = min.y; bufferY < max.y; bufferY++)
|
|
{
|
|
f32 signedArea1 = signedArea1Start;
|
|
f32 signedArea2 = signedArea2Start;
|
|
f32 signedArea3 = signedArea3Start;
|
|
|
|
for (i32 bufferX = min.x; bufferX < max.x; bufferX++)
|
|
{
|
|
if (signedArea1 >= 0 && signedArea2 >= 0 && signedArea3 >= 0)
|
|
{
|
|
DTRDebug_BeginCycleCount("SlowTriangle_RasterisePixel",
|
|
DTRDebugCycleCount_SlowTriangle_RasterisePixel);
|
|
f32 barycentricB = signedArea3 * invSignedAreaParallelogram;
|
|
f32 barycentricC = signedArea1 * invSignedAreaParallelogram;
|
|
|
|
i32 zBufferIndex = bufferX + (bufferY * zBufferPitch);
|
|
f32 pixelZValue = p1.z + (barycentricB * (p2SubP1.z)) + (barycentricC * (p3SubP1.z));
|
|
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
|
|
DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height));
|
|
if (pixelZValue > currZValue)
|
|
{
|
|
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
|
|
SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear);
|
|
}
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SlowTriangle_RasterisePixel);
|
|
}
|
|
|
|
signedArea1 += signedArea1DeltaX;
|
|
signedArea2 += signedArea2DeltaX;
|
|
signedArea3 += signedArea3DeltaX;
|
|
}
|
|
|
|
signedArea1Start += signedArea1DeltaY;
|
|
signedArea2Start += signedArea2DeltaY;
|
|
signedArea3Start += signedArea3DeltaY;
|
|
}
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SlowTriangle_Rasterise);
|
|
DTRDebug_EndCycleCount(DTRDebugCycleCount_SlowTriangle);
|
|
}
|
|
|
|
void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3,
|
|
DqnV4 color, const DTRRenderTransform transform)
|
|
{
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Transform vertexes p1, p2, p3 inplace
|
|
////////////////////////////////////////////////////////////////////////////
|
|
Make3PointsClockwise(&p1, &p2, &p3);
|
|
|
|
// TODO(doyle): Transform is only in 2d right now
|
|
DqnV2 origin = Get2DOriginFromTransformAnchor(p1.xy, p2.xy, p3.xy, transform);
|
|
DqnV2 pList[3] = {p1.xy - origin, p2.xy - origin, p3.xy - origin};
|
|
TransformPoints(origin, pList, DQN_ARRAY_COUNT(pList), transform.scale, transform.rotation);
|
|
|
|
p1.xy = pList[0];
|
|
p2.xy = pList[1];
|
|
p3.xy = pList[2];
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// SIMD/Slow Path
|
|
////////////////////////////////////////////////////////////////////////////
|
|
if (globalDTRPlatformFlags.canUseSSE2)
|
|
{
|
|
SIMDTriangle(renderBuffer, p1, p2, p3, color);
|
|
}
|
|
else
|
|
{
|
|
SlowTriangle(renderBuffer, p1, p2, p3, color);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Debug
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DTRDebug_CounterIncrement(DTRDebugCounter_RenderTriangle);
|
|
{
|
|
bool drawBoundingBox = true;
|
|
bool drawBasis = true;
|
|
bool drawVertexMarkers = true;
|
|
|
|
DebugRenderMarkers(renderBuffer, pList, DQN_ARRAY_COUNT(pList), transform, drawBoundingBox,
|
|
drawBasis, drawVertexMarkers);
|
|
}
|
|
}
|
|
|
|
void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos,
|
|
const DTRRenderTransform transform, DqnV4 color)
|
|
{
|
|
if (!bitmap || !bitmap->memory || !renderBuffer) return;
|
|
DTR_DEBUG_EP_TIMED_FUNCTION();
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Transform vertexes
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DqnV2 min = pos;
|
|
DqnV2 max = min + DqnV2_V2i(bitmap->dim);
|
|
|
|
RectPoints rectPoints = TransformRectPoints(min, max, transform);
|
|
const DqnV2 *const pList = &rectPoints.pList[0];
|
|
const i32 RECT_PLIST_SIZE = DQN_ARRAY_COUNT(rectPoints.pList);
|
|
|
|
DqnRect bounds = GetBoundingBox(pList, RECT_PLIST_SIZE);
|
|
min = bounds.min;
|
|
max = bounds.max;
|
|
|
|
color = DTRRender_SRGB1ToLinearSpaceV4(color);
|
|
color = PreMultiplyAlpha1(color);
|
|
DQN_ASSERT(color.a >= 0 && color.a <= 1.0f);
|
|
DQN_ASSERT(color.r >= 0 && color.r <= 1.0f);
|
|
DQN_ASSERT(color.g >= 0 && color.g <= 1.0f);
|
|
DQN_ASSERT(color.b >= 0 && color.b <= 1.0f);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Clip drawing space
|
|
////////////////////////////////////////////////////////////////////////////
|
|
DqnRect drawRect = DqnRect_4f(bounds.min.x, bounds.min.y, bounds.max.x, bounds.max.y);
|
|
DqnRect clip = DqnRect_4i(0, 0, renderBuffer->width, renderBuffer->height);
|
|
|
|
DqnRect clippedDrawRect = DqnRect_ClipRect(drawRect, clip);
|
|
DqnV2 clippedSize = DqnRect_GetSizeV2(clippedDrawRect);
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Setup Texture Mapping
|
|
////////////////////////////////////////////////////////////////////////////
|
|
const i32 pitch = bitmap->dim.w * bitmap->bytesPerPixel;
|
|
u8 *const bitmapPtr = (u8 *)bitmap->memory;
|
|
|
|
const DqnV2 rectBasis = pList[RectPointsIndex_Basis];
|
|
const DqnV2 xAxisRelToBasis = pList[RectPointsIndex_XAxis] - rectBasis;
|
|
const DqnV2 yAxisRelToBasis = pList[RectPointsIndex_YAxis] - rectBasis;
|
|
|
|
const f32 invXAxisLenSq = 1 / DqnV2_LengthSquared(DqnV2_1f(0), xAxisRelToBasis);
|
|
const f32 invYAxisLenSq = 1 / DqnV2_LengthSquared(DqnV2_1f(0), yAxisRelToBasis);
|
|
for (i32 y = 0; y < (i32)clippedSize.h; y++)
|
|
{
|
|
const i32 bufferY = (i32)clippedDrawRect.min.y + y;
|
|
for (i32 x = 0; x < (i32)clippedSize.w; x++)
|
|
{
|
|
const i32 bufferX = (i32)clippedDrawRect.min.x + x;
|
|
|
|
bool bufXYIsInside = true;
|
|
for (i32 pIndex = 0; pIndex < RECT_PLIST_SIZE; pIndex++)
|
|
{
|
|
DqnV2 origin = pList[pIndex];
|
|
DqnV2 axis = pList[(pIndex + 1) % RECT_PLIST_SIZE] - origin;
|
|
DqnV2 testP = DqnV2_2i(bufferX, bufferY) - origin;
|
|
|
|
f32 dot = DqnV2_Dot(testP, axis);
|
|
if (dot < 0)
|
|
{
|
|
bufXYIsInside = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (bufXYIsInside)
|
|
{
|
|
DTR_DEBUG_EP_TIMED_BLOCK("DTRRender_Bitmap TexelCalculation");
|
|
DqnV2 bufPRelToBasis = DqnV2_2i(bufferX, bufferY) - rectBasis;
|
|
|
|
f32 u = DqnV2_Dot(bufPRelToBasis, xAxisRelToBasis) * invXAxisLenSq;
|
|
f32 v = DqnV2_Dot(bufPRelToBasis, yAxisRelToBasis) * invYAxisLenSq;
|
|
u = DqnMath_Clampf(u, 0.0f, 1.0f);
|
|
v = DqnMath_Clampf(v, 0.0f, 1.0f);
|
|
|
|
f32 texelXf = u * (f32)(bitmap->dim.w - 1);
|
|
f32 texelYf = v * (f32)(bitmap->dim.h - 1);
|
|
DQN_ASSERT(texelXf >= 0 && texelXf < bitmap->dim.w);
|
|
DQN_ASSERT(texelYf >= 0 && texelYf < bitmap->dim.h);
|
|
|
|
i32 texelX = (i32)texelXf;
|
|
i32 texelY = (i32)texelYf;
|
|
f32 texelFractionalX = texelXf - texelX;
|
|
f32 texelFractionalY = texelYf - texelY;
|
|
|
|
i32 texel1X = texelX;
|
|
i32 texel1Y = texelY;
|
|
|
|
i32 texel2X = DQN_MIN((texelX + 1), bitmap->dim.w - 1);
|
|
i32 texel2Y = texelY;
|
|
|
|
i32 texel3X = texelX;
|
|
i32 texel3Y = DQN_MIN((texelY + 1), bitmap->dim.h - 1);
|
|
|
|
i32 texel4X = DQN_MIN((texelX + 1), bitmap->dim.w - 1);
|
|
i32 texel4Y = DQN_MIN((texelY + 1), bitmap->dim.h - 1);
|
|
|
|
{
|
|
DTR_DEBUG_EP_TIMED_BLOCK("DTRRender_Bitmap TexelBilinearInterpolation");
|
|
u32 texel1 = *(u32 *)(bitmapPtr + ((texel1X * bitmap->bytesPerPixel) + (texel1Y * pitch)));
|
|
u32 texel2 = *(u32 *)(bitmapPtr + ((texel2X * bitmap->bytesPerPixel) + (texel2Y * pitch)));
|
|
u32 texel3 = *(u32 *)(bitmapPtr + ((texel3X * bitmap->bytesPerPixel) + (texel3Y * pitch)));
|
|
u32 texel4 = *(u32 *)(bitmapPtr + ((texel4X * bitmap->bytesPerPixel) + (texel4Y * pitch)));
|
|
|
|
DqnV4 color1;
|
|
color1.a = (f32)(texel1 >> 24);
|
|
color1.b = (f32)((texel1 >> 16) & 0xFF);
|
|
color1.g = (f32)((texel1 >> 8) & 0xFF);
|
|
color1.r = (f32)((texel1 >> 0) & 0xFF);
|
|
|
|
DqnV4 color2;
|
|
color2.a = (f32)(texel2 >> 24);
|
|
color2.b = (f32)((texel2 >> 16) & 0xFF);
|
|
color2.g = (f32)((texel2 >> 8) & 0xFF);
|
|
color2.r = (f32)((texel2 >> 0) & 0xFF);
|
|
|
|
DqnV4 color3;
|
|
color3.a = (f32)(texel3 >> 24);
|
|
color3.b = (f32)((texel3 >> 16) & 0xFF);
|
|
color3.g = (f32)((texel3 >> 8) & 0xFF);
|
|
color3.r = (f32)((texel3 >> 0) & 0xFF);
|
|
|
|
DqnV4 color4;
|
|
color4.a = (f32)(texel4 >> 24);
|
|
color4.b = (f32)((texel4 >> 16) & 0xFF);
|
|
color4.g = (f32)((texel4 >> 8) & 0xFF);
|
|
color4.r = (f32)((texel4 >> 0) & 0xFF);
|
|
|
|
color1 *= DTRRENDER_INV_255;
|
|
color2 *= DTRRENDER_INV_255;
|
|
color3 *= DTRRENDER_INV_255;
|
|
color4 *= DTRRENDER_INV_255;
|
|
|
|
color1 = DTRRender_SRGB1ToLinearSpaceV4(color1);
|
|
color2 = DTRRender_SRGB1ToLinearSpaceV4(color2);
|
|
color3 = DTRRender_SRGB1ToLinearSpaceV4(color3);
|
|
color4 = DTRRender_SRGB1ToLinearSpaceV4(color4);
|
|
|
|
DqnV4 color12;
|
|
color12.a = DqnMath_Lerp(color1.a, texelFractionalX, color2.a);
|
|
color12.b = DqnMath_Lerp(color1.b, texelFractionalX, color2.b);
|
|
color12.g = DqnMath_Lerp(color1.g, texelFractionalX, color2.g);
|
|
color12.r = DqnMath_Lerp(color1.r, texelFractionalX, color2.r);
|
|
|
|
DqnV4 color34;
|
|
color34.a = DqnMath_Lerp(color3.a, texelFractionalX, color4.a);
|
|
color34.b = DqnMath_Lerp(color3.b, texelFractionalX, color4.b);
|
|
color34.g = DqnMath_Lerp(color3.g, texelFractionalX, color4.g);
|
|
color34.r = DqnMath_Lerp(color3.r, texelFractionalX, color4.r);
|
|
|
|
DqnV4 blend;
|
|
blend.a = DqnMath_Lerp(color12.a, texelFractionalY, color34.a);
|
|
blend.b = DqnMath_Lerp(color12.b, texelFractionalY, color34.b);
|
|
blend.g = DqnMath_Lerp(color12.g, texelFractionalY, color34.g);
|
|
blend.r = DqnMath_Lerp(color12.r, texelFractionalY, color34.r);
|
|
|
|
DQN_ASSERT(blend.a >= 0 && blend.a <= 1.0f);
|
|
DQN_ASSERT(blend.r >= 0 && blend.r <= 1.0f);
|
|
DQN_ASSERT(blend.g >= 0 && blend.g <= 1.0f);
|
|
DQN_ASSERT(blend.b >= 0 && blend.b <= 1.0f);
|
|
|
|
// TODO(doyle): Color modulation does not work!!! By supplying
|
|
// colors [0->1] it'll reduce some of the coverage of a channel
|
|
// and once alpha blending is applied that reduced coverage will
|
|
// blend with the background and cause the bitmap to go
|
|
// transparent when it shouldn't.
|
|
blend.a *= color.a;
|
|
blend.r *= color.r;
|
|
blend.g *= color.g;
|
|
blend.b *= color.b;
|
|
|
|
SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
// Debug
|
|
////////////////////////////////////////////////////////////////////////////
|
|
{
|
|
bool drawBoundingBox = true;
|
|
bool drawBasis = true;
|
|
bool drawVertexMarkers = true;
|
|
|
|
DebugRenderMarkers(renderBuffer, pList, RECT_PLIST_SIZE, transform, drawBoundingBox,
|
|
drawBasis, drawVertexMarkers);
|
|
}
|
|
}
|
|
|
|
void DTRRender_Clear(DTRRenderBuffer *const renderBuffer,
|
|
DqnV3 color)
|
|
{
|
|
if (!renderBuffer) return;
|
|
|
|
DQN_ASSERT(color.r >= 0.0f && color.r <= 1.0f);
|
|
DQN_ASSERT(color.g >= 0.0f && color.g <= 1.0f);
|
|
DQN_ASSERT(color.b >= 0.0f && color.b <= 1.0f);
|
|
color *= 255.0f;
|
|
|
|
u32 *const bitmapPtr = (u32 *)renderBuffer->memory;
|
|
for (i32 y = 0; y < renderBuffer->height; y++)
|
|
{
|
|
for (i32 x = 0; x < renderBuffer->width; x++)
|
|
{
|
|
u32 pixel = ((i32)0 << 24) |
|
|
((i32)color.r << 16) |
|
|
((i32)color.g << 8) |
|
|
((i32)color.b << 0);
|
|
bitmapPtr[x + (y * renderBuffer->width)] = pixel;
|
|
}
|
|
}
|
|
}
|