Accelerate textured triangle rendering using SIMD
This commit is contained in:
		
							parent
							
								
									4d2a7a7c06
								
							
						
					
					
						commit
						49270a2826
					
				| @ -1104,8 +1104,8 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer, | |||||||
| 			} | 			} | ||||||
| 			else | 			else | ||||||
| 			{ | 			{ | ||||||
| 				DTRRender_TexturedTriangle(&renderBuffer, screenVA, screenVB, screenVC, texA, texB, | 				DTRRender_TexturedTriangle(input, &renderBuffer, screenVA, screenVB, screenVC, texA, | ||||||
| 				                           texC, &state->mesh.tex, modelCol); | 				                           texB, texC, &state->mesh.tex, modelCol); | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			bool DEBUG_WIREFRAME = false; | 			bool DEBUG_WIREFRAME = false; | ||||||
|  | |||||||
| @ -17,14 +17,23 @@ void DTRAsset_InitGlobalState() | |||||||
| 	stbi_set_flip_vertically_on_load(true); | 	stbi_set_flip_vertically_on_load(true); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| FILE_SCOPE void MemcopyInternal(u8 *dest, u8 *src, size_t numBytes) | FILE_SCOPE void MemcopyInternal(u8 *const dest, u8 *const src, size_t numBytes) | ||||||
| { | { | ||||||
| 	if (!dest || !src || numBytes == 0) return; | 	if (!dest || !src || numBytes == 0) return; | ||||||
| 	for (size_t i = 0; i < numBytes; i++) | 	for (size_t i = 0; i < numBytes; i++) | ||||||
| 		dest[i] = src[i]; | 		dest[i] = src[i]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| FILE_SCOPE void AssetDqnArrayMemAPICallback(DqnMemAPICallbackInfo info, DqnMemAPICallbackResult *result) | // NOTE: Dynamic array allocations just request space at the first option it
 | ||||||
|  | // can take. Realloc will reallocate in place if there's space. Otherwise
 | ||||||
|  | // it'll create a new block and reallocate there by copying the old data over.
 | ||||||
|  | 
 | ||||||
|  | // So this does waste space. But is a quick way to reroute allocations into
 | ||||||
|  | // a MemStack. Its main intended purpose is for one-shot loading data that you
 | ||||||
|  | // don't know how much space you need in your DArray. After filling out
 | ||||||
|  | // the dynamic array you then compact the data manually using memcopys into
 | ||||||
|  | // a new block and discard the old data.
 | ||||||
|  | FILE_SCOPE void DumbDynamicArrayMemAPICallback(DqnMemAPICallbackInfo info, DqnMemAPICallbackResult *result) | ||||||
| { | { | ||||||
| 	DQN_ASSERT(info.type != DqnMemAPICallbackType_Invalid); | 	DQN_ASSERT(info.type != DqnMemAPICallbackType_Invalid); | ||||||
| 	DqnMemStack *stack = static_cast<DqnMemStack *>(info.userContext); | 	DqnMemStack *stack = static_cast<DqnMemStack *>(info.userContext); | ||||||
| @ -40,27 +49,7 @@ FILE_SCOPE void AssetDqnArrayMemAPICallback(DqnMemAPICallbackInfo info, DqnMemAP | |||||||
| 
 | 
 | ||||||
| 		case DqnMemAPICallbackType_Free: | 		case DqnMemAPICallbackType_Free: | ||||||
| 		{ | 		{ | ||||||
| 			DqnMemStackBlock **blockPtr = &stack->block; | 			DQN_ASSERT(DQN_INVALID_CODE_PATH); | ||||||
| 			while (*blockPtr && (*blockPtr)->memory != info.ptrToFree) |  | ||||||
| 			{ |  | ||||||
| 				// NOTE(doyle): Ensure that the base ptr of each block is always
 |  | ||||||
| 				// actually aligned so we don't ever miss finding the block if
 |  | ||||||
| 				// the allocator had to realign the pointer from the base
 |  | ||||||
| 				// address.
 |  | ||||||
| 				if (DTR_DEBUG) |  | ||||||
| 				{ |  | ||||||
| 					size_t memBaseAddr = (size_t)((*blockPtr)->memory); |  | ||||||
| 					DQN_ASSERT(DQN_ALIGN_POW_N(memBaseAddr, stack->byteAlign) == |  | ||||||
| 					           memBaseAddr); |  | ||||||
| 				} |  | ||||||
| 				blockPtr = &((*blockPtr)->prevBlock); |  | ||||||
| 			} |  | ||||||
| 
 |  | ||||||
| 			DQN_ASSERT(*blockPtr && (*blockPtr)->memory == info.ptrToFree); |  | ||||||
| 			DqnMemStackBlock *blockToFree = *blockPtr; |  | ||||||
| 			*blockPtr                     = blockToFree->prevBlock; |  | ||||||
| 			DqnMem_Free(blockToFree); |  | ||||||
| 
 |  | ||||||
| 		} | 		} | ||||||
| 		break; | 		break; | ||||||
| 
 | 
 | ||||||
| @ -189,7 +178,7 @@ bool DTRAsset_LoadWavefrontObj(const PlatformAPI api, DqnMemStack *const memStac | |||||||
| 	size_t fileSize                = file.size; | 	size_t fileSize                = file.size; | ||||||
| 
 | 
 | ||||||
| 	DqnMemAPI memAPI   = {}; | 	DqnMemAPI memAPI   = {}; | ||||||
| 	memAPI.callback    = AssetDqnArrayMemAPICallback; | 	memAPI.callback    = DumbDynamicArrayMemAPICallback; | ||||||
| 	memAPI.userContext = memStack; | 	memAPI.userContext = memStack; | ||||||
| 
 | 
 | ||||||
| 	enum WavefVertexType { | 	enum WavefVertexType { | ||||||
| @ -199,6 +188,23 @@ bool DTRAsset_LoadWavefrontObj(const PlatformAPI api, DqnMemStack *const memStac | |||||||
| 		WavefVertexType_Normal, | 		WavefVertexType_Normal, | ||||||
| 	}; | 	}; | ||||||
| 
 | 
 | ||||||
|  | 	// TODO(doyle): We should profile, reading it out to WavefModel format and
 | ||||||
|  | 	// then copying it over, versus just reading the file twice. First pass is
 | ||||||
|  | 	// to count the number of vertexes etc. for each section we need. Then the
 | ||||||
|  | 	// second pass we can allocate directly the number we need and reparse it.
 | ||||||
|  | 	// I have a feeling that, in general that's a better idea, at least it gets
 | ||||||
|  | 	// rid of a lot of stupid copying code and memstack juggling.
 | ||||||
|  | 
 | ||||||
|  | 	// NOTE(doyle): We pre-process the data into an intermediate format that
 | ||||||
|  | 	// more accurately represents the file format. Since there's no metadata
 | ||||||
|  | 	// inside Wavefront objects, we don't know how many vertexes/texUV/normals
 | ||||||
|  | 	// there are- which makes it hard to allocate "nicely" out of our memory
 | ||||||
|  | 	// stack.
 | ||||||
|  | 
 | ||||||
|  | 	// So we preprocess. Then once we know the final amount, copy over the data
 | ||||||
|  | 	// to a new memstack block such that all the data is compacted together in
 | ||||||
|  | 	// memory for locality. Then just throw away the intermediate
 | ||||||
|  | 	// representation.
 | ||||||
| 	WavefModel dummy_ = {}; | 	WavefModel dummy_ = {}; | ||||||
| 	WavefModel *obj   = &dummy_; | 	WavefModel *obj   = &dummy_; | ||||||
| 
 | 
 | ||||||
| @ -410,11 +416,17 @@ bool DTRAsset_LoadWavefrontObj(const PlatformAPI api, DqnMemStack *const memStac | |||||||
| 					DQN_ASSERT(obj->groupNameIndex + 1 < DQN_ARRAY_COUNT(obj->groupName)); | 					DQN_ASSERT(obj->groupNameIndex + 1 < DQN_ARRAY_COUNT(obj->groupName)); | ||||||
| 
 | 
 | ||||||
| 					DQN_ASSERT(!obj->groupName[obj->groupNameIndex]); | 					DQN_ASSERT(!obj->groupName[obj->groupNameIndex]); | ||||||
|  | 					// TODO(doyle): Broken since I don't "copy" it over to our
 | ||||||
|  | 					// final DTRMesh. Below I copy over the data so that all the
 | ||||||
|  | 					// allocations are compacted together but don't copy this
 | ||||||
|  | 					// yet. Which means the name gets trashed atm.
 | ||||||
|  | #if 0 | ||||||
| 					obj->groupName[obj->groupNameIndex++] = | 					obj->groupName[obj->groupNameIndex++] = | ||||||
| 					    (char *)DqnMemStack_Push(memStack, (nameLen + 1) * sizeof(char)); | 					    (char *)DqnMemStack_Push(memStack, (nameLen + 1) * sizeof(char)); | ||||||
| 
 | 
 | ||||||
| 					for (i32 i = 0; i < nameLen; i++) | 					for (i32 i = 0; i < nameLen; i++) | ||||||
| 						obj->groupName[obj->groupNameIndex - 1][i] = namePtr[i]; | 						obj->groupName[obj->groupNameIndex - 1][i] = namePtr[i]; | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| 					while (scan && (*scan == ' ' || *scan == '\n')) | 					while (scan && (*scan == ' ' || *scan == '\n')) | ||||||
| 						scan++; | 						scan++; | ||||||
|  | |||||||
| @ -121,7 +121,9 @@ void inline DTRDebug_BeginCycleCount(enum DTRDebugCycleCount tag) | |||||||
| 	{ | 	{ | ||||||
| 		if (globalDebug.input && globalDebug.input->canUseRdtsc) | 		if (globalDebug.input && globalDebug.input->canUseRdtsc) | ||||||
| 		{ | 		{ | ||||||
| 			globalDebug.cycleCount[tag] = __rdtsc(); | 			DTRDebugCycles *const cycles = &globalDebug.cycles[tag]; | ||||||
|  | 			cycles->tmpStartCycles       = __rdtsc(); | ||||||
|  | 			cycles->numInvokes++; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| @ -132,7 +134,8 @@ void inline DTRDebug_EndCycleCount(enum DTRDebugCycleCount tag) | |||||||
| 	{ | 	{ | ||||||
| 		if (globalDebug.input && globalDebug.input->canUseRdtsc) | 		if (globalDebug.input && globalDebug.input->canUseRdtsc) | ||||||
| 		{ | 		{ | ||||||
| 			globalDebug.cycleCount[tag] = __rdtsc() - globalDebug.cycleCount[tag]; | 			DTRDebugCycles *const cycles = &globalDebug.cycles[tag]; | ||||||
|  | 			cycles->totalCycles += __rdtsc() - cycles->tmpStartCycles; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| @ -214,9 +217,16 @@ void DTRDebug_Update(DTRState *const state, | |||||||
| 		DTRDebug_PushText("TrianglesRendered: %'lld", debug->counter[DTRDebugCounter_RenderTriangle]); | 		DTRDebug_PushText("TrianglesRendered: %'lld", debug->counter[DTRDebugCounter_RenderTriangle]); | ||||||
| 		DTRDebug_PushText(""); | 		DTRDebug_PushText(""); | ||||||
| 
 | 
 | ||||||
| 		for (i32 i = 0; i < DQN_ARRAY_COUNT(debug->cycleCount); i++) | 		DTRDebugCycles emptyDebugCycles = {}; | ||||||
|  | 		for (i32 i = 0; i < DQN_ARRAY_COUNT(debug->cycles); i++) | ||||||
| 		{ | 		{ | ||||||
| 			DTRDebug_PushText("%d: %'lld cycles", i, debug->cycleCount[i]); | 			DTRDebugCycles *const cycles = &globalDebug.cycles[i]; | ||||||
|  | 
 | ||||||
|  | 			u64 invocations = (cycles->numInvokes == 0) ? 1 : cycles->numInvokes; | ||||||
|  | 			u64 avgCycles   = cycles->totalCycles / invocations; | ||||||
|  | 			DTRDebug_PushText("%d: %'lld avg cycles", i, avgCycles); | ||||||
|  | 
 | ||||||
|  | 			*cycles = emptyDebugCycles; | ||||||
| 		} | 		} | ||||||
| 		DTRDebug_PushText(""); | 		DTRDebug_PushText(""); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -47,6 +47,14 @@ enum DTRDebugCycleCount | |||||||
| 	DTRDebugCycleCount_Count, | 	DTRDebugCycleCount_Count, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | typedef struct DTRDebugCycles | ||||||
|  | { | ||||||
|  | 	u64 totalCycles; | ||||||
|  | 	u64 numInvokes; | ||||||
|  | 
 | ||||||
|  | 	u64 tmpStartCycles; // Used to calculate the number of cycles elapsed
 | ||||||
|  | } DTRDebugCycles; | ||||||
|  | 
 | ||||||
| typedef struct DTRDebug | typedef struct DTRDebug | ||||||
| { | { | ||||||
| 	struct DTRFont         *font; | 	struct DTRFont         *font; | ||||||
| @ -57,9 +65,9 @@ typedef struct DTRDebug | |||||||
| 	DqnV2 displayP; | 	DqnV2 displayP; | ||||||
| 	i32   displayYOffset; | 	i32   displayYOffset; | ||||||
| 
 | 
 | ||||||
| 	u64 cycleCount[DTRDebugCycleCount_Count]; | 	DTRDebugCycles cycles [DTRDebugCycleCount_Count]; | ||||||
| 	u64 counter   [DTRDebugCounter_Count]; | 	u64            counter[DTRDebugCounter_Count]; | ||||||
| 	u64 totalSetPixels; | 	u64            totalSetPixels; | ||||||
| } DTRDebug; | } DTRDebug; | ||||||
| 
 | 
 | ||||||
| extern DTRDebug globalDebug; | extern DTRDebug globalDebug; | ||||||
|  | |||||||
| @ -7,6 +7,8 @@ | |||||||
| #include "external/stb_rect_pack.h" | #include "external/stb_rect_pack.h" | ||||||
| #include "external/stb_truetype.h" | #include "external/stb_truetype.h" | ||||||
| 
 | 
 | ||||||
|  | #include <intrin.h> | ||||||
|  | 
 | ||||||
| FILE_SCOPE const f32 COLOR_EPSILON = 0.9f; | FILE_SCOPE const f32 COLOR_EPSILON = 0.9f; | ||||||
| 
 | 
 | ||||||
| FILE_SCOPE inline DqnV4 PreMultiplyAlpha1(const DqnV4 color) | FILE_SCOPE inline DqnV4 PreMultiplyAlpha1(const DqnV4 color) | ||||||
| @ -497,7 +499,8 @@ FILE_SCOPE void DebugBarycentricInternal(DqnV2 p, DqnV2 a, DqnV2 b, DqnV2 c, f32 | |||||||
| 	*u        = 1.0f - *v - *w; | 	*u        = 1.0f - *v - *w; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, | void DTRRender_TexturedTriangle(PlatformInput *const input, | ||||||
|  |                                 DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, | ||||||
|                                 DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, |                                 DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, | ||||||
|                                 DqnV4 color, const DTRRenderTransform transform) |                                 DqnV4 color, const DTRRenderTransform transform) | ||||||
| { | { | ||||||
| @ -549,125 +552,287 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, D | |||||||
| 	const DqnV3 b = p2; | 	const DqnV3 b = p2; | ||||||
| 	const DqnV3 c = p3; | 	const DqnV3 c = p3; | ||||||
| 
 | 
 | ||||||
| 	DqnV2i startP = min; |  | ||||||
| 	f32 oldSignedArea1       = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x)); |  | ||||||
| 	f32 oldSignedArea2       = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x)); |  | ||||||
| 	f32 oldSignedArea3       = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x)); |  | ||||||
| 
 |  | ||||||
| 	f32 signedArea1       = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x)); |  | ||||||
| 	f32 signedArea1DeltaX = a.y - b.y; |  | ||||||
| 	f32 signedArea1DeltaY = b.x - a.x; |  | ||||||
| 
 |  | ||||||
| 	f32 signedArea2       = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x)); |  | ||||||
| 	f32 signedArea2DeltaX = b.y - c.y; |  | ||||||
| 	f32 signedArea2DeltaY = c.x - b.x; |  | ||||||
| 
 |  | ||||||
| 	f32 signedArea3       = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x)); |  | ||||||
| 	f32 signedArea3DeltaX = c.y - a.y; |  | ||||||
| 	f32 signedArea3DeltaY = a.x - c.x; |  | ||||||
| 
 |  | ||||||
| 	f32 signedAreaParallelogram = signedArea1 + signedArea2 + signedArea3; |  | ||||||
| 	if (signedAreaParallelogram == 0) return; |  | ||||||
| 	f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram; |  | ||||||
| 
 |  | ||||||
| 	DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); | 	DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); | ||||||
| 	////////////////////////////////////////////////////////////////////////////
 | 	////////////////////////////////////////////////////////////////////////////
 | ||||||
| 	// Scan and Render
 | 	// Scan and Render
 | ||||||
| 	////////////////////////////////////////////////////////////////////////////
 | 	////////////////////////////////////////////////////////////////////////////
 | ||||||
| 	const u32 zBufferPitch        = renderBuffer->width; | 	const u32 zBufferPitch = renderBuffer->width; | ||||||
| 	const f32 BARYCENTRIC_EPSILON = 0.1f; | 	if (input->canUseSSE2) | ||||||
| 
 |  | ||||||
| 	u8 *texturePtr         = texture->memory; |  | ||||||
| 	const u32 texturePitch = texture->bytesPerPixel * texture->dim.w; |  | ||||||
| 	for (i32 bufferY = min.y; bufferY < max.y; bufferY++) |  | ||||||
| 	{ | 	{ | ||||||
| 		f32 signedArea1Row = signedArea1; | 		DqnV2i startP                   = min; | ||||||
| 		f32 signedArea2Row = signedArea2; | 		f32 edge1SignedAreaPixel1       = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x)); | ||||||
| 		f32 signedArea3Row = signedArea3; | 		f32 edge1SignedAreaPixel1DeltaX = a.y - b.y; | ||||||
|  | 		f32 edge1SignedAreaPixel1DeltaY = b.x - a.x; | ||||||
| 
 | 
 | ||||||
| 		for (i32 bufferX = min.x; bufferX < max.x; bufferX++) | 		f32 edge2SignedAreaPixel1       = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x)); | ||||||
|  | 		f32 edge2SignedAreaPixel1DeltaX = b.y - c.y; | ||||||
|  | 		f32 edge2SignedAreaPixel1DeltaY = c.x - b.x; | ||||||
|  | 
 | ||||||
|  | 		f32 edge3SignedAreaPixel1       = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x)); | ||||||
|  | 		f32 edge3SignedAreaPixel1DeltaX = c.y - a.y; | ||||||
|  | 		f32 edge3SignedAreaPixel1DeltaY = a.x - c.x; | ||||||
|  | 
 | ||||||
|  | 		f32 signedAreaParallelogramPixel1 = edge1SignedAreaPixel1 + edge2SignedAreaPixel1 + edge3SignedAreaPixel1; | ||||||
|  | 		if (signedAreaParallelogramPixel1 == 0) return; | ||||||
|  | 		f32 invSignedAreaParallelogramPixel1 = 1 / signedAreaParallelogramPixel1; | ||||||
|  | 
 | ||||||
|  | 		__m128 zero_4x                      = _mm_set_ps1(0.0f); | ||||||
|  | 		__m128 two_4x                       = _mm_set_ps1(2.0f); | ||||||
|  | 		__m128 invSignedAreaParallelogram4x = _mm_set_ps1(invSignedAreaParallelogramPixel1); | ||||||
|  | 		__m128 triangleZ                    = _mm_set_ps(0, b.z, a.z, c.z); | ||||||
|  | 
 | ||||||
|  | 		__m128 signedAreaPixelDeltaX = _mm_set_ps(0, edge3SignedAreaPixel1DeltaX, edge2SignedAreaPixel1DeltaX, edge1SignedAreaPixel1DeltaX); | ||||||
|  | 		__m128 signedAreaPixelDeltaY = _mm_set_ps(0, edge3SignedAreaPixel1DeltaY, edge2SignedAreaPixel1DeltaY, edge1SignedAreaPixel1DeltaY); | ||||||
|  | 
 | ||||||
|  | 		__m128 signedAreaPixel1 = _mm_set_ps(0, edge3SignedAreaPixel1, edge2SignedAreaPixel1, edge1SignedAreaPixel1); | ||||||
|  | 		__m128 signedAreaPixel2 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaX); | ||||||
|  | 
 | ||||||
|  | 		// NOTE: Step size of 2 pixels across X
 | ||||||
|  | 		signedAreaPixelDeltaX = _mm_mul_ps(signedAreaPixelDeltaX, two_4x); | ||||||
|  | 
 | ||||||
|  | 		const DqnV2 uv2SubUv1 = uv2 - uv1; | ||||||
|  | 		const DqnV2 uv3SubUv1 = uv3 - uv1; | ||||||
|  | 
 | ||||||
|  | 		const u32 IS_GREATER_MASK = 0xF; | ||||||
|  | 
 | ||||||
|  | 		for (i32 bufferY = min.y; bufferY < max.y; bufferY++) | ||||||
| 		{ | 		{ | ||||||
| 			if (signedArea1Row >= 0 && signedArea2Row >= 0 && signedArea3Row >= 0) | 			__m128 signedArea1 = signedAreaPixel1; | ||||||
|  | 			__m128 signedArea2 = signedAreaPixel2; | ||||||
|  | 
 | ||||||
|  | 			for (i32 bufferX = min.x; bufferX < max.x; bufferX += 2) | ||||||
| 			{ | 			{ | ||||||
| 				f32 barycentricB = signedArea3Row * invSignedAreaParallelogram; | 				__m128 isGreater1    = _mm_cmpge_ps(signedArea1, zero_4x); | ||||||
| 				f32 barycentricC = signedArea1Row * invSignedAreaParallelogram; | 				i32 isGreaterResult1 = _mm_movemask_ps(isGreater1); | ||||||
| 
 | 				if ((isGreaterResult1 & IS_GREATER_MASK) == IS_GREATER_MASK) | ||||||
| 				if (DTR_DEBUG) |  | ||||||
| 				{ | 				{ | ||||||
| 					const f32 EPSILON = 0.1f; | 					__m128 barycentric  = _mm_mul_ps(signedArea1, invSignedAreaParallelogram4x); | ||||||
|  | 					__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric); | ||||||
| 
 | 
 | ||||||
| 					f32 debugSignedArea1 = ((b.x - a.x) * (bufferY - a.y)) - ((b.y - a.y) * (bufferX - a.x)); | 					i32 zBufferIndex = bufferX + (bufferY * zBufferPitch); | ||||||
| 					f32 debugSignedArea2 = ((c.x - b.x) * (bufferY - b.y)) - ((c.y - b.y) * (bufferX - b.x)); | 					f32 pixelZValue = ((f32 *)&barycentricZ)[0] + | ||||||
| 					f32 debugSignedArea3 = ((a.x - c.x) * (bufferY - c.y)) - ((a.y - c.y) * (bufferX - c.x)); | 					                  ((f32 *)&barycentricZ)[1] + | ||||||
|  | 					                  ((f32 *)&barycentricZ)[2]; | ||||||
|  | 					f32 currZValue = renderBuffer->zBuffer[zBufferIndex]; | ||||||
|  | 					if (pixelZValue > currZValue) | ||||||
|  | 					{ | ||||||
|  | 						renderBuffer->zBuffer[zBufferIndex] = pixelZValue; | ||||||
|  | 						u8 *texturePtr                      = texture->memory; | ||||||
|  | 						const u32 texturePitch = texture->bytesPerPixel * texture->dim.w; | ||||||
| 
 | 
 | ||||||
| 					f32 deltaSignedArea1 = debugSignedArea1 - signedArea1Row; | 						f32 barycentricB = ((f32 *)&barycentric)[2]; | ||||||
| 					f32 deltaSignedArea2 = debugSignedArea2 - signedArea2Row; | 						f32 barycentricC = ((f32 *)&barycentric)[0]; | ||||||
| 					f32 deltaSignedArea3 = debugSignedArea3 - signedArea3Row; | 						DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC); | ||||||
| 					DQN_ASSERT(deltaSignedArea1 < EPSILON && deltaSignedArea2 < EPSILON && |  | ||||||
| 					           deltaSignedArea3 < EPSILON) |  | ||||||
| 
 | 
 | ||||||
| 					f32 debugBarycentricA, debugBarycentricB, debugBarycentricC; | 						const f32 EPSILON = 0.1f; | ||||||
| 					DebugBarycentricInternal(DqnV2_2i(bufferX, bufferY), a.xy, b.xy, c.xy, | 						DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON); | ||||||
| 					                         &debugBarycentricA, &debugBarycentricB, | 						DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON); | ||||||
| 					                         &debugBarycentricC); | 						uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f); | ||||||
|  | 						uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f); | ||||||
| 
 | 
 | ||||||
|  | 						f32 texelXf = uv.x * texture->dim.w; | ||||||
|  | 						f32 texelYf = uv.y * texture->dim.h; | ||||||
|  | 						DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w); | ||||||
|  | 						DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h); | ||||||
| 
 | 
 | ||||||
| 					f32 deltaBaryB = DQN_ABS(barycentricB - debugBarycentricB); | 						i32 texelX = (i32)texelXf; | ||||||
| 					f32 deltaBaryC = DQN_ABS(barycentricC - debugBarycentricC); | 						i32 texelY = (i32)texelYf; | ||||||
|  | 
 | ||||||
|  | 						u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) + | ||||||
|  | 						                      (texelY * texturePitch)); | ||||||
|  | 
 | ||||||
|  | 						DqnV4 color1; | ||||||
|  | 						color1.a = (f32)(texel1 >> 24); | ||||||
|  | 						color1.b = (f32)((texel1 >> 16) & 0xFF); | ||||||
|  | 						color1.g = (f32)((texel1 >> 8) & 0xFF); | ||||||
|  | 						color1.r = (f32)((texel1 >> 0) & 0xFF); | ||||||
|  | 						color1 *= DTRRENDER_INV_255; | ||||||
|  | 						color1      = DTRRender_SRGB1ToLinearSpaceV4(color1); | ||||||
|  | 						DqnV4 blend = color * color1; | ||||||
|  | 						SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear); | ||||||
|  | 					} | ||||||
| 
 | 
 | ||||||
| 					DQN_ASSERT(deltaBaryB < EPSILON && deltaBaryC < EPSILON) |  | ||||||
| 				} | 				} | ||||||
| 
 | 
 | ||||||
| 				i32 zBufferIndex = bufferX + (bufferY * zBufferPitch); | 				__m128 isGreater2    = _mm_cmpge_ps(signedArea2, zero_4x); | ||||||
| 				f32 pixelZValue = a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z)); | 				i32 isGreaterResult2 = _mm_movemask_ps(isGreater2); | ||||||
| 				f32 currZValue  = renderBuffer->zBuffer[zBufferIndex]; | 				i32 bufferX1         = bufferX + 1; | ||||||
| 				DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height)); | 				if ((isGreaterResult2 & IS_GREATER_MASK) == IS_GREATER_MASK && bufferX1 < max.x) | ||||||
| 
 |  | ||||||
| 				if (pixelZValue > currZValue) |  | ||||||
| 				{ | 				{ | ||||||
| 					renderBuffer->zBuffer[zBufferIndex] = pixelZValue; | 					__m128 barycentric  = _mm_mul_ps(signedArea2, invSignedAreaParallelogram4x); | ||||||
| 					const bool DEBUG_SAMPLE_TEXTURE = true; | 					__m128 barycentricZ = _mm_mul_ps(triangleZ, barycentric); | ||||||
| 					DqnV2 uv = uv1 + ((uv2 - uv1) * barycentricB) + ((uv3 - uv1) * barycentricC); |  | ||||||
| 
 | 
 | ||||||
| 					const f32 EPSILON = 0.1f; | 					i32 zBufferIndex = bufferX1 + (bufferY * zBufferPitch); | ||||||
| 					DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON); | 					f32 pixelZValue  = ((f32 *)&barycentricZ)[0] + | ||||||
| 					DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON); | 					                   ((f32 *)&barycentricZ)[1] + | ||||||
|  | 					                   ((f32 *)&barycentricZ)[2]; | ||||||
|  | 					f32 currZValue = renderBuffer->zBuffer[zBufferIndex]; | ||||||
|  | 					if (pixelZValue > currZValue) | ||||||
|  | 					{ | ||||||
|  | 						renderBuffer->zBuffer[zBufferIndex] = pixelZValue; | ||||||
|  | 						u8 *texturePtr                      = texture->memory; | ||||||
|  | 						const u32 texturePitch = texture->bytesPerPixel * texture->dim.w; | ||||||
| 
 | 
 | ||||||
| 					uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f); | 						f32 barycentricB = ((f32 *)&barycentric)[2]; | ||||||
| 					uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f); | 						f32 barycentricC = ((f32 *)&barycentric)[0]; | ||||||
|  | 						DqnV2 uv = uv1 + (uv2SubUv1 * barycentricB) + (uv3SubUv1 * barycentricC); | ||||||
| 
 | 
 | ||||||
| 					f32 texelXf = uv.x * texture->dim.w; | 						const f32 EPSILON = 0.1f; | ||||||
| 					f32 texelYf = uv.y * texture->dim.h; | 						DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON); | ||||||
| 					DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w); | 						DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON); | ||||||
| 					DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h); | 						uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f); | ||||||
|  | 						uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f); | ||||||
| 
 | 
 | ||||||
| 					i32 texelX = (i32)texelXf; | 						f32 texelXf = uv.x * texture->dim.w; | ||||||
| 					i32 texelY = (i32)texelYf; | 						f32 texelYf = uv.y * texture->dim.h; | ||||||
|  | 						DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w); | ||||||
|  | 						DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h); | ||||||
| 
 | 
 | ||||||
| 					u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) + | 						i32 texelX = (i32)texelXf; | ||||||
| 					                      (texelY * texturePitch)); | 						i32 texelY = (i32)texelYf; | ||||||
| 
 | 
 | ||||||
| 					DqnV4 color1; | 						u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) + | ||||||
| 					color1.a = (f32)(texel1 >> 24); | 						                      (texelY * texturePitch)); | ||||||
| 					color1.b = (f32)((texel1 >> 16) & 0xFF); |  | ||||||
| 					color1.g = (f32)((texel1 >> 8) & 0xFF); |  | ||||||
| 					color1.r = (f32)((texel1 >> 0) & 0xFF); |  | ||||||
| 
 | 
 | ||||||
| 					color1 *= DTRRENDER_INV_255; | 						DqnV4 color1; | ||||||
| 					color1      = DTRRender_SRGB1ToLinearSpaceV4(color1); | 						color1.a = (f32)(texel1 >> 24); | ||||||
| 					DqnV4 blend = color * color1; | 						color1.b = (f32)((texel1 >> 16) & 0xFF); | ||||||
| 					SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear); | 						color1.g = (f32)((texel1 >> 8) & 0xFF); | ||||||
|  | 						color1.r = (f32)((texel1 >> 0) & 0xFF); | ||||||
|  | 						color1 *= DTRRENDER_INV_255; | ||||||
|  | 						color1      = DTRRender_SRGB1ToLinearSpaceV4(color1); | ||||||
|  | 						DqnV4 blend = color * color1; | ||||||
|  | 						SetPixel(renderBuffer, bufferX1, bufferY, blend, ColorSpace_Linear); | ||||||
|  | 					} | ||||||
| 				} | 				} | ||||||
|  | 
 | ||||||
|  | 				signedArea1 = _mm_add_ps(signedArea1, signedAreaPixelDeltaX); | ||||||
|  | 				signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX); | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			signedArea1Row += signedArea1DeltaX; | 			signedAreaPixel1 = _mm_add_ps(signedAreaPixel1, signedAreaPixelDeltaY); | ||||||
| 			signedArea2Row += signedArea2DeltaX; | 			signedAreaPixel2 = _mm_add_ps(signedAreaPixel2, signedAreaPixelDeltaY); | ||||||
| 			signedArea3Row += signedArea3DeltaX; |  | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 		DqnV2i startP         = min; | ||||||
|  | 		f32 signedArea1       = ((b.x - a.x) * (startP.y - a.y)) - ((b.y - a.y) * (startP.x - a.x)); | ||||||
|  | 		f32 signedArea1DeltaX = a.y - b.y; | ||||||
|  | 		f32 signedArea1DeltaY = b.x - a.x; | ||||||
| 
 | 
 | ||||||
| 		signedArea1 += signedArea1DeltaY; | 		f32 signedArea2       = ((c.x - b.x) * (startP.y - b.y)) - ((c.y - b.y) * (startP.x - b.x)); | ||||||
| 		signedArea2 += signedArea2DeltaY; | 		f32 signedArea2DeltaX = b.y - c.y; | ||||||
| 		signedArea3 += signedArea3DeltaY; | 		f32 signedArea2DeltaY = c.x - b.x; | ||||||
|  | 
 | ||||||
|  | 		f32 signedArea3       = ((a.x - c.x) * (startP.y - c.y)) - ((a.y - c.y) * (startP.x - c.x)); | ||||||
|  | 		f32 signedArea3DeltaX = c.y - a.y; | ||||||
|  | 		f32 signedArea3DeltaY = a.x - c.x; | ||||||
|  | 
 | ||||||
|  | 		f32 signedAreaParallelogram = signedArea1 + signedArea2 + signedArea3; | ||||||
|  | 		if (signedAreaParallelogram == 0) return; | ||||||
|  | 		f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram; | ||||||
|  | 
 | ||||||
|  | 		for (i32 bufferY = min.y; bufferY < max.y; bufferY++) | ||||||
|  | 		{ | ||||||
|  | 			f32 signedArea1Row = signedArea1; | ||||||
|  | 			f32 signedArea2Row = signedArea2; | ||||||
|  | 			f32 signedArea3Row = signedArea3; | ||||||
|  | 
 | ||||||
|  | 			for (i32 bufferX = min.x; bufferX < max.x; bufferX++) | ||||||
|  | 			{ | ||||||
|  | 				if (signedArea1Row >= 0 && signedArea2Row >= 0 && signedArea3Row >= 0) | ||||||
|  | 				{ | ||||||
|  | 					f32 barycentricB = signedArea3Row * invSignedAreaParallelogram; | ||||||
|  | 					f32 barycentricC = signedArea1Row * invSignedAreaParallelogram; | ||||||
|  | 
 | ||||||
|  | 					if (DTR_DEBUG) | ||||||
|  | 					{ | ||||||
|  | 						const f32 EPSILON = 0.1f; | ||||||
|  | 
 | ||||||
|  | 						f32 debugSignedArea1 = ((b.x - a.x) * (bufferY - a.y)) - ((b.y - a.y) * (bufferX - a.x)); | ||||||
|  | 						f32 debugSignedArea2 = ((c.x - b.x) * (bufferY - b.y)) - ((c.y - b.y) * (bufferX - b.x)); | ||||||
|  | 						f32 debugSignedArea3 = ((a.x - c.x) * (bufferY - c.y)) - ((a.y - c.y) * (bufferX - c.x)); | ||||||
|  | 
 | ||||||
|  | 						f32 deltaSignedArea1 = DQN_ABS(debugSignedArea1 - signedArea1Row); | ||||||
|  | 						f32 deltaSignedArea2 = DQN_ABS(debugSignedArea2 - signedArea2Row); | ||||||
|  | 						f32 deltaSignedArea3 = DQN_ABS(debugSignedArea3 - signedArea3Row); | ||||||
|  | 						DQN_ASSERT(deltaSignedArea1 < EPSILON && deltaSignedArea2 < EPSILON && | ||||||
|  | 						           deltaSignedArea3 < EPSILON) | ||||||
|  | 
 | ||||||
|  | 						f32 debugBarycentricA, debugBarycentricB, debugBarycentricC; | ||||||
|  | 						DebugBarycentricInternal(DqnV2_2i(bufferX, bufferY), a.xy, b.xy, c.xy, | ||||||
|  | 						                         &debugBarycentricA, &debugBarycentricB, | ||||||
|  | 						                         &debugBarycentricC); | ||||||
|  | 
 | ||||||
|  | 						f32 deltaBaryB = DQN_ABS(barycentricB - debugBarycentricB); | ||||||
|  | 						f32 deltaBaryC = DQN_ABS(barycentricC - debugBarycentricC); | ||||||
|  | 
 | ||||||
|  | 						DQN_ASSERT(deltaBaryB < EPSILON && deltaBaryC < EPSILON) | ||||||
|  | 					} | ||||||
|  | 
 | ||||||
|  | 					i32 zBufferIndex = bufferX + (bufferY * zBufferPitch); | ||||||
|  | 					f32 pixelZValue = | ||||||
|  | 					    a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z)); | ||||||
|  | 					f32 currZValue = renderBuffer->zBuffer[zBufferIndex]; | ||||||
|  | 					DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height)); | ||||||
|  | 
 | ||||||
|  | 					if (pixelZValue > currZValue) | ||||||
|  | 					{ | ||||||
|  | 						renderBuffer->zBuffer[zBufferIndex] = pixelZValue; | ||||||
|  | 						if (texture) | ||||||
|  | 						{ | ||||||
|  | 							u8 *texturePtr         = texture->memory; | ||||||
|  | 							const u32 texturePitch = texture->bytesPerPixel * texture->dim.w; | ||||||
|  | 
 | ||||||
|  | 							DqnV2 uv = | ||||||
|  | 							    uv1 + ((uv2 - uv1) * barycentricB) + ((uv3 - uv1) * barycentricC); | ||||||
|  | 
 | ||||||
|  | 							const f32 EPSILON = 0.1f; | ||||||
|  | 							DQN_ASSERT(uv.x >= 0 && uv.x < 1.0f + EPSILON); | ||||||
|  | 							DQN_ASSERT(uv.y >= 0 && uv.y < 1.0f + EPSILON); | ||||||
|  | 
 | ||||||
|  | 							uv.x = DqnMath_Clampf(uv.x, 0.0f, 1.0f); | ||||||
|  | 							uv.y = DqnMath_Clampf(uv.y, 0.0f, 1.0f); | ||||||
|  | 
 | ||||||
|  | 							f32 texelXf = uv.x * texture->dim.w; | ||||||
|  | 							f32 texelYf = uv.y * texture->dim.h; | ||||||
|  | 							DQN_ASSERT(texelXf >= 0 && texelXf < texture->dim.w); | ||||||
|  | 							DQN_ASSERT(texelYf >= 0 && texelYf < texture->dim.h); | ||||||
|  | 
 | ||||||
|  | 							i32 texelX = (i32)texelXf; | ||||||
|  | 							i32 texelY = (i32)texelYf; | ||||||
|  | 
 | ||||||
|  | 							u32 texel1 = *(u32 *)(texturePtr + (texelX * texture->bytesPerPixel) + | ||||||
|  | 							                      (texelY * texturePitch)); | ||||||
|  | 
 | ||||||
|  | 							DqnV4 color1; | ||||||
|  | 							color1.a = (f32)(texel1 >> 24); | ||||||
|  | 							color1.b = (f32)((texel1 >> 16) & 0xFF); | ||||||
|  | 							color1.g = (f32)((texel1 >> 8) & 0xFF); | ||||||
|  | 							color1.r = (f32)((texel1 >> 0) & 0xFF); | ||||||
|  | 
 | ||||||
|  | 							color1 *= DTRRENDER_INV_255; | ||||||
|  | 							color1      = DTRRender_SRGB1ToLinearSpaceV4(color1); | ||||||
|  | 							DqnV4 blend = color * color1; | ||||||
|  | 							SetPixel(renderBuffer, bufferX, bufferY, blend, ColorSpace_Linear); | ||||||
|  | 						} | ||||||
|  | 						else | ||||||
|  | 						{ | ||||||
|  | 							SetPixel(renderBuffer, bufferX, bufferY, color, ColorSpace_Linear); | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				signedArea1Row += signedArea1DeltaX; | ||||||
|  | 				signedArea2Row += signedArea2DeltaX; | ||||||
|  | 				signedArea3Row += signedArea3DeltaX; | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			signedArea1 += signedArea1DeltaY; | ||||||
|  | 			signedArea2 += signedArea2DeltaY; | ||||||
|  | 			signedArea3 += signedArea3DeltaY; | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 	DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); | 	DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); | ||||||
| 
 | 
 | ||||||
| @ -881,12 +1046,10 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, | |||||||
| 	if (signedAreaParallelogram == 0) return; | 	if (signedAreaParallelogram == 0) return; | ||||||
| 	f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram; | 	f32 invSignedAreaParallelogram = 1 / signedAreaParallelogram; | ||||||
| 
 | 
 | ||||||
| 	DTRDebug_BeginCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); |  | ||||||
| 	////////////////////////////////////////////////////////////////////////////
 | 	////////////////////////////////////////////////////////////////////////////
 | ||||||
| 	// Scan and Render
 | 	// Scan and Render
 | ||||||
| 	////////////////////////////////////////////////////////////////////////////
 | 	////////////////////////////////////////////////////////////////////////////
 | ||||||
| 	const u32 zBufferPitch        = renderBuffer->width; | 	const u32 zBufferPitch = renderBuffer->width; | ||||||
| 	const f32 BARYCENTRIC_EPSILON = 0.1f; |  | ||||||
| 	for (i32 bufferY = min.y; bufferY < max.y; bufferY++) | 	for (i32 bufferY = min.y; bufferY < max.y; bufferY++) | ||||||
| 	{ | 	{ | ||||||
| 		f32 signedArea1Row = signedArea1; | 		f32 signedArea1Row = signedArea1; | ||||||
| @ -903,6 +1066,7 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, | |||||||
| 				i32 zBufferIndex = bufferX + (bufferY * zBufferPitch); | 				i32 zBufferIndex = bufferX + (bufferY * zBufferPitch); | ||||||
| 				f32 pixelZValue = a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z)); | 				f32 pixelZValue = a.z + (barycentricB * (b.z - a.z)) + (barycentricC * (c.z - a.z)); | ||||||
| 				f32 currZValue  = renderBuffer->zBuffer[zBufferIndex]; | 				f32 currZValue  = renderBuffer->zBuffer[zBufferIndex]; | ||||||
|  | 				DQN_ASSERT(zBufferIndex < (renderBuffer->width * renderBuffer->height)); | ||||||
| 				if (pixelZValue > currZValue) | 				if (pixelZValue > currZValue) | ||||||
| 				{ | 				{ | ||||||
| 					renderBuffer->zBuffer[zBufferIndex] = pixelZValue; | 					renderBuffer->zBuffer[zBufferIndex] = pixelZValue; | ||||||
| @ -919,7 +1083,6 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, | |||||||
| 		signedArea2 += signedArea2DeltaY; | 		signedArea2 += signedArea2DeltaY; | ||||||
| 		signedArea3 += signedArea3DeltaY; | 		signedArea3 += signedArea3DeltaY; | ||||||
| 	} | 	} | ||||||
| 	DTRDebug_EndCycleCount(DTRDebugCycleCount_RenderTriangle_Rasterise); |  | ||||||
| 
 | 
 | ||||||
| 	////////////////////////////////////////////////////////////////////////////
 | 	////////////////////////////////////////////////////////////////////////////
 | ||||||
| 	// Debug
 | 	// Debug
 | ||||||
| @ -958,8 +1121,7 @@ void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer, | void DTRRender_Bitmap(DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos, | ||||||
|                       DTRBitmap *const bitmap, DqnV2 pos, |  | ||||||
|                       const DTRRenderTransform transform, DqnV4 color) |                       const DTRRenderTransform transform, DqnV4 color) | ||||||
| { | { | ||||||
| 	if (!bitmap || !bitmap->memory || !renderBuffer) return; | 	if (!bitmap || !bitmap->memory || !renderBuffer) return; | ||||||
|  | |||||||
| @ -63,7 +63,7 @@ void DTRRender_Text            (DTRRenderBuffer *const renderBuffer, const DTRFo | |||||||
| void DTRRender_Line            (DTRRenderBuffer *const renderBuffer, DqnV2i a, DqnV2i b, DqnV4 color); | void DTRRender_Line            (DTRRenderBuffer *const renderBuffer, DqnV2i a, DqnV2i b, DqnV4 color); | ||||||
| void DTRRender_Rectangle       (DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTransform()); | void DTRRender_Rectangle       (DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTransform()); | ||||||
| void DTRRender_Triangle        (DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform()); | void DTRRender_Triangle        (DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform()); | ||||||
| void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform()); | void DTRRender_TexturedTriangle(PlatformInput *const input, DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform()); | ||||||
| void DTRRender_Bitmap          (DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos, const DTRRenderTransform transform = DTRRender_DefaultTransform(), DqnV4 color = DqnV4_4f(1, 1, 1, 1)); | void DTRRender_Bitmap          (DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos, const DTRRenderTransform transform = DTRRender_DefaultTransform(), DqnV4 color = DqnV4_4f(1, 1, 1, 1)); | ||||||
| void DTRRender_Clear           (DTRRenderBuffer *const renderBuffer, DqnV3 color); | void DTRRender_Clear           (DTRRenderBuffer *const renderBuffer, DqnV3 color); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -39,7 +39,7 @@ REM wd4100 unused argument parameters | |||||||
| REM wd4201 nonstandard extension used: nameless struct/union | REM wd4201 nonstandard extension used: nameless struct/union | ||||||
| REM wd4189 local variable is initialised but not referenced | REM wd4189 local variable is initialised but not referenced | ||||||
| REM wd4505 unreferenced local function not used will be removed | REM wd4505 unreferenced local function not used will be removed | ||||||
| set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -Od -FAsc /I..\src\external\ | set CompileFlags=-EHsc -GR- -Oi -MT -Z7 -W4 -wd4100 -wd4201 -wd4189 -wd4505 -O2 -FAsc /I..\src\external\ | ||||||
| set DLLFlags=/Fm%ProjectName% /Fo%ProjectName% /Fa%ProjectName% /Fe%ProjectName% | set DLLFlags=/Fm%ProjectName% /Fo%ProjectName% /Fa%ProjectName% /Fe%ProjectName% | ||||||
| set Win32Flags=/FmWin32DTRenderer /FeWin32DTRenderer | set Win32Flags=/FmWin32DTRenderer /FeWin32DTRenderer | ||||||
| 
 | 
 | ||||||
| @ -62,7 +62,7 @@ REM //////////////////////////////////////////////////////////////////////////// | |||||||
| del *.pdb >NUL 2>NUL | del *.pdb >NUL 2>NUL | ||||||
| cl %CompileFlags% %Win32Flags% ..\src\Win32DTRenderer.cpp /link %LinkLibraries% %LinkFlags% | cl %CompileFlags% %Win32Flags% ..\src\Win32DTRenderer.cpp /link %LinkLibraries% %LinkFlags% | ||||||
| REM cl %CompileFlags% %DLLFlags%   ..\src\UnityBuild\UnityBuild.cpp /LD /link ..\src\external\easy\easy_profiler.lib /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags% | REM cl %CompileFlags% %DLLFlags%   ..\src\UnityBuild\UnityBuild.cpp /LD /link ..\src\external\easy\easy_profiler.lib /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags% | ||||||
| cl %CompileFlags% %DLLFlags%   ..\src\UnityBuild\UnityBuild.cpp /LD /link /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags% | cl %CompileFlags% %DLLFlags%  ..\src\UnityBuild\UnityBuild.cpp /LD /link /PDB:%ProjectName%_%TimeStamp%.pdb /export:DTR_Update %LinkFlags% | ||||||
| 
 | 
 | ||||||
| popd | popd | ||||||
| set LastError=%ERRORLEVEL% | set LastError=%ERRORLEVEL% | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user