Make threading work queue platform abstracted

This commit is contained in:
Doyle Thai 2017-06-18 20:25:57 +10:00
parent 5a6564fa94
commit 079e19b58b
5 changed files with 263 additions and 134 deletions

View File

@ -986,7 +986,7 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
DTRRenderTransform rotatingXform = DTRRender_DefaultTriangleTransform();
rotatingXform.rotation = rotation;
if (1)
if (0)
{
DTRDebug_BeginCycleCount("DTR_Update_RenderPrimitiveTriangles",
DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles);
@ -1000,7 +1000,7 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderPrimitiveTriangles);
}
if (0)
if (1)
{
LOCAL_PERSIST bool runTinyRendererOnce = false;
if (1 && runTinyRendererOnce)
@ -1033,7 +1033,8 @@ extern "C" void DTR_Update(PlatformRenderBuffer *const platformRenderBuffer,
lighting.vector = LIGHT;
lighting.color = DqnV4_4f(1, 1, 1, 1);
DTRRender_Mesh(&renderBuffer, mesh, lighting, modelP, transform);
DTRRender_Mesh(&renderBuffer, &memory->tempStack, &input->api, input->jobQueue,
mesh, lighting, modelP, transform);
DTRDebug_EndCycleCount(DTRDebugCycleCount_DTR_Update_RenderModel);
}
}

View File

@ -4,6 +4,9 @@
#include "dqn.h"
#include <intrin.h>
////////////////////////////////////////////////////////////////////////////////
// Platform File I/O
////////////////////////////////////////////////////////////////////////////////
enum PlatformFilePermissionFlag
{
PlatformFilePermissionFlag_Read = (1 << 0),
@ -24,11 +27,34 @@ typedef struct PlatformFile
u32 permissionFlags;
} PlatformFile;
// File I/O API
// Function types for the services the platform layer provides. PlatformAPI
// stores a pointer of each of these types so game code can call them.
typedef bool PlatformAPI_FileOpen (const char *const path, PlatformFile *const file, const u32 permissionFlags, const enum PlatformFileAction actionFlags);
typedef size_t PlatformAPI_FileRead (PlatformFile *const file, u8 *const buf, const size_t bytesToRead); // Return bytes read
typedef size_t PlatformAPI_FileWrite(PlatformFile *const file, u8 *const buf, const size_t numBytesToWrite); // Presumably returns bytes written (comment previously said "read") - TODO confirm
typedef void PlatformAPI_FileClose(PlatformFile *const file);
typedef void PlatformAPI_Print (const char *const string);
////////////////////////////////////////////////////////////////////////////////
// Platform Multithreading
////////////////////////////////////////////////////////////////////////////////
// PlatformJobQueue must be implemented in platform code. It simply needs to
// fulfill the API and be able to accept PlatformJob structs and execute them.
// The struct is only forward declared here, so game code treats it as an opaque
// handle and passes it back to the API functions below.
typedef struct PlatformJobQueue PlatformJobQueue;
// Signature of the function a job runs. It receives the queue the job was taken
// from and the userData pointer stored in the PlatformJob.
typedef void PlatformJob_Callback(PlatformJobQueue *const queue, void *const userData);
// A unit of work: a callback plus the opaque pointer handed to it.
typedef struct PlatformJob
{
PlatformJob_Callback *callback;
void *userData;
} PlatformJob;
// Multithreading API
// QueueAddJob: the job is passed by value. In the Win32 implementation the
// return value is false when the queue is full (job NOT added), true otherwise.
typedef bool PlatformAPI_QueueAddJob (PlatformJobQueue *const queue, const PlatformJob job);
// QueueTryExecuteNextJob: in the Win32 implementation this returns false when
// the queue looked empty, and true when pending work was observed (whether or
// not the calling thread was the one that ended up executing it).
typedef bool PlatformAPI_QueueTryExecuteNextJob(PlatformJobQueue *const queue);
////////////////////////////////////////////////////////////////////////////////
// Platform API for Game to Use
////////////////////////////////////////////////////////////////////////////////
typedef struct PlatformAPI
{
PlatformAPI_FileOpen *FileOpen;
@ -36,8 +62,14 @@ typedef struct PlatformAPI
PlatformAPI_FileWrite *FileWrite;
PlatformAPI_FileClose *FileClose;
PlatformAPI_Print *Print;
PlatformAPI_QueueAddJob *QueueAddJob;
PlatformAPI_QueueTryExecuteNextJob *QueueTryExecuteNextJob;
} PlatformAPI;
////////////////////////////////////////////////////////////////////////////////
// Platform Input
////////////////////////////////////////////////////////////////////////////////
enum Key
{
key_up,
@ -98,6 +130,7 @@ typedef struct PlatformInput
PlatformAPI api;
PlatformMouse mouse;
PlatformJobQueue *jobQueue;
union {
KeyState key[key_count];
struct
@ -131,6 +164,9 @@ typedef struct PlatformInput
};
} PlatformInput;
////////////////////////////////////////////////////////////////////////////////
// Platform Memory
////////////////////////////////////////////////////////////////////////////////
typedef struct PlatformMemory
{
union {
@ -146,6 +182,9 @@ typedef struct PlatformMemory
void *context;
} PlatformMemory;
////////////////////////////////////////////////////////////////////////////////
// Platform Frame Buffer
////////////////////////////////////////////////////////////////////////////////
typedef struct PlatformRenderBuffer
{
i32 width;

View File

@ -963,7 +963,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderBuffer *const renderBuffer, const DqnV3 p1
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
// renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
__m128 finalColor = simdColor;
if (!ignoreLight)
{
@ -988,7 +988,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderBuffer *const renderBuffer, const DqnV3 p1
__m128 texSampledColor = SIMDSampleTextureForTriangle(texture, uv1, uv2SubUv1, uv3SubUv1, barycentric);
finalColor = _mm_mul_ps(texSampledColor, finalColor);
}
SIMDSetPixel(renderBuffer, posX, posY, finalColor, ColorSpace_Linear);
// SIMDSetPixel(renderBuffer, posX, posY, finalColor, ColorSpace_Linear);
}
DEBUG_SIMD_AUTO_CHOOSE_END_CYCLE_COUNT(Triangle_RasterisePixel);
}
@ -1014,7 +1014,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderBuffer *const renderBuffer, const DqnV3 p1
f32 currZValue = renderBuffer->zBuffer[zBufferIndex];
if (pixelZValue > currZValue)
{
renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
// renderBuffer->zBuffer[zBufferIndex] = pixelZValue;
__m128 finalColor = simdColor;
if (!ignoreLight)
@ -1040,7 +1040,7 @@ FILE_SCOPE void SIMDTriangle(DTRRenderBuffer *const renderBuffer, const DqnV3 p1
__m128 texSampledColor = SIMDSampleTextureForTriangle(texture, uv1, uv2SubUv1, uv3SubUv1, barycentric);
finalColor = _mm_mul_ps(texSampledColor, finalColor);
}
SIMDSetPixel(renderBuffer, posX, posY, finalColor, ColorSpace_Linear);
// SIMDSetPixel(renderBuffer, posX, posY, finalColor, ColorSpace_Linear);
}
}
signedArea2 = _mm_add_ps(signedArea2, signedAreaPixelDeltaX);
@ -1346,11 +1346,41 @@ void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer,
color, transform);
}
void DTRRender_Mesh(DTRRenderBuffer *const renderBuffer, DTRMesh *const mesh,
DTRRenderLight lighting, const DqnV3 pos,
const DTRRenderTransform transform)
typedef struct RenderMeshJob
{
if (!mesh) return;
DTRRenderBuffer *renderBuffer;
DTRBitmap *tex;
RenderLightInternal lighting;
DqnV3 v1;
DqnV3 v2;
DqnV3 v3;
DqnV2 uv1;
DqnV2 uv2;
DqnV2 uv3;
DqnV4 color;
} RenderMeshJob;
void MultiThreadedRenderMesh(struct PlatformJobQueue *const queue, void *const userData)
{
if (!queue || !userData)
{
DQN_ASSERT(DQN_INVALID_CODE_PATH);
return;
}
RenderMeshJob *job = (RenderMeshJob *)userData;
#if 1
TexturedTriangleInternal(job->renderBuffer, job->lighting, job->v1, job->v2, job->v3, job->uv1,
job->uv2, job->uv3, job->tex, job->color);
#endif
}
void DTRRender_Mesh(DTRRenderBuffer *const renderBuffer, DqnMemStack *const tempStack,
PlatformAPI *const api, PlatformJobQueue *const jobQueue, DTRMesh *const mesh,
DTRRenderLight lighting, const DqnV3 pos, const DTRRenderTransform transform)
{
if (!mesh || !renderBuffer || !tempStack || !api || !jobQueue) return;
DqnMat4 viewPModelViewProjection = {};
{
@ -1462,29 +1492,72 @@ void DTRRender_Mesh(DTRRenderBuffer *const renderBuffer, DTRMesh *const mesh,
lightingInternal.numNormals = 3;
bool DEBUG_NO_TEX = false;
bool RUN_MULTITHREADED = true;
if (RUN_MULTITHREADED)
{
RenderMeshJob *jobData = (RenderMeshJob *)DqnMemStack_Push(tempStack, sizeof(*jobData));
if (jobData)
{
jobData->v1 = v1.xyz;
jobData->v2 = v2.xyz;
jobData->v3 = v3.xyz;
jobData->uv1 = uv1;
jobData->uv2 = uv2;
jobData->uv3 = uv3;
jobData->color = color;
jobData->renderBuffer = renderBuffer;
jobData->lighting = lightingInternal;
if (DTR_DEBUG && DEBUG_NO_TEX)
{
TexturedTriangleInternal(renderBuffer, lightingInternal, v1.xyz, v2.xyz, v3.xyz, uv1,
uv2, uv3, NULL, color);
jobData->tex = NULL;
}
else
{
TexturedTriangleInternal(renderBuffer, lightingInternal, v1.xyz, v2.xyz, v3.xyz, uv1,
uv2, uv3, &mesh->tex, color);
jobData->tex = &mesh->tex;
}
PlatformJob renderJob = {};
renderJob.callback = MultiThreadedRenderMesh;
renderJob.userData = jobData;
while (!api->QueueAddJob(jobQueue, renderJob))
{
api->QueueTryExecuteNextJob(jobQueue);
}
}
else
{
// TODO(doyle): Allocation error
DQN_ASSERT(DQN_INVALID_CODE_PATH);
}
}
else
{
if (DTR_DEBUG && DEBUG_NO_TEX)
{
TexturedTriangleInternal(renderBuffer, lightingInternal, v1.xyz, v2.xyz, v3.xyz,
uv1, uv2, uv3, NULL, color);
}
else
{
TexturedTriangleInternal(renderBuffer, lightingInternal, v1.xyz, v2.xyz, v3.xyz,
uv1, uv2, uv3, &mesh->tex, color);
}
}
bool DEBUG_WIREFRAME = false;
if (DTR_DEBUG && DEBUG_WIREFRAME)
{
DqnV4 wireColor = DqnV4_4f(1.0f, 1.0f, 1.0f, 0.01f);
DTRRender_Line(renderBuffer, DqnV2i_V2(v1.xy), DqnV2i_V2(v2.xy),
wireColor);
DTRRender_Line(renderBuffer, DqnV2i_V2(v2.xy), DqnV2i_V2(v3.xy),
wireColor);
DTRRender_Line(renderBuffer, DqnV2i_V2(v3.xy), DqnV2i_V2(v1.xy),
wireColor);
DTRRender_Line(renderBuffer, DqnV2i_V2(v1.xy), DqnV2i_V2(v2.xy), wireColor);
DTRRender_Line(renderBuffer, DqnV2i_V2(v2.xy), DqnV2i_V2(v3.xy), wireColor);
DTRRender_Line(renderBuffer, DqnV2i_V2(v3.xy), DqnV2i_V2(v1.xy), wireColor);
}
}
while (api->QueueTryExecuteNextJob(jobQueue))
;
}
void DTRRender_Triangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3,

View File

@ -2,10 +2,10 @@
#define DTRENDERER_RENDER_H
#include "dqn.h"
#include "DTRendererPlatform.h"
#define DTRRENDER_INV_255 1.0f/255.0f
typedef struct DTRRenderBuffer DTRRenderBuffer;
typedef struct DTRBitmap DTRBitmap;
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -79,7 +79,7 @@ typedef struct DTRRenderLight
void DTRRender_Text (DTRRenderBuffer *const renderBuffer, const DTRFont font, DqnV2 pos, const char *const text, DqnV4 color = DqnV4_1f(1), i32 len = -1);
void DTRRender_Line (DTRRenderBuffer *const renderBuffer, DqnV2i a, DqnV2i b, DqnV4 color);
void DTRRender_Rectangle (DTRRenderBuffer *const renderBuffer, DqnV2 min, DqnV2 max, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTransform());
void DTRRender_Mesh (DTRRenderBuffer *const renderBuffer, DTRMesh *const mesh, DTRRenderLight lighting, const DqnV3 pos, const DTRRenderTransform transform);
void DTRRender_Mesh (DTRRenderBuffer *const renderBuffer, DqnMemStack *const tempStack, PlatformAPI *const api, PlatformJobQueue *const jobQueue, DTRMesh *const mesh, DTRRenderLight lighting, const DqnV3 pos, const DTRRenderTransform transform);
void DTRRender_Triangle (DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform());
void DTRRender_TexturedTriangle(DTRRenderBuffer *const renderBuffer, DqnV3 p1, DqnV3 p2, DqnV3 p3, DqnV2 uv1, DqnV2 uv2, DqnV2 uv3, DTRBitmap *const texture, DqnV4 color, const DTRRenderTransform transform = DTRRender_DefaultTriangleTransform());
void DTRRender_Bitmap (DTRRenderBuffer *const renderBuffer, DTRBitmap *const bitmap, DqnV2 pos, const DTRRenderTransform transform = DTRRender_DefaultTransform(), DqnV4 color = DqnV4_4f(1, 1, 1, 1));

View File

@ -12,7 +12,76 @@ const char *const DLL_NAME = "dtrenderer.dll";
const char *const DLL_TMP_NAME = "dtrenderer_temp.dll";
////////////////////////////////////////////////////////////////////////////////
// Platform API Implementation
// Platform Multi Threading
////////////////////////////////////////////////////////////////////////////////
// Win32 implementation of the job queue: a fixed-size ring of PlatformJob slots
// indexed by an insert cursor and an execute cursor.
struct PlatformJobQueue
{
// Storage for the ring; indexed modulo `size`.
PlatformJob volatile *jobList;
// Number of slots in jobList. Platform_QueueAddJob reports "full" when
// advancing the insert cursor would make it equal to the execute cursor, so at
// most size - 1 jobs are pending at once.
LONG size;
// NOTE: Modified by main+worker threads
// Index of the next job to run; advanced with InterlockedCompareExchange by
// whichever thread claims the job.
LONG volatile jobToExecuteIndex;
// Released once per added job; worker threads wait on it when they find no work.
HANDLE volatile win32Semaphore;
// NOTE: Modified by main thread ONLY
// Index of the slot the next added job is written to.
LONG volatile jobInsertIndex;
};
// Appends `job` to the ring buffer and signals the semaphore once.
// Returns false without modifying the queue when the ring is full (advancing
// the insert index would make it equal to jobToExecuteIndex); returns true once
// the job has been published.
// NOTE(review): queue->size is used as a modulus, so it must be non-zero before
// this is called. There is no guard here.
bool Platform_QueueAddJob(PlatformJobQueue *const queue, const PlatformJob job)
{
LONG newJobInsertIndex = (queue->jobInsertIndex + 1) % queue->size;
if (newJobInsertIndex == queue->jobToExecuteIndex) return false;
queue->jobList[queue->jobInsertIndex] = job;
// Keep the slot write ordered before the index update below:
// _WriteBarrier() stops compiler reordering, _mm_sfence() is a CPU store fence.
_WriteBarrier();
_mm_sfence();
queue->jobInsertIndex = newJobInsertIndex;
// Return value is ignored. NOTE(review): presumably a failed release is
// tolerated because threads re-check the queue before waiting - confirm.
ReleaseSemaphore(queue->win32Semaphore, 1, NULL);
return true;
}
// Tries to claim and run the job at jobToExecuteIndex on the calling thread.
// Returns false when the execute index equals the insert index (nothing
// pending). Returns true when pending work was observed, even if another thread
// won the claim and this call executed nothing.
bool Platform_QueueTryExecuteNextJob(PlatformJobQueue *const queue)
{
LONG originalJobToExecute = queue->jobToExecuteIndex;
if (originalJobToExecute != queue->jobInsertIndex)
{
LONG newJobIndexForNextThread = (originalJobToExecute + 1) % queue->size;
// Claim the slot: the index only advances if it still holds the value we read.
// InterlockedCompareExchange returns the value it found in the destination.
LONG index = InterlockedCompareExchange(&queue->jobToExecuteIndex, newJobIndexForNextThread,
originalJobToExecute);
// NOTE: If we weren't successful at the interlock, another thread has
// taken the work and we can't know if there's more work or not. So
// irrespective of that result, return true to let the thread check
// again for more work.
if (index == originalJobToExecute)
{
// NOTE(review): the slot is copied after the execute index has already been
// advanced. If other threads drain the queue and the producer wraps the ring
// before this copy, the slot could be overwritten - confirm this cannot
// happen in practice.
PlatformJob job = queue->jobList[index];
job.callback(queue, job.userData);
}
return true;
}
return false;
}
// Worker thread entry point. lpParameter is the PlatformJobQueue to service.
// Loops forever: keeps pulling jobs while the queue reports pending work, and
// blocks on the queue's semaphore when it reports none.
DWORD WINAPI Win32ThreadCallback(void *lpParameter)
{
    PlatformJobQueue *const jobQueue = (PlatformJobQueue *)lpParameter;
    for (;;)
    {
        bool sawPendingWork = Platform_QueueTryExecuteNextJob(jobQueue);
        if (sawPendingWork) continue;

        // No work was visible: sleep until a job is added and the semaphore
        // is released.
        WaitForSingleObjectEx(jobQueue->win32Semaphore, INFINITE, false);
    }
}
////////////////////////////////////////////////////////////////////////////////
// Platform I/O
////////////////////////////////////////////////////////////////////////////////
FILE_SCOPE inline PlatformFile DqnFileToPlatformFileInternal(const DqnFile file)
{
@ -441,65 +510,11 @@ i32 Win32GetModuleDirectory(char *const buf, const u32 bufLen)
return lastSlashIndex;
}
typedef struct Win32Thread
FILE_SCOPE void DebugWin32JobPrintNumber(PlatformJobQueue *const queue, void *const userData)
{
HANDLE handle;
i32 logicalIndex;
DWORD id;
} Win32Thread;
typedef struct Win32ThreadContext
{
Win32Thread *threadList;
i32 size;
} Win32ThreadContext;
typedef struct ThreadJob
{
i32 numberToPrint;
} ThreadJob;
typedef struct Win32ThreadJobQueue {
ThreadJob *jobList;
LONG volatile jobInsertIndex;
LONG volatile currJobIndex;
LONG size;
} Win32ThreadJobQueue;
FILE_SCOPE Win32ThreadJobQueue globalJobQueue;
FILE_SCOPE Win32ThreadContext globalThreadContext;
FILE_SCOPE HANDLE globalSemaphore;
void PushThreadJob(Win32ThreadJobQueue *queue, ThreadJob job)
{
DQN_ASSERT(queue->jobInsertIndex < queue->size);
queue->jobList[queue->jobInsertIndex] = job;
queue->jobInsertIndex++;
ReleaseSemaphore(globalSemaphore, 1, NULL);
}
DWORD WINAPI Win32ThreadCallback(void *lpParameter)
{
Win32Thread *thread = (Win32Thread *)lpParameter;
DQN_ASSERT(thread);
for (;;)
{
if (globalJobQueue.currJobIndex < globalJobQueue.jobInsertIndex)
{
LONG workIndex = InterlockedIncrement(&globalJobQueue.currJobIndex) - 1;
ThreadJob job = globalJobQueue.jobList[workIndex];
DqnWin32_OutputDebugString("Thread %d: Printing number: %d\n", thread->logicalIndex,
job.numberToPrint);
}
else
{
WaitForSingleObjectEx(globalSemaphore, INFINITE, false);
}
}
i32 numberToPrint = *((i32 *)userData);
DqnWin32_OutputDebugString("Thread %d: Printing number: %d\n", GetCurrentThreadId(),
numberToPrint);
}
int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLine, int nShowCmd)
@ -619,8 +634,14 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
platformAPI.FileClose = Platform_FileClose;
platformAPI.Print = Platform_Print;
platformAPI.QueueAddJob = Platform_QueueAddJob;
platformAPI.QueueTryExecuteNextJob = Platform_QueueTryExecuteNextJob;
PlatformJobQueue jobQueue = {};
PlatformInput platformInput = {};
platformInput.api = platformAPI;
platformInput.jobQueue = &jobQueue;
platformInput.flags.canUseSSE2 = IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE);
platformInput.flags.canUseRdtsc = IsProcessorFeaturePresent(PF_RDTSC_INSTRUCTION_AVAILABLE);
@ -696,55 +717,50 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi
////////////////////////////////////////////////////////////////////////
// Threading
////////////////////////////////////////////////////////////////////////
const i32 QUEUE_SIZE = 20;
globalJobQueue.jobList = (ThreadJob *)DqnMemStack_Push(&globalPlatformMemory.tempStack,
sizeof(ThreadJob) * QUEUE_SIZE);
const i32 QUEUE_SIZE = 512;
jobQueue.jobList = (PlatformJob *)DqnMemStack_Push(&globalPlatformMemory.mainStack,
sizeof(*jobQueue.jobList) * QUEUE_SIZE);
if (globalJobQueue.jobList)
// NOTE: Interlocked operations (the queue uses InterlockedCompareExchange) require their operand to be on a 32bit boundary.
DQN_ASSERT(((size_t)&jobQueue.jobToExecuteIndex) % 4 == 0);
if (jobQueue.jobList)
{
// NOTE: (numCores - 1), 1 core is already exclusively for main thread
i32 availableThreads = (numCores - 1) * numLogicalCores;
// TODO(doyle): Logic for single core/thread processors
DQN_ASSERT(availableThreads > 0);
globalSemaphore = CreateSemaphore(NULL, 0, availableThreads, NULL);
if (globalSemaphore)
jobQueue.win32Semaphore = CreateSemaphore(NULL, 0, availableThreads, NULL);
if (jobQueue.win32Semaphore)
{
// Create jobs
globalJobQueue.size = QUEUE_SIZE;
for (i32 i = 0; i < 10; i++)
{
ThreadJob job = {};
job.numberToPrint = i;
PushThreadJob(&globalJobQueue, job);
}
// Create threads
globalThreadContext.threadList = (Win32Thread *)DqnMemStack_Push(
&globalPlatformMemory.tempStack, sizeof(Win32Thread) * availableThreads);
if (globalThreadContext.threadList)
{
globalThreadContext.size = availableThreads;
for (i32 i = 0; i < globalThreadContext.size; i++)
for (i32 i = 0; i < availableThreads; i++)
{
const i32 USE_DEFAULT_STACK_SIZE = 0;
Win32Thread *thread = &globalThreadContext.threadList[i];
thread->logicalIndex = i;
thread->handle =
CreateThread(NULL, USE_DEFAULT_STACK_SIZE, Win32ThreadCallback,
(void *)thread, 0, &thread->id);
if (!thread->handle)
{
DqnWin32_DisplayLastError("CreateThread() failed");
}
}
}
else
{
// TODO(doyle): Failed allocation
void *threadParam = &jobQueue;
HANDLE handle = CreateThread(NULL, USE_DEFAULT_STACK_SIZE, Win32ThreadCallback,
threadParam, 0, NULL);
CloseHandle(handle);
}
// Create jobs
jobQueue.size = QUEUE_SIZE;
for (i32 i = 0; i < 20; i++)
{
PlatformJob job = {};
job.callback = DebugWin32JobPrintNumber;
job.userData = DqnMemStack_Push(&globalPlatformMemory.tempStack, sizeof(i32));
if (job.userData)
{
*((i32 *)job.userData) = i;
Platform_QueueAddJob(&jobQueue, job);
}
}
while (Platform_QueueTryExecuteNextJob(&jobQueue))
;
}
else
{