Update to latest DN library
This commit is contained in:
+42
-4
@@ -738,8 +738,8 @@ DN_API DN_OSExecResult DN_OS_ExecOrAbort(DN_Str8Slice cmd_line, DN_OSExecArgs *a
|
||||
// NOTE: DN_OSThread
|
||||
static void DN_OS_ThreadExecute_(void *user_context)
|
||||
{
|
||||
DN_OSThread *thread = DN_Cast(DN_OSThread *) user_context;
|
||||
DN_TCInitFromMemFuncs(&thread->context, thread->thread_id, DN_TCInitArgsDefault(), DN_MemFuncsDefault());
|
||||
DN_OSThread *thread = DN_Cast(DN_OSThread *) user_context;
|
||||
DN_TCInitFromMemFuncs(&thread->context, thread->thread_id, thread->tc_init_args, DN_MemFuncsDefault());
|
||||
DN_TCEquip(&thread->context);
|
||||
if (thread->is_lane_set) {
|
||||
DN_OS_TCThreadLaneEquip(thread->lane);
|
||||
@@ -801,7 +801,7 @@ DN_API void DN_OS_ThreadLaneSync(DN_OSThreadLane *lane, void **ptr_to_share)
|
||||
DN_OS_BarrierWait(&lane->barrier); // NOTE: Ensure the reading lanes have completed the read
|
||||
}
|
||||
|
||||
DN_API DN_V2USize DN_OS_ThreadLaneRange(DN_OSThreadLane *lane, DN_USize values_count)
|
||||
DN_API DN_V2USize DN_OS_ThreadLaneRange(DN_OSThreadLane const *lane, DN_USize values_count)
|
||||
{
|
||||
DN_USize values_per_thread = values_count / lane->count;
|
||||
DN_USize rem_values = values_count % lane->count;
|
||||
@@ -822,6 +822,44 @@ DN_API DN_V2USize DN_OS_ThreadLaneRange(DN_OSThreadLane *lane, DN_USize values_c
|
||||
return result;
|
||||
}
|
||||
|
||||
DN_API DN_OSThreadLaneway DN_OS_ThreadLanewayFromArgs(DN_OSThread* threads, DN_USize threads_count, DN_UPtr* shared_mem)
{
    // NOTE: Wrap caller-provided storage into a laneway. The pointers are stored as-is (nothing is
    // copied) and the barrier is initialised with `threads_count` participants.
    DN_OSThreadLaneway laneway = {};
    laneway.barrier            = DN_OS_BarrierInit(DN_Cast(DN_U32) threads_count);
    laneway.shared_mem         = shared_mem;
    laneway.threads_count      = threads_count;
    laneway.threads            = threads;
    return laneway;
}
|
||||
|
||||
DN_API DN_OSThreadLaneway DN_OS_ThreadLanewayFromArena(DN_USize threads_count, DN_Arena* arena)
{
    // NOTE: Allocate the thread array and the shared pointer slot from `arena` and build a laneway
    // from them. Returns a zero-initialised laneway if `arena` is null or either allocation fails.
    DN_OSThreadLaneway result = {};
    if (!arena)
        return result;

    // NOTE: Remember the arena position so that a partial allocation can be rolled back on failure
    DN_U64 mem_p = DN_MemListPos(arena->mem);

    // NOTE: The thread array is requested with DN_ZMem_No, the shared slot is requested zeroed.
    // NOTE(review): Presumably DN_OS_ThreadInit fully initialises each thread on dispatch; confirm
    // before reading from `threads` prior to DN_OS_ThreadLanewayDispatch.
    DN_OSThread* threads    = DN_ArenaNewArray(arena, DN_OSThread, threads_count, DN_ZMem_No);
    DN_UPtr*     shared_mem = DN_ArenaNewZ(arena, DN_UPtr);

    if (threads && shared_mem)
        result = DN_OS_ThreadLanewayFromArgs(threads, threads_count, shared_mem);
    else
        DN_MemListPopTo(arena->mem, mem_p);
    return result;
}
|
||||
|
||||
DN_API void DN_OS_ThreadLanewayDispatch(DN_OSThreadLaneway *laneway, DN_OSThreadFunc *entry_point, DN_TCInitArgs tc_init_args, void *user_context)
{
    // NOTE: Start one thread per slot in the laneway. Each thread gets its own lane (its index,
    // the lane count, the laneway's barrier and the shared pointer slot) and is initialised with
    // the same `entry_point`, `tc_init_args` and `user_context`.
    for (DN_ForItSize(thread_it, DN_OSThread, laneway->threads, laneway->threads_count)) {
        // NOTE: The lane lives on the stack here and its address is handed to DN_OS_ThreadInit
        DN_OSThreadLane thread_lane = DN_OS_ThreadLaneInit(thread_it.index,
                                                           laneway->threads_count,
                                                           laneway->barrier,
                                                           laneway->shared_mem);
        DN_OS_ThreadInit(thread_it.data, entry_point, &thread_lane, tc_init_args, user_context);
    }
}
|
||||
|
||||
DN_API void DN_OS_ThreadLanewayJoin(DN_OSThreadLaneway *laneway, DN_TCDeinitArenas deinit_arenas)
{
    // NOTE: Join every thread in the laneway (forwarding `deinit_arenas` to each join), then
    // release the barrier once all joins have returned.
    for (DN_ForItSize(thread_it, DN_OSThread, laneway->threads, laneway->threads_count)) {
        DN_OS_ThreadJoin(thread_it.data, deinit_arenas);
    }
    DN_OS_BarrierDeinit(&laneway->barrier);
}
|
||||
|
||||
DN_API DN_OSThreadLane *DN_OS_TCThreadLane()
|
||||
{
|
||||
DN_TCCore *tc = DN_TCGet();
|
||||
@@ -1269,7 +1307,7 @@ DN_API DN_StackTrace DN_StackTraceFromAllocator(DN_Allocator allocator, DN_U16 l
|
||||
DN_Memcpy(result.base_addr, raw_frames, raw_frames_count * sizeof(raw_frames[0]));
|
||||
#else
|
||||
(void)limit;
|
||||
(void)arena;
|
||||
(void)allocator;
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
+76
-2
@@ -207,6 +207,14 @@ struct DN_OSThreadLane
|
||||
void* shared_mem;
|
||||
};
|
||||
|
||||
struct DN_OSThreadLaneway
|
||||
{
|
||||
DN_OSThread* threads;
|
||||
DN_USize threads_count;
|
||||
DN_UPtr* shared_mem;
|
||||
DN_OSBarrier barrier;
|
||||
};
|
||||
|
||||
struct DN_OSThread
|
||||
{
|
||||
DN_Str8x64 name;
|
||||
@@ -218,6 +226,7 @@ struct DN_OSThread
|
||||
void *user_context;
|
||||
DN_OSThreadFunc *func;
|
||||
DN_OSSemaphore init_semaphore;
|
||||
DN_TCInitArgs tc_init_args;
|
||||
};
|
||||
|
||||
// NOTE: DN_OSHttp
|
||||
@@ -422,14 +431,79 @@ DN_API bool DN_OS_ConditionVariableWaitUntil (D
|
||||
DN_API void DN_OS_ConditionVariableSignal (DN_OSConditionVariable *cv);
|
||||
DN_API void DN_OS_ConditionVariableBroadcast (DN_OSConditionVariable *cv);
|
||||
|
||||
DN_API bool DN_OS_ThreadInit (DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, void *user_context);
|
||||
DN_API bool DN_OS_ThreadInit (DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, DN_TCInitArgs tc_init_args, void *user_context);
|
||||
DN_API bool DN_OS_ThreadJoin (DN_OSThread *thread, DN_TCDeinitArenas deinit_arenas);
|
||||
DN_API DN_U32 DN_OS_ThreadID ();
|
||||
DN_API void DN_OS_ThreadSetNameFmt (char const *fmt, ...);
|
||||
|
||||
// NOTE: Thread lanes provide an abstraction to represent the concept of programming a CPU like a
|
||||
// GPU, e.g. SIMT (Single Instruction Multiple Threads). The lane terminology is popularised by Ryan
|
||||
// Fleury. SIMT is formally defined as
|
||||
//
|
||||
// Threads are grouped into warps/wavefronts (typically 32 or 64 threads) that execute the same
|
||||
// instruction in lockstep, but each thread operates on different data and maintains its own state
|
||||
//
|
||||
// The individual threads in a wavefront on the CPU side are colloquially dubbed "lanes" and a
|
||||
// thread lane here contains the necessary state to facilitate this such as the current index in the
|
||||
// wavefront and synchronisation primitives to coordinate the different lanes together.
|
||||
//
|
||||
// The idea is to write code in a single-threaded manner (linear execution) but across multiple
|
||||
// threads so that all execution paths are inherently multi-threaded by default. Opt
|
||||
// out of parallelism instead of opt in. This optimises for the trend of core counts increasing
|
||||
// whilst clock speeds remain static.
|
||||
//
|
||||
// A laneway is a helper to initialise the requested number of OS threads/lanes upfront and
|
||||
// set up the required synchronisation primitives. It can then dispatch all the threads, which
|
||||
// start executing the `entry_point` in parallel.
|
||||
//
|
||||
// API
|
||||
// DN_OS_ThreadLaneSync
|
||||
// A blocking call to synchronise the program-counter of all other lanes in the laneway to this
|
||||
// function call invocation (using an OS barrier). Optionally pass in the pointer to a pointer
|
||||
// `ptr_to_share` to broadcast the pointer from one lane to the others. The lane that wishes
|
||||
// to broadcast the pointer must have a non-null pointer, all other lanes must pass in a
|
||||
// null pointer. A typical use case might look like:
|
||||
/*
|
||||
DN_OSThreadLane *lane = DN_OS_TCThreadLane(); // Get lane from current (t)hread (c)ontext
|
||||
|
||||
// NOTE: Allocate buffer in lane 0
|
||||
DN_U8 *buffer = nullptr;
|
||||
if (lane->index == 0)
|
||||
buffer = DN_ArenaNewArray(DN_TCMainArena(), DN_U8, DN_Gigabytes(1), DN_ZMem_No);
|
||||
|
||||
// NOTE: Lane 0 broadcasts the `buffer` pointer to lane 1..N
|
||||
DN_OS_ThreadLaneSync(lane, &buffer);
|
||||
|
||||
// NOTE: We use LaneRange to divide the buffer into equal sized chunks that each lane can
|
||||
// write into without clobbering over each other.
|
||||
DN_V2USize range = DN_OS_ThreadLaneRange(lane, DN_Gigabytes(1));
|
||||
for (DN_USize index = range.begin; index < range.end; index++) { buffer[index] = index; }
|
||||
*/
|
||||
// In this example, lane 0 will allocate a 1GiB buffer and pass in a `buffer` to
|
||||
// `DN_OS_ThreadLaneSync` that is non-null. Lanes 1->N will skip the branch (because their lane
|
||||
// indexes are 1..N) and invoke `DN_OS_ThreadLaneSync` with a nullptr `buffer`. After the
|
||||
// blocking call is complete, lanes 0->N will now have synchronised the `buffer` pointer and all
|
||||
// lanes point to the 1GiB range allocated in lane 0's allocator.
|
||||
//
|
||||
// Additionally we demonstrate `DN_OS_ThreadLaneRange` which does math behind the scenes to
|
||||
// divide the buffer up and assign each lane their own indices in the buffer that they can work
|
||||
// on in parallel without clobbering each others work.
|
||||
//
|
||||
// DN_OS_ThreadLaneRange
|
||||
// Calculates the range of values the current lane in the laneway should execute. For example if
|
||||
// you have 128 items and 16 threads each lane will receive the following `DN_V2USize` range:
|
||||
// Lane 0 => [0, 8)
|
||||
// Lane 1 => [8, 16)
|
||||
// ...
|
||||
// Lane 15 => [120, 128)
|
||||
DN_API DN_OSThreadLane DN_OS_ThreadLaneInit (DN_USize index, DN_USize thread_count, DN_OSBarrier barrier, DN_UPtr *share_mem);
|
||||
DN_API void DN_OS_ThreadLaneSync (DN_OSThreadLane *lane, void **ptr_to_share);
|
||||
DN_API DN_V2USize DN_OS_ThreadLaneRange (DN_OSThreadLane *lane, DN_USize values_count);
|
||||
DN_API DN_V2USize DN_OS_ThreadLaneRange (DN_OSThreadLane const *lane, DN_USize values_count);
|
||||
|
||||
DN_API DN_OSThreadLaneway DN_OS_ThreadLanewayFromArgs (DN_OSThread* threads, DN_USize threads_count, DN_UPtr* shared_mem);
|
||||
DN_API DN_OSThreadLaneway DN_OS_ThreadLanewayFromArena (DN_USize threads_count, DN_Arena* arena);
|
||||
DN_API void DN_OS_ThreadLanewayDispatch (DN_OSThreadLaneway *laneway, DN_OSThreadFunc *entry_point, DN_TCInitArgs tc_init_args, void *user_context);
|
||||
DN_API void DN_OS_ThreadLanewayJoin (DN_OSThreadLaneway *laneway, DN_TCDeinitArenas deinit_arenas);
|
||||
|
||||
DN_API DN_OSThreadLane* DN_OS_TCThreadLane ();
|
||||
DN_API void DN_OS_TCThreadLaneSync (void **ptr_to_share);
|
||||
|
||||
+17
-16
@@ -228,7 +228,7 @@ DN_API bool DN_OS_SetEnvVar(DN_Str8 name, DN_Str8 value)
|
||||
|
||||
DN_API DN_OSDiskSpace DN_OS_DiskSpace(DN_Str8 path)
|
||||
{
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
|
||||
DN_OSDiskSpace result = {};
|
||||
DN_Str8 path_z_terminated = DN_Str8FromStr8Arena(path, &scratch.arena);
|
||||
struct statvfs info = {};
|
||||
@@ -387,7 +387,7 @@ DN_API bool DN_OS_FileCopy(DN_Str8 src, DN_Str8 dest, bool overwrite, DN_ErrSink
|
||||
result = (bytes_written == stat_existing.st_size);
|
||||
if (!result) {
|
||||
int error_code = errno;
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
|
||||
DN_Str8 file_size_str8 = DN_Str8FromByteCount(scratch.arena, stat_existing.st_size, DN_ByteCountType_Auto);
|
||||
DN_Str8 bytes_written_str8 = DN_Str8FromByteCount(scratch.arena, bytes_written, DN_ByteCountType_Auto);
|
||||
DN_rrSinkAppendF(error,
|
||||
@@ -517,8 +517,8 @@ DN_API DN_OSFileRead DN_OS_FileRead(DN_OSFile *file, void *buffer, DN_USize size
|
||||
|
||||
result.bytes_read = fread(buffer, 1, size, DN_Cast(FILE *) file->handle);
|
||||
if (feof(DN_Cast(FILE*)file->handle)) {
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
|
||||
DN_Str8x32 buffer_size_str8 = DN_ByteCountStr8x32(size);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
|
||||
DN_Str8x32 buffer_size_str8 = DN_Str8x32FromByteCountU64Auto(size);
|
||||
DN_ErrSinkAppendF(err, 1, "Failed to read %S from file", buffer_size_str8);
|
||||
DN_TCScratchEnd(&scratch);
|
||||
return result;
|
||||
@@ -536,8 +536,8 @@ DN_API bool DN_OS_FileWritePtr(DN_OSFile *file, void const *buffer, DN_USize siz
|
||||
fwrite(buffer, DN_Cast(DN_USize) size, 1 /*count*/, DN_Cast(FILE *) file->handle) ==
|
||||
1 /*count*/;
|
||||
if (!result) {
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
|
||||
DN_Str8x32 buffer_size_str8 = DN_ByteCountStr8x32(size);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
|
||||
DN_Str8x32 buffer_size_str8 = DN_Str8x32FromByteCountU64Auto(size);
|
||||
DN_ErrSinkAppendF(err, 1, "Failed to write buffer (%s) to file handle", DN_Str8PrintFmt(buffer_size_str8));
|
||||
DN_TCScratchEnd(&scratch);
|
||||
}
|
||||
@@ -627,7 +627,7 @@ DN_API bool DN_OS_PathIsDir(DN_Str8 path)
|
||||
|
||||
DN_API bool DN_OS_PathMakeDir(DN_Str8 path)
|
||||
{
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
|
||||
bool result = true;
|
||||
|
||||
// TODO(doyle): Implement this without using the path indexes, it's not
|
||||
@@ -793,7 +793,7 @@ DN_API DN_OSExecResult DN_OS_ExecWait(DN_OSExecAsyncHandle handle,
|
||||
|
||||
// NOTE: Read the data from the read end of the pipe
|
||||
if (result.os_error_code == 0) {
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(&arena, 1);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(&arena, 1);
|
||||
if (arena && handle.stdout_read) {
|
||||
char buffer[4096];
|
||||
DN_Str8Builder builder = DN_Str8BuilderFromArena(&scratch.arena);
|
||||
@@ -805,7 +805,7 @@ DN_API DN_OSExecResult DN_OS_ExecWait(DN_OSExecAsyncHandle handle,
|
||||
DN_Str8BuilderAppendF(&builder, "%.*s", bytes_read, buffer);
|
||||
}
|
||||
|
||||
result.stdout_text = DN_Str8BuilderBuild(&builder, arena);
|
||||
result.stdout_text = DN_Str8FromStr8BuilderArena(&builder, arena);
|
||||
}
|
||||
|
||||
if (arena && handle.stderr_read) {
|
||||
@@ -819,7 +819,7 @@ DN_API DN_OSExecResult DN_OS_ExecWait(DN_OSExecAsyncHandle handle,
|
||||
DN_Str8BuilderAppendF(&builder, "%.*s", bytes_read, buffer);
|
||||
}
|
||||
|
||||
result.stderr_text = DN_Str8BuilderBuild(&builder, arena);
|
||||
result.stderr_text = DN_Str8FromStr8BuilderArena(&builder, arena);
|
||||
}
|
||||
DN_TCScratchEnd(&scratch);
|
||||
}
|
||||
@@ -842,7 +842,7 @@ DN_API DN_OSExecAsyncHandle DN_OS_ExecAsync(DN_Str8Slice cmd_line,
|
||||
if (cmd_line.count == 0)
|
||||
return result;
|
||||
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
|
||||
DN_DEFER { DN_TCScratchEnd(&scratch); };
|
||||
DN_Str8 cmd_rendered = DN_Str8SliceRender(cmd_line, DN_Str8Lit(" "), &scratch.arena);
|
||||
int stdout_pipe[DN_OSPipeType__Count] = {};
|
||||
@@ -1286,7 +1286,7 @@ static void *DN_OS_ThreadFunc_(void *user_context)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, void *user_context)
|
||||
DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, DN_TCInitArgs tc_init_args, void *user_context)
|
||||
{
|
||||
bool result = false;
|
||||
if (!thread)
|
||||
@@ -1296,6 +1296,7 @@ DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSTh
|
||||
thread->user_context = user_context;
|
||||
thread->init_semaphore = DN_OS_SemaphoreInit(0 /*initial_count*/);
|
||||
thread->lane = *lane;
|
||||
thread->tc_init_args = tc_init_args;
|
||||
|
||||
// TODO(doyle): Check if semaphore is valid
|
||||
// NOTE: pthread_t is essentially the thread ID. In Windows, the handle and
|
||||
@@ -1370,7 +1371,7 @@ DN_API void DN_OS_PosixThreadSetName(DN_Str8 name)
|
||||
#if defined(DN_PLATFORM_EMSCRIPTEN)
|
||||
(void)name;
|
||||
#else
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
|
||||
DN_Str8 copy = DN_Str8FromStr8Arena(name, &scratch.arena);
|
||||
pthread_t thread = pthread_self();
|
||||
pthread_setname_np(thread, (char *)copy.data);
|
||||
@@ -1394,7 +1395,7 @@ DN_API DN_OSPosixProcSelfStatus DN_OS_PosixProcSelfStatus()
|
||||
DN_OSFile file = DN_OS_FileOpen(DN_Str8Lit("/proc/self/status"), DN_OSFileOpen_OpenIfExist, DN_OSFileAccess_Read, nullptr);
|
||||
|
||||
if (!file.error) {
|
||||
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
|
||||
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
|
||||
char buf[256];
|
||||
DN_Str8Builder builder = DN_Str8BuilderFromArena(&scratch.arena);
|
||||
for (;;) {
|
||||
@@ -1408,7 +1409,7 @@ DN_API DN_OSPosixProcSelfStatus DN_OS_PosixProcSelfStatus()
|
||||
DN_Str8 const PID = DN_Str8Lit("Pid:");
|
||||
DN_Str8 const VM_PEAK = DN_Str8Lit("VmPeak:");
|
||||
DN_Str8 const VM_SIZE = DN_Str8Lit("VmSize:");
|
||||
DN_Str8 status_buf = DN_Str8BuilderBuild(&builder, &scratch.arena);
|
||||
DN_Str8 status_buf = DN_Str8FromStr8BuilderArena(&builder, &scratch.arena);
|
||||
DN_Str8SplitResult lines = DN_Str8SplitArena(status_buf, DN_Str8Lit("\n"), DN_Str8SplitFlags_ExcludeEmptyStrings, &scratch.arena);
|
||||
|
||||
for (DN_ForItSize(line_it, DN_Str8, lines.data, lines.count)) {
|
||||
@@ -1535,7 +1536,7 @@ DN_API void DN_OS_HttpRequestAsync(DN_OSHttpResponse *response,
|
||||
response->builder.arena = response->scratch_arena.mem ? &response->scratch_arena : &response->tmp_arena;
|
||||
|
||||
DN_Arena *scratch = &response->scratch_arena;
|
||||
DN_TCScratch scratch_ = DN_TCScratchBegin(&arena, 1);
|
||||
DN_TCScratch scratch_ = DN_TCScratchBeginArena(&arena, 1);
|
||||
DN_DEFER { DN_TCScratchEnd(&scratch_); };
|
||||
if (!scratch)
|
||||
scratch = &scratch_.arena;
|
||||
|
||||
@@ -1239,7 +1239,7 @@ static DWORD __stdcall DN_OS_ThreadFunc_(void *user_context)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, void *user_context)
|
||||
DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, DN_TCInitArgs tc_init_args, void *user_context)
|
||||
{
|
||||
bool result = false;
|
||||
if (!thread)
|
||||
@@ -1248,6 +1248,7 @@ DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSTh
|
||||
thread->func = func;
|
||||
thread->user_context = user_context;
|
||||
thread->init_semaphore = DN_OS_SemaphoreInit(0 /*initial_count*/);
|
||||
thread->tc_init_args = tc_init_args;
|
||||
if (lane) {
|
||||
thread->is_lane_set = true;
|
||||
thread->lane = *lane;
|
||||
|
||||
Reference in New Issue
Block a user