Update to latest DN library

This commit is contained in:
2026-06-18 16:41:12 +10:00
parent cbf7416220
commit 2659f0316f
17 changed files with 602 additions and 140 deletions
+42 -4
View File
@@ -738,8 +738,8 @@ DN_API DN_OSExecResult DN_OS_ExecOrAbort(DN_Str8Slice cmd_line, DN_OSExecArgs *a
// NOTE: DN_OSThread
static void DN_OS_ThreadExecute_(void *user_context)
{
DN_OSThread *thread = DN_Cast(DN_OSThread *) user_context;
DN_TCInitFromMemFuncs(&thread->context, thread->thread_id, DN_TCInitArgsDefault(), DN_MemFuncsDefault());
DN_OSThread *thread = DN_Cast(DN_OSThread *) user_context;
DN_TCInitFromMemFuncs(&thread->context, thread->thread_id, thread->tc_init_args, DN_MemFuncsDefault());
DN_TCEquip(&thread->context);
if (thread->is_lane_set) {
DN_OS_TCThreadLaneEquip(thread->lane);
@@ -801,7 +801,7 @@ DN_API void DN_OS_ThreadLaneSync(DN_OSThreadLane *lane, void **ptr_to_share)
DN_OS_BarrierWait(&lane->barrier); // NOTE: Ensure the reading lanes have completed the read
}
DN_API DN_V2USize DN_OS_ThreadLaneRange(DN_OSThreadLane *lane, DN_USize values_count)
DN_API DN_V2USize DN_OS_ThreadLaneRange(DN_OSThreadLane const *lane, DN_USize values_count)
{
DN_USize values_per_thread = values_count / lane->count;
DN_USize rem_values = values_count % lane->count;
@@ -822,6 +822,44 @@ DN_API DN_V2USize DN_OS_ThreadLaneRange(DN_OSThreadLane *lane, DN_USize values_c
return result;
}
// NOTE: Assemble a laneway on top of caller-provided storage. The `threads` array and the
// `shared_mem` slot are stored as-is (no copy is made), and a barrier is initialised with
// `threads_count` (narrowed to DN_U32) as its count.
DN_API DN_OSThreadLaneway DN_OS_ThreadLanewayFromArgs(DN_OSThread* threads, DN_USize threads_count, DN_UPtr* shared_mem)
{
    DN_OSThreadLaneway laneway = {};
    laneway.barrier            = DN_OS_BarrierInit(DN_Cast(DN_U32) threads_count);
    laneway.shared_mem         = shared_mem;
    laneway.threads_count      = threads_count;
    laneway.threads            = threads;
    return laneway;
}
// NOTE: Allocate the thread array and the shared pointer slot from `arena` and wrap them in a
// laneway. If either allocation fails the arena's memory list is popped back to the position it
// had on entry and a zero-initialised laneway is returned.
DN_API DN_OSThreadLaneway DN_OS_ThreadLanewayFromArena(DN_USize threads_count, DN_Arena* arena)
{
    // NOTE: Remember where the arena was so a partial allocation can be undone
    DN_U64 restore_pos = DN_MemListPos(arena->mem);

    // NOTE: The thread array is requested without zeroing (DN_ZMem_No), the share slot is zeroed
    DN_OSThread *thread_array = DN_ArenaNewArray(arena, DN_OSThread, threads_count, DN_ZMem_No);
    DN_UPtr     *share_slot   = DN_ArenaNewZ(arena, DN_UPtr);

    if (!thread_array || !share_slot) {
        DN_MemListPopTo(arena->mem, restore_pos);
        DN_OSThreadLaneway empty = {};
        return empty;
    }

    return DN_OS_ThreadLanewayFromArgs(thread_array, threads_count, share_slot);
}
// NOTE: Initialise every thread in the laneway with `entry_point`. Each thread is handed a lane
// built from its index, the laneway's thread count, barrier and shared memory slot, plus the
// same `tc_init_args` and `user_context`.
DN_API void DN_OS_ThreadLanewayDispatch(DN_OSThreadLaneway *laneway, DN_OSThreadFunc *entry_point, DN_TCInitArgs tc_init_args, void *user_context)
{
    for (DN_USize lane_index = 0; lane_index < laneway->threads_count; lane_index++) {
        DN_OSThread *thread = laneway->threads + lane_index;

        // NOTE: The lane lives on the stack; DN_OS_ThreadInit receives its address
        DN_OSThreadLane lane = DN_OS_ThreadLaneInit(lane_index, laneway->threads_count, laneway->barrier, laneway->shared_mem);

        // NOTE(review): The bool returned by DN_OS_ThreadInit is not checked here, confirm
        // whether a failed thread init needs handling by the caller.
        DN_OS_ThreadInit(thread, entry_point, &lane, tc_init_args, user_context);
    }
}
// NOTE: Join every thread in the laneway in index order, forwarding `deinit_arenas` to each
// join, then deinitialise the laneway's barrier.
DN_API void DN_OS_ThreadLanewayJoin(DN_OSThreadLaneway *laneway, DN_TCDeinitArenas deinit_arenas)
{
    for (DN_USize thread_index = 0; thread_index < laneway->threads_count; thread_index++) {
        DN_OSThread *thread = laneway->threads + thread_index;
        DN_OS_ThreadJoin(thread, deinit_arenas);
    }

    // NOTE: The barrier is only torn down after all joins have been issued
    DN_OS_BarrierDeinit(&laneway->barrier);
}
DN_API DN_OSThreadLane *DN_OS_TCThreadLane()
{
DN_TCCore *tc = DN_TCGet();
@@ -1269,7 +1307,7 @@ DN_API DN_StackTrace DN_StackTraceFromAllocator(DN_Allocator allocator, DN_U16 l
DN_Memcpy(result.base_addr, raw_frames, raw_frames_count * sizeof(raw_frames[0]));
#else
(void)limit;
(void)arena;
(void)allocator;
#endif
return result;
}
+76 -2
View File
@@ -207,6 +207,14 @@ struct DN_OSThreadLane
void* shared_mem;
};
// NOTE: Groups a set of OS threads with the barrier and shared memory slot that their lanes use.
// Created by DN_OS_ThreadLanewayFromArgs/FromArena, started with DN_OS_ThreadLanewayDispatch and
// finished with DN_OS_ThreadLanewayJoin.
struct DN_OSThreadLaneway
{
// NOTE: Array of `threads_count` threads, one per lane
DN_OSThread* threads;
// NOTE: Number of entries in `threads`, also used as the barrier's count
DN_USize threads_count;
// NOTE: Slot handed to every lane when the laneway is dispatched
DN_UPtr* shared_mem;
// NOTE: Initialised when the laneway is created and deinitialised in DN_OS_ThreadLanewayJoin
DN_OSBarrier barrier;
};
struct DN_OSThread
{
DN_Str8x64 name;
@@ -218,6 +226,7 @@ struct DN_OSThread
void *user_context;
DN_OSThreadFunc *func;
DN_OSSemaphore init_semaphore;
DN_TCInitArgs tc_init_args;
};
// NOTE: DN_OSHttp
@@ -422,14 +431,79 @@ DN_API bool DN_OS_ConditionVariableWaitUntil (D
DN_API void DN_OS_ConditionVariableSignal (DN_OSConditionVariable *cv);
DN_API void DN_OS_ConditionVariableBroadcast (DN_OSConditionVariable *cv);
DN_API bool DN_OS_ThreadInit (DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, void *user_context);
DN_API bool DN_OS_ThreadInit (DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, DN_TCInitArgs tc_init_args, void *user_context);
DN_API bool DN_OS_ThreadJoin (DN_OSThread *thread, DN_TCDeinitArenas deinit_arenas);
DN_API DN_U32 DN_OS_ThreadID ();
DN_API void DN_OS_ThreadSetNameFmt (char const *fmt, ...);
// NOTE: Thread lanes provide an abstraction to represent the concept of programming a CPU like a
// GPU, e.g. SIMT (Single Instruction Multiple Threads). The lane terminology is popularised by Ryan
// Fleury. SIMT is formally defined as
//
// Threads are grouped into warps/wavefronts (typically 32 or 64 threads) that execute the same
// instruction in lockstep, but each thread operates on different data and maintains its own state
//
// The individual threads in a wavefront on the CPU side are colloquially dubbed "lanes" and a
// thread lane here contains the necessary state to facilitate this such as the current index in the
// wavefront and synchronisation primitives to coordinate the different lanes together.
//
// The idea is to write code in a single-threaded manner (linear execution) but across multiple
// threads so that all execution paths are inherently multi-threaded by default. Opt out of
// parallelism instead of opting in. This optimises for the trend of core counts increasing
// whilst clock speeds remain static.
//
// A laneway is a helper that initialises the number of requested OS threads/lanes upfront and
// sets up the required synchronisation primitives. It can then be dispatched, at which point all
// the threads start executing the `entry_point` in parallel.
//
// API
// DN_OS_ThreadLaneSync
// A blocking call to synchronise the program-counter of all other lanes in the laneway to this
// function call invocation (using an OS barrier). Optionally pass in the pointer to a pointer
// `ptr_to_share` to broadcast the pointer from one lane to the others. The lane that wishes
// to broadcast must pass in a `*ptr_to_share` that is non-null, all other lanes must pass in
// a `*ptr_to_share` that is null. A typical use case might look like:
/*
DN_OSThreadLane *lane = DN_OS_TCThreadLane(); // Get lane from current (t)hread (c)ontext
// NOTE: Allocate buffer in lane 0
DN_U8 *buffer = nullptr;
if (lane->index == 0)
buffer = DN_ArenaNewArray(DN_TCMainArena(), DN_U8, DN_Gigabytes(1), DN_ZMem_No);
// NOTE: Lane 0 broadcasts the `buffer` pointer to lane 1..N
DN_OS_ThreadLaneSync(lane, &buffer);
// NOTE: We use LaneRange to divide the buffer into equal sized chunks that each lane can
// write into without clobbering over each other.
DN_V2USize range = DN_OS_ThreadLaneRange(lane, DN_Gigabytes(1));
for (DN_USize index = range.begin; index < range.end; index++) { buffer[index] = index; }
*/
// In this example, lane 0 will allocate a 1GiB buffer and pass in a `buffer` to
// `DN_OS_ThreadLaneSync` that is non-null. Lanes 1->N will skip the branch (because their lane
// indexes are 1..N) and invoke `DN_OS_ThreadLaneSync` with a nullptr `buffer`. After the
// blocking call is complete, lanes 0->N will now have synchronised the `buffer` pointer and all
// lanes point to the 1GiB range allocated in lane 0's allocator.
//
// Additionally we demonstrate `DN_OS_ThreadLaneRange` which does math behind the scenes to
// divide the buffer up and assign each lane their own indices in the buffer that they can work
// on in parallel without clobbering each other's work.
//
// DN_OS_ThreadLaneRange
// Calculates the range of values the current lane in the laneway should execute. For example if
// you have 128 items and 16 threads each lane will receive the following `DN_V2USize` range:
// Lane 0 => [0, 8)
// Lane 1 => [8, 16)
// ...
// Lane 15 => [120, 128)
DN_API DN_OSThreadLane DN_OS_ThreadLaneInit (DN_USize index, DN_USize thread_count, DN_OSBarrier barrier, DN_UPtr *share_mem);
DN_API void DN_OS_ThreadLaneSync (DN_OSThreadLane *lane, void **ptr_to_share);
DN_API DN_V2USize DN_OS_ThreadLaneRange (DN_OSThreadLane *lane, DN_USize values_count);
DN_API DN_V2USize DN_OS_ThreadLaneRange (DN_OSThreadLane const *lane, DN_USize values_count);
DN_API DN_OSThreadLaneway DN_OS_ThreadLanewayFromArgs (DN_OSThread* threads, DN_USize threads_count, DN_UPtr* shared_mem);
DN_API DN_OSThreadLaneway DN_OS_ThreadLanewayFromArena (DN_USize threads_count, DN_Arena* arena);
DN_API void DN_OS_ThreadLanewayDispatch (DN_OSThreadLaneway *laneway, DN_OSThreadFunc *entry_point, DN_TCInitArgs tc_init_args, void *user_context);
DN_API void DN_OS_ThreadLanewayJoin (DN_OSThreadLaneway *laneway, DN_TCDeinitArenas deinit_arenas);
DN_API DN_OSThreadLane* DN_OS_TCThreadLane ();
DN_API void DN_OS_TCThreadLaneSync (void **ptr_to_share);
+17 -16
View File
@@ -228,7 +228,7 @@ DN_API bool DN_OS_SetEnvVar(DN_Str8 name, DN_Str8 value)
DN_API DN_OSDiskSpace DN_OS_DiskSpace(DN_Str8 path)
{
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
DN_OSDiskSpace result = {};
DN_Str8 path_z_terminated = DN_Str8FromStr8Arena(path, &scratch.arena);
struct statvfs info = {};
@@ -387,7 +387,7 @@ DN_API bool DN_OS_FileCopy(DN_Str8 src, DN_Str8 dest, bool overwrite, DN_ErrSink
result = (bytes_written == stat_existing.st_size);
if (!result) {
int error_code = errno;
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
DN_Str8 file_size_str8 = DN_Str8FromByteCount(scratch.arena, stat_existing.st_size, DN_ByteCountType_Auto);
DN_Str8 bytes_written_str8 = DN_Str8FromByteCount(scratch.arena, bytes_written, DN_ByteCountType_Auto);
DN_rrSinkAppendF(error,
@@ -517,8 +517,8 @@ DN_API DN_OSFileRead DN_OS_FileRead(DN_OSFile *file, void *buffer, DN_USize size
result.bytes_read = fread(buffer, 1, size, DN_Cast(FILE *) file->handle);
if (feof(DN_Cast(FILE*)file->handle)) {
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
DN_Str8x32 buffer_size_str8 = DN_ByteCountStr8x32(size);
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
DN_Str8x32 buffer_size_str8 = DN_Str8x32FromByteCountU64Auto(size);
DN_ErrSinkAppendF(err, 1, "Failed to read %S from file", buffer_size_str8);
DN_TCScratchEnd(&scratch);
return result;
@@ -536,8 +536,8 @@ DN_API bool DN_OS_FileWritePtr(DN_OSFile *file, void const *buffer, DN_USize siz
fwrite(buffer, DN_Cast(DN_USize) size, 1 /*count*/, DN_Cast(FILE *) file->handle) ==
1 /*count*/;
if (!result) {
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
DN_Str8x32 buffer_size_str8 = DN_ByteCountStr8x32(size);
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
DN_Str8x32 buffer_size_str8 = DN_Str8x32FromByteCountU64Auto(size);
DN_ErrSinkAppendF(err, 1, "Failed to write buffer (%s) to file handle", DN_Str8PrintFmt(buffer_size_str8));
DN_TCScratchEnd(&scratch);
}
@@ -627,7 +627,7 @@ DN_API bool DN_OS_PathIsDir(DN_Str8 path)
DN_API bool DN_OS_PathMakeDir(DN_Str8 path)
{
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
bool result = true;
// TODO(doyle): Implement this without using the path indexes, it's not
@@ -793,7 +793,7 @@ DN_API DN_OSExecResult DN_OS_ExecWait(DN_OSExecAsyncHandle handle,
// NOTE: Read the data from the read end of the pipe
if (result.os_error_code == 0) {
DN_TCScratch scratch = DN_TCScratchBegin(&arena, 1);
DN_TCScratch scratch = DN_TCScratchBeginArena(&arena, 1);
if (arena && handle.stdout_read) {
char buffer[4096];
DN_Str8Builder builder = DN_Str8BuilderFromArena(&scratch.arena);
@@ -805,7 +805,7 @@ DN_API DN_OSExecResult DN_OS_ExecWait(DN_OSExecAsyncHandle handle,
DN_Str8BuilderAppendF(&builder, "%.*s", bytes_read, buffer);
}
result.stdout_text = DN_Str8BuilderBuild(&builder, arena);
result.stdout_text = DN_Str8FromStr8BuilderArena(&builder, arena);
}
if (arena && handle.stderr_read) {
@@ -819,7 +819,7 @@ DN_API DN_OSExecResult DN_OS_ExecWait(DN_OSExecAsyncHandle handle,
DN_Str8BuilderAppendF(&builder, "%.*s", bytes_read, buffer);
}
result.stderr_text = DN_Str8BuilderBuild(&builder, arena);
result.stderr_text = DN_Str8FromStr8BuilderArena(&builder, arena);
}
DN_TCScratchEnd(&scratch);
}
@@ -842,7 +842,7 @@ DN_API DN_OSExecAsyncHandle DN_OS_ExecAsync(DN_Str8Slice cmd_line,
if (cmd_line.count == 0)
return result;
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
DN_DEFER { DN_TCScratchEnd(&scratch); };
DN_Str8 cmd_rendered = DN_Str8SliceRender(cmd_line, DN_Str8Lit(" "), &scratch.arena);
int stdout_pipe[DN_OSPipeType__Count] = {};
@@ -1286,7 +1286,7 @@ static void *DN_OS_ThreadFunc_(void *user_context)
return nullptr;
}
DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, void *user_context)
DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, DN_TCInitArgs tc_init_args, void *user_context)
{
bool result = false;
if (!thread)
@@ -1296,6 +1296,7 @@ DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSTh
thread->user_context = user_context;
thread->init_semaphore = DN_OS_SemaphoreInit(0 /*initial_count*/);
thread->lane = *lane;
thread->tc_init_args = tc_init_args;
// TODO(doyle): Check if semaphore is valid
// NOTE: pthread_t is essentially the thread ID. In Windows, the handle and
@@ -1370,7 +1371,7 @@ DN_API void DN_OS_PosixThreadSetName(DN_Str8 name)
#if defined(DN_PLATFORM_EMSCRIPTEN)
(void)name;
#else
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
DN_Str8 copy = DN_Str8FromStr8Arena(name, &scratch.arena);
pthread_t thread = pthread_self();
pthread_setname_np(thread, (char *)copy.data);
@@ -1394,7 +1395,7 @@ DN_API DN_OSPosixProcSelfStatus DN_OS_PosixProcSelfStatus()
DN_OSFile file = DN_OS_FileOpen(DN_Str8Lit("/proc/self/status"), DN_OSFileOpen_OpenIfExist, DN_OSFileAccess_Read, nullptr);
if (!file.error) {
DN_TCScratch scratch = DN_TCScratchBegin(nullptr, 0);
DN_TCScratch scratch = DN_TCScratchBeginArena(nullptr, 0);
char buf[256];
DN_Str8Builder builder = DN_Str8BuilderFromArena(&scratch.arena);
for (;;) {
@@ -1408,7 +1409,7 @@ DN_API DN_OSPosixProcSelfStatus DN_OS_PosixProcSelfStatus()
DN_Str8 const PID = DN_Str8Lit("Pid:");
DN_Str8 const VM_PEAK = DN_Str8Lit("VmPeak:");
DN_Str8 const VM_SIZE = DN_Str8Lit("VmSize:");
DN_Str8 status_buf = DN_Str8BuilderBuild(&builder, &scratch.arena);
DN_Str8 status_buf = DN_Str8FromStr8BuilderArena(&builder, &scratch.arena);
DN_Str8SplitResult lines = DN_Str8SplitArena(status_buf, DN_Str8Lit("\n"), DN_Str8SplitFlags_ExcludeEmptyStrings, &scratch.arena);
for (DN_ForItSize(line_it, DN_Str8, lines.data, lines.count)) {
@@ -1535,7 +1536,7 @@ DN_API void DN_OS_HttpRequestAsync(DN_OSHttpResponse *response,
response->builder.arena = response->scratch_arena.mem ? &response->scratch_arena : &response->tmp_arena;
DN_Arena *scratch = &response->scratch_arena;
DN_TCScratch scratch_ = DN_TCScratchBegin(&arena, 1);
DN_TCScratch scratch_ = DN_TCScratchBeginArena(&arena, 1);
DN_DEFER { DN_TCScratchEnd(&scratch_); };
if (!scratch)
scratch = &scratch_.arena;
+2 -1
View File
@@ -1239,7 +1239,7 @@ static DWORD __stdcall DN_OS_ThreadFunc_(void *user_context)
return 0;
}
DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, void *user_context)
DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSThreadLane *lane, DN_TCInitArgs tc_init_args, void *user_context)
{
bool result = false;
if (!thread)
@@ -1248,6 +1248,7 @@ DN_API bool DN_OS_ThreadInit(DN_OSThread *thread, DN_OSThreadFunc *func, DN_OSTh
thread->func = func;
thread->user_context = user_context;
thread->init_semaphore = DN_OS_SemaphoreInit(0 /*initial_count*/);
thread->tc_init_args = tc_init_args;
if (lane) {
thread->is_lane_set = true;
thread->lane = *lane;