Revamp the profiler

This commit is contained in:
doylet 2025-09-17 21:48:33 +10:00
parent f3d0e40012
commit b6c9205331
5 changed files with 149 additions and 124 deletions

View File

@ -5,11 +5,7 @@ DN_API void DN_Core_Init(DN_Core *core, DN_CoreOnInit on_init)
DN_Assert(g_dn_os_core_);
g_dn_core = core;
// NOTE Initialise fields //////////////////////////////////////////////////////////////////////
#if !defined(DN_NO_PROFILER)
core->profiler = &core->profiler_default_instance;
#endif
// NOTE Initialise fields
#if defined(DN_LEAK_TRACKING)
// NOTE: Setup the allocation table with allocation tracking turned off on
// the arena we're using to initialise the table.
@ -44,10 +40,6 @@ DN_API void DN_Core_Init(DN_Core *core, DN_CoreOnInit on_init)
DN_Str8Builder_AppendRef(&builder, DN_STR8(" Allocation leak tracing\n"));
#endif
#if !defined(DN_NO_PROFILER)
DN_Str8Builder_AppendRef(&builder, DN_STR8(" TSC profiler available\n"));
#endif
#if defined(DN_PLATFORM_EMSCRIPTEN) || defined(DN_PLATFORM_POSIX)
DN_POSIXCore *posix = DN_CAST(DN_POSIXCore *)g_dn_os_core_->platform_context;
DN_Str8Builder_AppendF(&builder, " Clock GetTime: %S\n", posix->clock_monotonic_raw ? DN_STR8("CLOCK_MONOTONIC_RAW") : DN_STR8("CLOCK_MONOTONIC"));
@ -90,11 +82,3 @@ DN_API void DN_Core_BeginFrame()
{
DN_AtomicSetValue64(&g_dn_os_core_->mem_allocs_frame, 0);
}
#if !defined(DN_NO_PROFILER)
// Points the global core at 'profiler'. A null pointer is ignored, leaving
// whichever profiler is currently installed untouched.
DN_API void DN_Core_SetProfiler(DN_Profiler *profiler)
{
    if (!profiler)
        return;
    g_dn_core->profiler = profiler;
}
#endif

View File

@ -1,24 +1,16 @@
#if !defined(DN_CORE_H)
#define DN_CORE_H
// NOTE: DN_Core ///////////////////////////////////////////////////////////////////////////////////
// Book-keeping data for the library and allow customisation of certain features
// provided.
// NOTE: DN_Core
// Library-wide state object. DN_Core_Init stores the pointer it is given in
// the global 'g_dn_core'.
struct DN_Core
{
// NOTE: Leak Tracing //////////////////////////////////////////////////////////////////////////
// NOTE: Leak Tracing
// The allocation table, its mutex and its arena only exist when the library is
// compiled with DN_LEAK_TRACKING defined.
#if defined(DN_LEAK_TRACKING)
DN_DSMap<DN_DebugAlloc> alloc_table;
DN_TicketMutex alloc_table_mutex;
DN_Arena alloc_table_arena;
#endif
// Declared outside the DN_LEAK_TRACKING guard, so this counter is always part
// of the struct regardless of whether leak tracking is compiled in.
DN_U64 alloc_table_bytes_allocated_for_stack_traces;
// NOTE: Profiler //////////////////////////////////////////////////////////////////////////////
// 'profiler' is pointed at 'profiler_default_instance' by DN_Core_Init and can
// be redirected with DN_Core_SetProfiler. Both fields are compiled out when
// DN_NO_PROFILER is defined.
#if !defined(DN_NO_PROFILER)
DN_Profiler * profiler;
DN_Profiler profiler_default_instance;
#endif
};
enum DN_CoreOnInit
@ -29,9 +21,6 @@ enum DN_CoreOnInit
DN_CoreOnInit_LogAllFeatures = DN_CoreOnInit_LogLibFeatures | DN_CoreOnInit_LogCPUFeatures,
};
DN_API void DN_Core_Init (DN_Core *core, DN_CoreOnInit on_init);
DN_API void DN_Core_BeginFrame ();
#if !defined(DN_NO_PROFILER)
DN_API void DN_Core_SetProfiler (DN_Profiler *profiler);
#endif
DN_API void DN_Core_Init (DN_Core *core, DN_CoreOnInit on_init);
DN_API void DN_Core_BeginFrame();
#endif // !defined(DN_CORE_H)

View File

@ -2,6 +2,7 @@
#include "../dn_base_inc.h"
#include "../dn_os_inc.h"
#include "../dn_core_inc.h"
DN_API DN_StackTraceWalkResult DN_StackTrace_Walk(DN_Arena *arena, uint16_t limit)
{
@ -349,91 +350,135 @@ DN_API void DN_DBGDumpLeaks()
}
#endif // DN_LEAK_TRACKING
#if !defined(DN_NO_PROFILER)
// NOTE: DN_Profiler ///////////////////////////////////////////////////////////////////////////////
DN_API DN_ProfilerZoneScope::DN_ProfilerZoneScope(DN_Str8 name, uint16_t anchor_index)
// NOTE: DN_Profiler
// Builds a profiler (returned by value) on top of caller-supplied anchor
// storage.
//   anchors           - anchor array the profiler reads from and writes to
//   count             - total number of anchors in 'anchors'
//   anchors_per_frame - number of anchors that make up one frame
//   tsc               - timestamp source used by DN_Profiler_BeginZone/EndZone
//   tsc_frequency     - divisor used when converting timestamps to seconds
// Every field not listed above is left zero-initialised.
// NOTE(review): 'count' is not checked to be a non-zero multiple of
// 'anchors_per_frame', and 'anchors_per_frame' is not checked to be non-zero;
// DN_Profiler_FrameCount and DN_Profiler_NewFrame divide by it. Confirm callers
// guarantee this.
DN_API DN_Profiler DN_Profiler_Init(DN_ProfilerAnchor *anchors, DN_USize count, DN_USize anchors_per_frame, DN_ProfilerTSC tsc, DN_U64 tsc_frequency)
{
zone = DN_Profiler_BeginZoneAtIndex(name, anchor_index);
}
DN_API DN_ProfilerZoneScope::~DN_ProfilerZoneScope()
{
DN_Profiler_EndZone(zone);
}
DN_API DN_ProfilerAnchor *DN_Profiler_ReadBuffer()
{
uint8_t mask = DN_ArrayCountU(g_dn_core->profiler->anchors) - 1;
DN_ProfilerAnchor *result = g_dn_core->profiler->anchors[(g_dn_core->profiler->active_anchor_buffer - 1) & mask];
DN_Profiler result = {};
result.anchors = anchors;
result.anchors_count = count;
result.anchors_per_frame = anchors_per_frame;
result.tsc = tsc;
result.tsc_frequency = tsc_frequency;
return result;
}
DN_API DN_ProfilerAnchor *DN_Profiler_WriteBuffer()
// Returns how many whole frames fit in the profiler's anchor storage, i.e.
// anchors_count / anchors_per_frame using integer division (any remainder is
// discarded).
// NOTE(review): divides by zero if 'anchors_per_frame' is 0 - confirm that
// DN_Profiler_Init callers never pass 0.
DN_API DN_USize DN_Profiler_FrameCount(DN_Profiler const *profiler)
{
uint8_t mask = DN_ArrayCountU(g_dn_core->profiler->anchors) - 1;
DN_ProfilerAnchor *result = g_dn_core->profiler->anchors[(g_dn_core->profiler->active_anchor_buffer + 0) & mask];
DN_USize result = profiler->anchors_count / profiler->anchors_per_frame;
return result;
}
DN_API DN_ProfilerZone DN_Profiler_BeginZoneAtIndex(DN_Str8 name, uint16_t anchor_index)
// Returns a view of the 'anchors_per_frame' anchors belonging to frame
// 'frame_index'. The view points directly into 'profiler->anchors'; nothing is
// copied.
// NOTE(review): 'frame_index' is not range checked here, so an index at or
// beyond anchors_count / anchors_per_frame yields a pointer past the storage.
DN_API DN_ProfilerAnchorArray DN_Profiler_FrameAnchorsFromIndex(DN_Profiler *profiler, DN_USize frame_index)
{
DN_ProfilerAnchor *anchor = DN_Profiler_WriteBuffer() + anchor_index;
DN_ProfilerAnchorArray result = {};
// Frames are laid out back to back, so frame N starts N * anchors_per_frame
// anchors into the storage.
DN_USize anchor_offset = frame_index * profiler->anchors_per_frame;
result.data = profiler->anchors + anchor_offset;
result.count = profiler->anchors_per_frame;
return result;
}
// Returns the anchor view for the frame currently selected by
// 'profiler->frame_index'. Thin wrapper over DN_Profiler_FrameAnchorsFromIndex.
DN_API DN_ProfilerAnchorArray DN_Profiler_FrameAnchors(DN_Profiler *profiler)
{
    DN_USize const current_frame = profiler->frame_index;
    return DN_Profiler_FrameAnchorsFromIndex(profiler, current_frame);
}
// Opens a timing zone on anchor 'anchor_index' of the current frame and returns
// the zone record that must later be handed to DN_Profiler_EndZone.
//   profiler     - profiler whose current frame receives the measurement
//   name         - label stored on the anchor
//   anchor_index - slot within the frame; asserted to be < anchors_per_frame
// While the profiler is paused nothing is touched and a zero-initialised zone
// is returned.
DN_API DN_ProfilerZone DN_Profiler_BeginZone(DN_Profiler *profiler, DN_Str8 name, DN_U16 anchor_index)
{
DN_ProfilerZone result = {};
if (profiler->paused)
return result;
DN_Assert(anchor_index < profiler->anchors_per_frame);
DN_ProfilerAnchor *anchor = DN_Profiler_FrameAnchors(profiler).data + anchor_index;
anchor->name = name;
// TODO: We need per-thread-local-storage profiler so that we can use these apis
// across threads. For now, we let them overwrite each other but this is not tenable.
#if 0
if (DN_Str8_HasData(anchor->name) && anchor->name != name)
DN_AssertF(name == anchor->name, "Potentially overwriting a zone by accident? Anchor is '%.*s', name is '%.*s'", DN_STR_FMT(anchor->name), DN_STR_FMT(name));
#endif
anchor->name = name;
DN_ProfilerZone result = {};
result.begin_tsc = DN_CPUGetTSC();
// Take the start timestamp from whichever source the profiler was initialised
// with; any value other than DN_ProfilerTSC_RDTSC uses the OS counter.
if (profiler->tsc == DN_ProfilerTSC_RDTSC)
result.begin_tsc = DN_CPUGetTSC();
else
result.begin_tsc = DN_OS_PerfCounterNow();
result.anchor_index = anchor_index;
result.parent_zone = g_dn_core->profiler->parent_zone;
// Remember which zone was open before this one so DN_Profiler_EndZone can
// restore it and charge the elapsed time against it.
result.parent_zone = profiler->parent_zone;
// Snapshot of the anchor's inclusive time at entry; DN_Profiler_EndZone writes
// this snapshot plus the elapsed time back to the anchor.
result.elapsed_tsc_at_zone_start = anchor->tsc_inclusive;
g_dn_core->profiler->parent_zone = anchor_index;
// This zone is now the parent of any zone opened before the matching EndZone.
profiler->parent_zone = anchor_index;
return result;
}
DN_API void DN_Profiler_EndZone(DN_ProfilerZone zone)
// Closes 'zone' (as returned by DN_Profiler_BeginZone) and accumulates its
// elapsed time into the matching anchor of the profiler's *current* frame.
// Does nothing while the profiler is paused.
// NOTE(review): the anchor is looked up via the current 'frame_index', so a
// zone that was begun before DN_Profiler_NewFrame and ended after it is
// recorded in the new frame - confirm that is intended.
DN_API void DN_Profiler_EndZone(DN_Profiler *profiler, DN_ProfilerZone zone)
{
uint64_t elapsed_tsc = DN_CPUGetTSC() - zone.begin_tsc;
DN_ProfilerAnchor *anchor_buffer = DN_Profiler_WriteBuffer();
DN_ProfilerAnchor *anchor = anchor_buffer + zone.anchor_index;
if (profiler->paused)
return;
DN_Assert(zone.anchor_index < profiler->anchors_per_frame);
DN_Assert(zone.parent_zone < profiler->anchors_per_frame);
DN_ProfilerAnchorArray array = DN_Profiler_FrameAnchors(profiler);
DN_ProfilerAnchor *anchor = array.data + zone.anchor_index;
// Read the end timestamp from the same source DN_Profiler_BeginZone selects.
DN_U64 tsc_now = profiler->tsc == DN_ProfilerTSC_RDTSC ? DN_CPUGetTSC() : DN_OS_PerfCounterNow();
DN_U64 elapsed_tsc = tsc_now - zone.begin_tsc;
anchor->hit_count++;
anchor->tsc_inclusive = zone.elapsed_tsc_at_zone_start + elapsed_tsc;
anchor->tsc_exclusive += elapsed_tsc;
// Inclusive time is overwritten with (value at zone entry + elapsed) rather
// than accumulated; exclusive time is accumulated.
anchor->tsc_inclusive = zone.elapsed_tsc_at_zone_start + elapsed_tsc;
anchor->tsc_exclusive += elapsed_tsc;
DN_ProfilerAnchor *parent_anchor = anchor_buffer + zone.parent_zone;
parent_anchor->tsc_exclusive -= elapsed_tsc;
g_dn_core->profiler->parent_zone = zone.parent_zone;
// Remove this zone's time from its parent's exclusive total. When the zone is
// its own parent the subtraction would cancel the addition made just above on
// the same anchor, so it is skipped.
if (zone.parent_zone != zone.anchor_index) {
DN_ProfilerAnchor *parent_anchor = array.data + zone.parent_zone;
parent_anchor->tsc_exclusive -= elapsed_tsc;
}
// Restore the zone that was open before this one began.
profiler->parent_zone = zone.parent_zone;
}
DN_API void DN_Profiler_SwapAnchorBuffer()
// Finishes the current frame and starts the next one. Does nothing while the
// profiler is paused. In order: ends the frame-wide zone, folds anchor 0's
// inclusive time into 'frame_avg_tsc', advances 'frame_index' (wrapping around
// the available frames), zeroes the anchors of the new frame, then begins a new
// "Profiler Frame" zone on anchor index 0.
// NOTE(review): DN_Profiler_Init leaves 'frame_zone' zero-initialised, so on
// the first call the EndZone below sees begin_tsc == 0 and records the raw
// timestamp as the elapsed time, which is then folded into 'frame_avg_tsc'.
// Confirm whether callers are expected to prime 'frame_zone' first.
// NOTE(review): 'frame_count' is 0 when anchors_count < anchors_per_frame,
// making the modulo below a division by zero; anchors_per_frame == 0 divides by
// zero one line earlier.
DN_API void DN_Profiler_NewFrame(DN_Profiler *profiler)
{
g_dn_core->profiler->active_anchor_buffer++;
g_dn_core->profiler->parent_zone = 0;
DN_ProfilerAnchor *anchors = DN_Profiler_WriteBuffer();
DN_Memset(anchors,
0,
DN_ArrayCountU(g_dn_core->profiler->anchors[0]) * sizeof(g_dn_core->profiler->anchors[0][0]));
if (profiler->paused)
return;
// NOTE: End the frame's zone
DN_Profiler_EndZone(profiler, profiler->frame_zone);
DN_ProfilerAnchorArray old_frame_anchors = DN_Profiler_FrameAnchors(profiler);
DN_ProfilerAnchor old_frame_anchor = old_frame_anchors.data[0];
// Halving the sum weights the newest frame at 50% each time, so this is a
// running blend of frame times rather than an arithmetic mean of all frames.
profiler->frame_avg_tsc = (profiler->frame_avg_tsc + old_frame_anchor.tsc_inclusive) / 2.f;
// NOTE: Bump to the next frame
DN_USize frame_count = profiler->anchors_count / profiler->anchors_per_frame;
profiler->frame_index = (profiler->frame_index + 1) % frame_count;
// NOTE: Zero out the anchors
DN_ProfilerAnchorArray next_anchors = DN_Profiler_FrameAnchors(profiler);
DN_Memset(next_anchors.data, 0, sizeof(*profiler->anchors) * next_anchors.count);
// NOTE: Start the frame's zone
profiler->frame_zone = DN_Profiler_BeginZone(profiler, DN_STR8("Profiler Frame"), 0);
}
DN_API void DN_Profiler_Dump(uint64_t tsc_per_second)
// Prints the anchors of the frame immediately before 'profiler->frame_index'.
// Anchor index 0 is skipped, as are anchors with a zero hit count. Times are
// converted to milliseconds with 'profiler->tsc_frequency'; an anchor whose
// exclusive and inclusive times differ prints both values.
DN_API void DN_Profiler_Dump(DN_Profiler *profiler)
{
DN_ProfilerAnchor *anchors = DN_Profiler_ReadBuffer();
for (size_t anchor_index = 1; anchor_index < DN_PROFILER_ANCHOR_BUFFER_SIZE; anchor_index++) {
DN_ProfilerAnchor const *anchor = anchors + anchor_index;
// NOTE(review): DN_Profiler_NewFrame wraps 'frame_index' back to 0, and in that
// state the previous frame is the last one in storage; this early return means
// that frame is never printed. Confirm whether that is intended.
if (profiler->frame_index == 0)
return;
DN_USize frame_index = profiler->frame_index - 1;
// NOTE(review): this compares 'frame_index' against 'anchors_per_frame', but
// DN_Profiler_NewFrame bounds 'frame_index' by anchors_count /
// anchors_per_frame (the frame count). It looks like the frame count was
// intended here - confirm.
DN_Assert(profiler->frame_index < profiler->anchors_per_frame);
DN_ProfilerAnchor *anchors = profiler->anchors + (frame_index * profiler->anchors_per_frame);
for (DN_USize index = 1; index < profiler->anchors_per_frame; index++) {
DN_ProfilerAnchor const *anchor = anchors + index;
if (!anchor->hit_count)
continue;
uint64_t tsc_exclusive = anchor->tsc_exclusive;
uint64_t tsc_inclusive = anchor->tsc_inclusive;
DN_F64 tsc_exclusive_milliseconds = tsc_exclusive * 1000 / DN_CAST(DN_F64) tsc_per_second;
DN_U64 tsc_exclusive = anchor->tsc_exclusive;
DN_U64 tsc_inclusive = anchor->tsc_inclusive;
DN_F64 tsc_exclusive_milliseconds = tsc_exclusive * 1000 / DN_CAST(DN_F64) profiler->tsc_frequency;
if (tsc_exclusive == tsc_inclusive) {
DN_OS_PrintOutLnF("%.*s[%u]: %.1fms", DN_STR_FMT(anchor->name), anchor->hit_count, tsc_exclusive_milliseconds);
} else {
DN_F64 tsc_inclusive_milliseconds = tsc_inclusive * 1000 / DN_CAST(DN_F64) tsc_per_second;
DN_F64 tsc_inclusive_milliseconds = tsc_inclusive * 1000 / DN_CAST(DN_F64) profiler->tsc_frequency;
DN_OS_PrintOutLnF("%.*s[%u]: %.1f/%.1fms",
DN_STR_FMT(anchor->name),
anchor->hit_count,
@ -442,5 +487,15 @@ DN_API void DN_Profiler_Dump(uint64_t tsc_per_second)
}
}
}
#endif // !defined(DN_NO_PROFILER)
// Converts a timestamp-counter duration into seconds by dividing it by the
// profiler's 'tsc_frequency'. The division is carried out in DN_F64.
DN_API DN_F64 DN_Profiler_SecFromTSC(DN_Profiler *profiler, DN_U64 duration_tsc)
{
    DN_F64 const ticks            = DN_CAST(DN_F64)duration_tsc;
    DN_F64 const ticks_per_second = DN_CAST(DN_F64)profiler->tsc_frequency;
    return ticks / ticks_per_second;
}
// Converts a timestamp-counter duration into milliseconds: the duration is
// divided by the profiler's 'tsc_frequency' and the quotient scaled by 1000.
DN_API DN_F64 DN_Profiler_MsFromTSC(DN_Profiler *profiler, DN_U64 duration_tsc)
{
    DN_F64 const ticks_per_second = DN_CAST(DN_F64)profiler->tsc_frequency;
    DN_F64 const seconds          = DN_CAST(DN_F64)duration_tsc / ticks_per_second;
    return seconds * 1000.0;
}

View File

@ -3,8 +3,7 @@
#include "../dn_base_inc.h"
// NOTE: DN_StackTrace /////////////////////////////////////////////////////////////////////////////
// NOTE: DN_Debug //////////////////////////////////////////////////////////////////////////////////
// NOTE: DN_Debug
enum DN_DebugAllocFlag
{
DN_DebugAllocFlag_Freed = 1 << 0,
@ -26,12 +25,7 @@ static_assert(sizeof(DN_DebugAlloc) == 64 || sizeof(DN_DebugAlloc) == 32, // NOT
"memory tracking can get expensive. Enforce that there is no "
"unexpected padding.");
// NOTE: DN_Profiler ///////////////////////////////////////////////////////////////////////////////
#if !defined(DN_NO_PROFILER)
#if !defined(DN_PROFILER_ANCHOR_BUFFER_SIZE)
#define DN_PROFILER_ANCHOR_BUFFER_SIZE 256
#endif
// NOTE: DN_Profiler
struct DN_ProfilerAnchor
{
// Inclusive refers to the time spent to complete the function call
@ -55,47 +49,50 @@ struct DN_ProfilerZone
DN_U64 elapsed_tsc_at_zone_start;
};
#if defined(__cplusplus)
struct DN_ProfilerZoneScope
// Pointer + count view over a run of profiler anchors. As filled in by
// DN_Profiler_FrameAnchorsFromIndex, 'data' points into the profiler's own
// anchor storage and 'count' is the profiler's 'anchors_per_frame'.
struct DN_ProfilerAnchorArray
{
DN_ProfilerZoneScope(DN_Str8 name, DN_U16 anchor_index);
~DN_ProfilerZoneScope();
DN_ProfilerZone zone;
DN_ProfilerAnchor *data;
DN_USize count;
};
#define DN_Profiler_ZoneScopeAtIndex(name, anchor_index) auto DN_UniqueName(profile_zone_) = DN_ProfilerZoneScope(DN_STR8(name), anchor_index)
#define DN_Profiler_ZoneScope(name) DN_Profiler_ZoneScopeAtIndex(name, __COUNTER__ + 1)
#endif
#define DN_Profiler_ZoneBlockIndex(name, index) \
for (DN_ProfilerZone DN_UniqueName(profile_zone__) = DN_Profiler_BeginZoneAtIndex(name, index), DN_UniqueName(dummy__) = {}; \
DN_UniqueName(dummy__).begin_tsc == 0; \
DN_Profiler_EndZone(DN_UniqueName(profile_zone__)), DN_UniqueName(dummy__).begin_tsc = 1)
#define DN_Profiler_ZoneBlock(name) DN_Profiler_ZoneBlockIndex(DN_STR8(name), __COUNTER__ + 1)
enum DN_ProfilerAnchorBuffer
// Selects the timestamp source used by DN_Profiler_BeginZone/EndZone.
enum DN_ProfilerTSC
{
DN_ProfilerAnchorBuffer_Back,
DN_ProfilerAnchorBuffer_Front,
// Timestamps are read with DN_CPUGetTSC().
DN_ProfilerTSC_RDTSC,
// Timestamps are read with DN_OS_PerfCounterNow(); Begin/EndZone take this
// path for any value other than DN_ProfilerTSC_RDTSC.
DN_ProfilerTSC_OSPerformanceCounter,
};
// Profiler state. Anchor storage is supplied by the caller through
// DN_Profiler_Init and is addressed as consecutive frames of
// 'anchors_per_frame' anchors each.
struct DN_Profiler
{
DN_ProfilerAnchor anchors[2][DN_PROFILER_ANCHOR_BUFFER_SIZE];
DN_U8 active_anchor_buffer;
DN_U16 parent_zone;
// Frame currently being written; advanced (with wrap-around) by
// DN_Profiler_NewFrame.
DN_USize frame_index;
// Caller-supplied anchor storage and its total length in anchors.
DN_ProfilerAnchor *anchors;
DN_USize anchors_count;
// Number of anchors per frame; also the exclusive upper bound asserted on zone
// anchor indices.
DN_USize anchors_per_frame;
// Anchor index of the zone currently open; saved and restored by
// DN_Profiler_BeginZone/EndZone.
DN_U16 parent_zone;
// When true, BeginZone, EndZone and NewFrame return without recording.
bool paused;
// Timestamp source, and the divisor used to convert timestamps to seconds.
DN_ProfilerTSC tsc;
DN_U64 tsc_frequency;
// Zone opened on anchor index 0 by DN_Profiler_NewFrame and closed by the next
// DN_Profiler_NewFrame call.
DN_ProfilerZone frame_zone;
// Running blend of anchor 0's inclusive time, updated by DN_Profiler_NewFrame.
DN_F64 frame_avg_tsc;
};
DN_API DN_ProfilerAnchor * DN_Profiler_ReadBuffer ();
DN_API DN_ProfilerAnchor * DN_Profiler_WriteBuffer ();
#define DN_Profiler_BeginZone(name) DN_Profiler_BeginZoneAtIndex(DN_STR8(name), __COUNTER__ + 1)
DN_API DN_ProfilerZone DN_Profiler_BeginZoneAtIndex (DN_Str8 name, DN_U16 anchor_index);
DN_API void DN_Profiler_EndZone (DN_ProfilerZone zone);
DN_API DN_ProfilerAnchor * DN_Profiler_AnchorBuffer (DN_ProfilerAnchorBuffer buffer);
DN_API void DN_Profiler_SwapAnchorBuffer ();
DN_API void DN_Profiler_Dump (DN_U64 tsc_per_second);
#endif // !defined(DN_NO_PROFILER)
// Expands to the three ';'-separated clauses of a for-statement header
// (declaration; condition; step) without the surrounding 'for (' and ')'.
// Presumably meant to be used as
//   for (DN_Profiler_ZoneLoop(prof, "name", index)) { ... }
// - TODO confirm against call sites. The declaration begins the zone; the step
// ends it and sets the dummy's begin_tsc to 1 so the condition fails after one
// pass. 'prof' is expanded twice, so it should be a side-effect free
// expression.
#define DN_Profiler_ZoneLoop(prof, name, index) \
DN_ProfilerZone DN_UniqueName(zone_) = DN_Profiler_BeginZone(prof, DN_STR8(name), index), DN_UniqueName(dummy_) = {}; \
DN_UniqueName(dummy_).begin_tsc == 0; \
DN_Profiler_EndZone(prof, DN_UniqueName(zone_)), DN_UniqueName(dummy_).begin_tsc = 1
// Same as DN_Profiler_ZoneLoop with the anchor index taken from
// __COUNTER__ + 1, so index 0 (used by DN_Profiler_NewFrame for the
// "Profiler Frame" zone) is never handed out automatically.
// NOTE(review): DN_Profiler_BeginZone asserts index < anchors_per_frame, so
// __COUNTER__ must stay below that limit wherever this is used.
#define DN_Profiler_ZoneLoopAuto(prof, name) DN_Profiler_ZoneLoop(prof, name, __COUNTER__ + 1)
DN_API DN_Profiler DN_Profiler_Init (DN_ProfilerAnchor *anchors, DN_USize count, DN_USize anchors_per_frame, DN_ProfilerTSC tsc, DN_U64 tsc_frequency);
DN_API DN_ProfilerZone DN_Profiler_BeginZone (DN_Profiler *profiler, DN_Str8 name, DN_U16 anchor_index);
#define DN_Profiler_BeginZoneAuto(prof, name) DN_Profiler_BeginZone(prof, DN_STR8(name), __COUNTER__ + 1)
DN_API void DN_Profiler_EndZone (DN_Profiler *profiler, DN_ProfilerZone zone);
DN_API DN_USize DN_Profiler_FrameCount (DN_Profiler const *profiler);
DN_API DN_ProfilerAnchorArray DN_Profiler_FrameAnchorsFromIndex (DN_Profiler *profiler, DN_USize frame_index);
DN_API DN_ProfilerAnchorArray DN_Profiler_FrameAnchors (DN_Profiler *profiler);
DN_API void DN_Profiler_NewFrame (DN_Profiler *profiler);
DN_API void DN_Profiler_Dump (DN_Profiler *profiler);
DN_API DN_F64 DN_Profiler_SecFromTSC (DN_Profiler *profiler, DN_U64 duration_tsc);
DN_API DN_F64 DN_Profiler_MsFromTSC (DN_Profiler *profiler, DN_U64 duration_tsc);
#if defined(DN_LEAK_TRACKING)

View File

@ -272,7 +272,7 @@ struct DN_OSCore
DN_CPUReport cpu_report;
DN_OSTLS tls; // Thread local storage state for the main thread.
// NOTE: Logging ///////////////////////////////////////////////////////////////////////////////
// NOTE: Logging
DN_LOGEmitFromTypeFVFunc * log_callback; // Set this pointer to override the logging routine
void * log_user_data; // User pointer passed into 'log_callback'
bool log_to_file; // Output logs to file as well as standard out
@ -280,12 +280,12 @@ struct DN_OSCore
DN_TicketMutex log_file_mutex; // Is locked when instantiating the log_file for the first time
bool log_no_colour; // Disable colours in the logging output
// NOTE: OS //////////////////////////////////////////////////////////////////////////////////////
// NOTE: OS
DN_U32 logical_processor_count;
DN_U32 page_size;
DN_U32 alloc_granularity;
// NOTE: Memory ////////////////////////////////////////////////////////////////////////////////
// NOTE: Memory
// Total OS mem allocs in lifetime of program (e.g. malloc, VirtualAlloc, HeapAlloc ...). This
// only includes allocations routed through the library such as the growing nature of arenas or
// using the memory allocation routines in the library like DN_OS_MemCommit and so forth.