From b6c92053311d3f1a918aa00dc3911911aa92a20d Mon Sep 17 00:00:00 2001 From: doylet Date: Wed, 17 Sep 2025 21:48:33 +1000 Subject: [PATCH] Revamp the profiler --- Source/Core/dn_core.cpp | 18 +--- Source/Core/dn_core.h | 19 +---- Source/Core/dn_core_debug.cpp | 155 +++++++++++++++++++++++----------- Source/Core/dn_core_debug.h | 75 ++++++++-------- Source/OS/dn_os.h | 6 +- 5 files changed, 149 insertions(+), 124 deletions(-) diff --git a/Source/Core/dn_core.cpp b/Source/Core/dn_core.cpp index 2b81aca..7ae7514 100644 --- a/Source/Core/dn_core.cpp +++ b/Source/Core/dn_core.cpp @@ -5,11 +5,7 @@ DN_API void DN_Core_Init(DN_Core *core, DN_CoreOnInit on_init) DN_Assert(g_dn_os_core_); g_dn_core = core; - // NOTE Initialise fields ////////////////////////////////////////////////////////////////////// - #if !defined(DN_NO_PROFILER) - core->profiler = &core->profiler_default_instance; - #endif - + // NOTE Initialise fields #if defined(DN_LEAK_TRACKING) // NOTE: Setup the allocation table with allocation tracking turned off on // the arena we're using to initialise the table. @@ -44,10 +40,6 @@ DN_API void DN_Core_Init(DN_Core *core, DN_CoreOnInit on_init) DN_Str8Builder_AppendRef(&builder, DN_STR8(" Allocation leak tracing\n")); #endif - #if !defined(DN_NO_PROFILER) - DN_Str8Builder_AppendRef(&builder, DN_STR8(" TSC profiler available\n")); - #endif - #if defined(DN_PLATFORM_EMSCRIPTEN) || defined(DN_PLATFORM_POSIX) DN_POSIXCore *posix = DN_CAST(DN_POSIXCore *)g_dn_os_core_->platform_context; DN_Str8Builder_AppendF(&builder, " Clock GetTime: %S\n", posix->clock_monotonic_raw ? DN_STR8("CLOCK_MONOTONIC_RAW") : DN_STR8("CLOCK_MONOTONIC")); @@ -90,11 +82,3 @@ DN_API void DN_Core_BeginFrame() { DN_AtomicSetValue64(&g_dn_os_core_->mem_allocs_frame, 0); } - -#if !defined(DN_NO_PROFILER) -DN_API void DN_Core_SetProfiler(DN_Profiler *profiler) -{ - if (profiler) - g_dn_core->profiler = profiler; -} -#endif diff --git a/Source/Core/dn_core.h b/Source/Core/dn_core.h index 20aad31..b609e8f 100644 --- a/Source/Core/dn_core.h +++ b/Source/Core/dn_core.h @@ -1,24 +1,16 @@ #if !defined(DN_CORE_H) #define DN_CORE_H -// NOTE: DN_Core /////////////////////////////////////////////////////////////////////////////////// -// Book-keeping data for the library and allow customisation of certain features -// provided. +// NOTE: DN_Core struct DN_Core { - // NOTE: Leak Tracing ////////////////////////////////////////////////////////////////////////// + // NOTE: Leak Tracing #if defined(DN_LEAK_TRACKING) DN_DSMap alloc_table; DN_TicketMutex alloc_table_mutex; DN_Arena alloc_table_arena; #endif DN_U64 alloc_table_bytes_allocated_for_stack_traces; - - // NOTE: Profiler ////////////////////////////////////////////////////////////////////////////// - #if !defined(DN_NO_PROFILER) - DN_Profiler * profiler; - DN_Profiler profiler_default_instance; - #endif }; enum DN_CoreOnInit @@ -29,9 +21,6 @@ enum DN_CoreOnInit DN_CoreOnInit_LogAllFeatures = DN_CoreOnInit_LogLibFeatures | DN_CoreOnInit_LogCPUFeatures, }; -DN_API void DN_Core_Init (DN_Core *core, DN_CoreOnInit on_init); -DN_API void DN_Core_BeginFrame (); -#if !defined(DN_NO_PROFILER) -DN_API void DN_Core_SetProfiler (DN_Profiler *profiler); -#endif +DN_API void DN_Core_Init (DN_Core *core, DN_CoreOnInit on_init); +DN_API void DN_Core_BeginFrame(); #endif // !defined(DN_CORE_H) diff --git a/Source/Core/dn_core_debug.cpp b/Source/Core/dn_core_debug.cpp index ccaf760..be47734 100644 --- a/Source/Core/dn_core_debug.cpp +++ b/Source/Core/dn_core_debug.cpp @@ -2,6 +2,7 @@ #include "../dn_base_inc.h" #include "../dn_os_inc.h" +#include "../dn_core_inc.h" DN_API DN_StackTraceWalkResult DN_StackTrace_Walk(DN_Arena *arena, uint16_t limit) { @@ -349,91 +350,135 @@ DN_API void DN_DBGDumpLeaks() } #endif // DN_LEAK_TRACKING -#if !defined(DN_NO_PROFILER) -// NOTE: DN_Profiler /////////////////////////////////////////////////////////////////////////////// -DN_API DN_ProfilerZoneScope::DN_ProfilerZoneScope(DN_Str8 name, uint16_t anchor_index) +// NOTE: DN_Profiler +DN_API DN_Profiler DN_Profiler_Init(DN_ProfilerAnchor *anchors, DN_USize count, DN_USize anchors_per_frame, DN_ProfilerTSC tsc, DN_U64 tsc_frequency) { - zone = DN_Profiler_BeginZoneAtIndex(name, anchor_index); -} - -DN_API DN_ProfilerZoneScope::~DN_ProfilerZoneScope() -{ - DN_Profiler_EndZone(zone); -} - -DN_API DN_ProfilerAnchor *DN_Profiler_ReadBuffer() -{ - uint8_t mask = DN_ArrayCountU(g_dn_core->profiler->anchors) - 1; - DN_ProfilerAnchor *result = g_dn_core->profiler->anchors[(g_dn_core->profiler->active_anchor_buffer - 1) & mask]; + DN_Profiler result = {}; + result.anchors = anchors; + result.anchors_count = count; + result.anchors_per_frame = anchors_per_frame; + result.tsc = tsc; + result.tsc_frequency = tsc_frequency; return result; } -DN_API DN_ProfilerAnchor *DN_Profiler_WriteBuffer() +DN_API DN_USize DN_Profiler_FrameCount(DN_Profiler const *profiler) { - uint8_t mask = DN_ArrayCountU(g_dn_core->profiler->anchors) - 1; - DN_ProfilerAnchor *result = g_dn_core->profiler->anchors[(g_dn_core->profiler->active_anchor_buffer + 0) & mask]; + DN_USize result = profiler->anchors_count / profiler->anchors_per_frame; return result; } -DN_API DN_ProfilerZone DN_Profiler_BeginZoneAtIndex(DN_Str8 name, uint16_t anchor_index) +DN_API DN_ProfilerAnchorArray DN_Profiler_FrameAnchorsFromIndex(DN_Profiler *profiler, DN_USize frame_index) { - DN_ProfilerAnchor *anchor = DN_Profiler_WriteBuffer() + anchor_index; + DN_ProfilerAnchorArray result = {}; + DN_USize anchor_offset = frame_index * profiler->anchors_per_frame; + result.data = profiler->anchors + anchor_offset; + result.count = profiler->anchors_per_frame; + return result; +} + +DN_API DN_ProfilerAnchorArray DN_Profiler_FrameAnchors(DN_Profiler *profiler) +{ + DN_ProfilerAnchorArray result = DN_Profiler_FrameAnchorsFromIndex(profiler, profiler->frame_index); + return result; +} + +DN_API DN_ProfilerZone DN_Profiler_BeginZone(DN_Profiler *profiler, DN_Str8 name, DN_U16 anchor_index) +{ + DN_ProfilerZone result = {}; + if (profiler->paused) + return result; + + DN_Assert(anchor_index < profiler->anchors_per_frame); + DN_ProfilerAnchor *anchor = DN_Profiler_FrameAnchors(profiler).data + anchor_index; + anchor->name = name; + // TODO: We need per-thread-local-storage profiler so that we can use these apis // across threads. For now, we let them overwrite each other but this is not tenable. #if 0 if (DN_Str8_HasData(anchor->name) && anchor->name != name) DN_AssertF(name == anchor->name, "Potentially overwriting a zone by accident? Anchor is '%.*s', name is '%.*s'", DN_STR_FMT(anchor->name), DN_STR_FMT(name)); #endif - anchor->name = name; - DN_ProfilerZone result = {}; - result.begin_tsc = DN_CPUGetTSC(); + + if (profiler->tsc == DN_ProfilerTSC_RDTSC) + result.begin_tsc = DN_CPUGetTSC(); + else + result.begin_tsc = DN_OS_PerfCounterNow(); result.anchor_index = anchor_index; - result.parent_zone = g_dn_core->profiler->parent_zone; + result.parent_zone = profiler->parent_zone; result.elapsed_tsc_at_zone_start = anchor->tsc_inclusive; - g_dn_core->profiler->parent_zone = anchor_index; + profiler->parent_zone = anchor_index; return result; } -DN_API void DN_Profiler_EndZone(DN_ProfilerZone zone) +DN_API void DN_Profiler_EndZone(DN_Profiler *profiler, DN_ProfilerZone zone) { - uint64_t elapsed_tsc = DN_CPUGetTSC() - zone.begin_tsc; - DN_ProfilerAnchor *anchor_buffer = DN_Profiler_WriteBuffer(); - DN_ProfilerAnchor *anchor = anchor_buffer + zone.anchor_index; + if (profiler->paused) + return; + + DN_Assert(zone.anchor_index < profiler->anchors_per_frame); + DN_Assert(zone.parent_zone < profiler->anchors_per_frame); + + DN_ProfilerAnchorArray array = DN_Profiler_FrameAnchors(profiler); + DN_ProfilerAnchor *anchor = array.data + zone.anchor_index; + DN_U64 tsc_now = profiler->tsc == DN_ProfilerTSC_RDTSC ? DN_CPUGetTSC() : DN_OS_PerfCounterNow(); + DN_U64 elapsed_tsc = tsc_now - zone.begin_tsc; anchor->hit_count++; - anchor->tsc_inclusive = zone.elapsed_tsc_at_zone_start + elapsed_tsc; - anchor->tsc_exclusive += elapsed_tsc; + anchor->tsc_inclusive = zone.elapsed_tsc_at_zone_start + elapsed_tsc; + anchor->tsc_exclusive += elapsed_tsc; - DN_ProfilerAnchor *parent_anchor = anchor_buffer + zone.parent_zone; - parent_anchor->tsc_exclusive -= elapsed_tsc; - g_dn_core->profiler->parent_zone = zone.parent_zone; + if (zone.parent_zone != zone.anchor_index) { + DN_ProfilerAnchor *parent_anchor = array.data + zone.parent_zone; + parent_anchor->tsc_exclusive -= elapsed_tsc; + } + profiler->parent_zone = zone.parent_zone; } -DN_API void DN_Profiler_SwapAnchorBuffer() +DN_API void DN_Profiler_NewFrame(DN_Profiler *profiler) { - g_dn_core->profiler->active_anchor_buffer++; - g_dn_core->profiler->parent_zone = 0; - DN_ProfilerAnchor *anchors = DN_Profiler_WriteBuffer(); - DN_Memset(anchors, - 0, - DN_ArrayCountU(g_dn_core->profiler->anchors[0]) * sizeof(g_dn_core->profiler->anchors[0][0])); + if (profiler->paused) + return; + + // NOTE: End the frame's zone + DN_Profiler_EndZone(profiler, profiler->frame_zone); + DN_ProfilerAnchorArray old_frame_anchors = DN_Profiler_FrameAnchors(profiler); + DN_ProfilerAnchor old_frame_anchor = old_frame_anchors.data[0]; + profiler->frame_avg_tsc = (profiler->frame_avg_tsc + old_frame_anchor.tsc_inclusive) / 2.f; + + // NOTE: Bump to the next frame + DN_USize frame_count = profiler->anchors_count / profiler->anchors_per_frame; + profiler->frame_index = (profiler->frame_index + 1) % frame_count; + + // NOTE: Zero out the anchors + DN_ProfilerAnchorArray next_anchors = DN_Profiler_FrameAnchors(profiler); + DN_Memset(next_anchors.data, 0, sizeof(*profiler->anchors) * next_anchors.count); + + // NOTE: Start the frame's zone + profiler->frame_zone = DN_Profiler_BeginZone(profiler, DN_STR8("Profiler Frame"), 0); } -DN_API void DN_Profiler_Dump(uint64_t tsc_per_second) +DN_API void DN_Profiler_Dump(DN_Profiler *profiler) { - DN_ProfilerAnchor *anchors = DN_Profiler_ReadBuffer(); - for (size_t anchor_index = 1; anchor_index < DN_PROFILER_ANCHOR_BUFFER_SIZE; anchor_index++) { - DN_ProfilerAnchor const *anchor = anchors + anchor_index; + if (profiler->frame_index == 0) + return; + + DN_USize frame_index = profiler->frame_index - 1; + DN_Assert(profiler->frame_index < profiler->anchors_per_frame); + + DN_ProfilerAnchor *anchors = profiler->anchors + (frame_index * profiler->anchors_per_frame); + for (DN_USize index = 1; index < profiler->anchors_per_frame; index++) { + DN_ProfilerAnchor const *anchor = anchors + index; if (!anchor->hit_count) continue; - uint64_t tsc_exclusive = anchor->tsc_exclusive; - uint64_t tsc_inclusive = anchor->tsc_inclusive; - DN_F64 tsc_exclusive_milliseconds = tsc_exclusive * 1000 / DN_CAST(DN_F64) tsc_per_second; + DN_U64 tsc_exclusive = anchor->tsc_exclusive; + DN_U64 tsc_inclusive = anchor->tsc_inclusive; + DN_F64 tsc_exclusive_milliseconds = tsc_exclusive * 1000 / DN_CAST(DN_F64) profiler->tsc_frequency; if (tsc_exclusive == tsc_inclusive) { DN_OS_PrintOutLnF("%.*s[%u]: %.1fms", DN_STR_FMT(anchor->name), anchor->hit_count, tsc_exclusive_milliseconds); } else { - DN_F64 tsc_inclusive_milliseconds = tsc_inclusive * 1000 / DN_CAST(DN_F64) tsc_per_second; + DN_F64 tsc_inclusive_milliseconds = tsc_inclusive * 1000 / DN_CAST(DN_F64) profiler->tsc_frequency; DN_OS_PrintOutLnF("%.*s[%u]: %.1f/%.1fms", DN_STR_FMT(anchor->name), anchor->hit_count, @@ -442,5 +487,15 @@ DN_API void DN_Profiler_Dump(uint64_t tsc_per_second) } } } -#endif // !defined(DN_NO_PROFILER) +DN_API DN_F64 DN_Profiler_SecFromTSC(DN_Profiler *profiler, DN_U64 duration_tsc) +{ + DN_F64 result = DN_CAST(DN_F64)duration_tsc / profiler->tsc_frequency; + return result; +} + +DN_API DN_F64 DN_Profiler_MsFromTSC(DN_Profiler *profiler, DN_U64 duration_tsc) +{ + DN_F64 result = DN_CAST(DN_F64)duration_tsc / profiler->tsc_frequency * 1000.0; + return result; +} diff --git a/Source/Core/dn_core_debug.h b/Source/Core/dn_core_debug.h index 4d5fa7f..fe68d3c 100644 --- a/Source/Core/dn_core_debug.h +++ b/Source/Core/dn_core_debug.h @@ -3,8 +3,7 @@ #include "../dn_base_inc.h" -// NOTE: DN_StackTrace ///////////////////////////////////////////////////////////////////////////// -// NOTE: DN_Debug ////////////////////////////////////////////////////////////////////////////////// +// NOTE: DN_Debug enum DN_DebugAllocFlag { DN_DebugAllocFlag_Freed = 1 << 0, @@ -26,12 +25,7 @@ static_assert(sizeof(DN_DebugAlloc) == 64 || sizeof(DN_DebugAlloc) == 32, // NOT "memory tracking can get expensive. Enforce that there is no " "unexpected padding."); -// NOTE: DN_Profiler /////////////////////////////////////////////////////////////////////////////// -#if !defined(DN_NO_PROFILER) -#if !defined(DN_PROFILER_ANCHOR_BUFFER_SIZE) - #define DN_PROFILER_ANCHOR_BUFFER_SIZE 256 -#endif - +// NOTE: DN_Profiler struct DN_ProfilerAnchor { // Inclusive refers to the time spent to complete the function call @@ -55,47 +49,50 @@ struct DN_ProfilerZone DN_U64 elapsed_tsc_at_zone_start; }; -#if defined(__cplusplus) -struct DN_ProfilerZoneScope +struct DN_ProfilerAnchorArray { - DN_ProfilerZoneScope(DN_Str8 name, DN_U16 anchor_index); - ~DN_ProfilerZoneScope(); - DN_ProfilerZone zone; + DN_ProfilerAnchor *data; + DN_USize count; }; -#define DN_Profiler_ZoneScopeAtIndex(name, anchor_index) auto DN_UniqueName(profile_zone_) = DN_ProfilerZoneScope(DN_STR8(name), anchor_index) -#define DN_Profiler_ZoneScope(name) DN_Profiler_ZoneScopeAtIndex(name, __COUNTER__ + 1) -#endif - -#define DN_Profiler_ZoneBlockIndex(name, index) \ - for (DN_ProfilerZone DN_UniqueName(profile_zone__) = DN_Profiler_BeginZoneAtIndex(name, index), DN_UniqueName(dummy__) = {}; \ - DN_UniqueName(dummy__).begin_tsc == 0; \ - DN_Profiler_EndZone(DN_UniqueName(profile_zone__)), DN_UniqueName(dummy__).begin_tsc = 1) - -#define DN_Profiler_ZoneBlock(name) DN_Profiler_ZoneBlockIndex(DN_STR8(name), __COUNTER__ + 1) - -enum DN_ProfilerAnchorBuffer +enum DN_ProfilerTSC { - DN_ProfilerAnchorBuffer_Back, - DN_ProfilerAnchorBuffer_Front, + DN_ProfilerTSC_RDTSC, + DN_ProfilerTSC_OSPerformanceCounter, }; struct DN_Profiler { - DN_ProfilerAnchor anchors[2][DN_PROFILER_ANCHOR_BUFFER_SIZE]; - DN_U8 active_anchor_buffer; - DN_U16 parent_zone; + DN_USize frame_index; + DN_ProfilerAnchor *anchors; + DN_USize anchors_count; + DN_USize anchors_per_frame; + DN_U16 parent_zone; + bool paused; + DN_ProfilerTSC tsc; + DN_U64 tsc_frequency; + DN_ProfilerZone frame_zone; + DN_F64 frame_avg_tsc; }; -DN_API DN_ProfilerAnchor * DN_Profiler_ReadBuffer (); -DN_API DN_ProfilerAnchor * DN_Profiler_WriteBuffer (); -#define DN_Profiler_BeginZone(name) DN_Profiler_BeginZoneAtIndex(DN_STR8(name), __COUNTER__ + 1) -DN_API DN_ProfilerZone DN_Profiler_BeginZoneAtIndex (DN_Str8 name, DN_U16 anchor_index); -DN_API void DN_Profiler_EndZone (DN_ProfilerZone zone); -DN_API DN_ProfilerAnchor * DN_Profiler_AnchorBuffer (DN_ProfilerAnchorBuffer buffer); -DN_API void DN_Profiler_SwapAnchorBuffer (); -DN_API void DN_Profiler_Dump (DN_U64 tsc_per_second); -#endif // !defined(DN_NO_PROFILER) +#define DN_Profiler_ZoneLoop(prof, name, index) \ + DN_ProfilerZone DN_UniqueName(zone_) = DN_Profiler_BeginZone(prof, DN_STR8(name), index), DN_UniqueName(dummy_) = {}; \ + DN_UniqueName(dummy_).begin_tsc == 0; \ + DN_Profiler_EndZone(prof, DN_UniqueName(zone_)), DN_UniqueName(dummy_).begin_tsc = 1 + +#define DN_Profiler_ZoneLoopAuto(prof, name) DN_Profiler_ZoneLoop(prof, name, __COUNTER__ + 1) + +DN_API DN_Profiler DN_Profiler_Init (DN_ProfilerAnchor *anchors, DN_USize count, DN_USize anchors_per_frame, DN_ProfilerTSC tsc, DN_U64 tsc_frequency); +DN_API DN_ProfilerZone DN_Profiler_BeginZone (DN_Profiler *profiler, DN_Str8 name, DN_U16 anchor_index); +#define DN_Profiler_BeginZoneAuto(prof, name) DN_Profiler_BeginZone(prof, DN_STR8(name), __COUNTER__ + 1) +DN_API void DN_Profiler_EndZone (DN_Profiler *profiler, DN_ProfilerZone zone); +DN_API DN_USize DN_Profiler_FrameCount (DN_Profiler const *profiler); +DN_API DN_ProfilerAnchorArray DN_Profiler_FrameAnchorsFromIndex (DN_Profiler *profiler, DN_USize frame_index); +DN_API DN_ProfilerAnchorArray DN_Profiler_FrameAnchors (DN_Profiler *profiler); +DN_API void DN_Profiler_NewFrame (DN_Profiler *profiler); +DN_API void DN_Profiler_Dump (DN_Profiler *profiler); +DN_API DN_F64 DN_Profiler_SecFromTSC (DN_Profiler *profiler, DN_U64 duration_tsc); +DN_API DN_F64 DN_Profiler_MsFromTSC (DN_Profiler *profiler, DN_U64 duration_tsc); #if defined(DN_LEAK_TRACKING) diff --git a/Source/OS/dn_os.h b/Source/OS/dn_os.h index 06e80c3..c13ce33 100644 --- a/Source/OS/dn_os.h +++ b/Source/OS/dn_os.h @@ -272,7 +272,7 @@ struct DN_OSCore DN_CPUReport cpu_report; DN_OSTLS tls; // Thread local storage state for the main thread. - // NOTE: Logging /////////////////////////////////////////////////////////////////////////////// + // NOTE: Logging DN_LOGEmitFromTypeFVFunc * log_callback; // Set this pointer to override the logging routine void * log_user_data; // User pointer passed into 'log_callback' bool log_to_file; // Output logs to file as well as standard out @@ -280,12 +280,12 @@ struct DN_OSCore DN_TicketMutex log_file_mutex; // Is locked when instantiating the log_file for the first time bool log_no_colour; // Disable colours in the logging output - // NOTE: OS ////////////////////////////////////////////////////////////////////////////////////// + // NOTE: OS DN_U32 logical_processor_count; DN_U32 page_size; DN_U32 alloc_granularity; - // NOTE: Memory //////////////////////////////////////////////////////////////////////////////// + // NOTE: Memory // Total OS mem allocs in lifetime of program (e.g. malloc, VirtualAlloc, HeapAlloc ...). This // only includes allocations routed through the library such as the growing nature of arenas or // using the memory allocation routines in the library like DN_OS_MemCommit and so forth.