From d01cf53ff8eb31349c430e1bc4ddff0f69d7fee4 Mon Sep 17 00:00:00 2001 From: doyle Date: Thu, 24 Aug 2023 22:14:24 +1000 Subject: [PATCH] perfaware/part2: Add bandwidth tracking --- part2/haversine.c | 103 ++++-------------------------------- part2/haversine_generator.c | 6 ++- part2/haversine_stdlib.c | 69 ++++++++++++++++++++++++ part2/haversine_stdlib.h | 35 ++++++++++++ 4 files changed, 117 insertions(+), 96 deletions(-) diff --git a/part2/haversine.c b/part2/haversine.c index d41771a..2df39ea 100644 --- a/part2/haversine.c +++ b/part2/haversine.c @@ -3,94 +3,12 @@ #include #include #include -#include "haversine_stdlib.h" -#include "haversine_stdlib.c" #include +#include "haversine_stdlib.h" #include "listing_0065_haversine_formula.cpp" #include "listing_0074_platform_metrics.cpp" - -typedef struct ProfilerAnchor { - HAV_Str8 label; - u64 elapsed_tsc_exclusive; // Does not include children - u64 elapsed_tsc_inclusive; // Includes children - u64 hits; -} ProfilerAnchor; - -typedef struct Profiler { - ProfilerAnchor anchors[4096]; - u64 begin_tsc; - u64 end_tsc; - u64 parent_index; -} Profiler; - -static Profiler g_profiler; - -static void Profiler_Dump() -{ - u64 total_elapsed_tsc = g_profiler.end_tsc - g_profiler.begin_tsc; - u64 cpu_frequency = EstimateCPUTimerFreq(); - if (cpu_frequency) - printf("\nTotal time: %0.4fms (CPU freq %llu)\n", 1000.0 * (f64)total_elapsed_tsc / (f64)cpu_frequency, cpu_frequency); - - for (uint32_t index = 1; index < HAV_ARRAY_UCOUNT(g_profiler.anchors); index++) { - ProfilerAnchor const *anchor = g_profiler.anchors + index; - if (!anchor->elapsed_tsc_inclusive) - break; - - f64 percent = total_elapsed_tsc ? (f64)anchor->elapsed_tsc_exclusive / (f64)total_elapsed_tsc * 100.0 : 100.0; - printf(" %.*s[%zu]: %llu (%.2f%%", HAV_STR8_FMT(anchor->label), anchor->hits, anchor->elapsed_tsc_exclusive, percent); - if (anchor->elapsed_tsc_inclusive != anchor->elapsed_tsc_exclusive) { - f64 percent_w_children = total_elapsed_tsc ? 
((f64)anchor->elapsed_tsc_inclusive / (f64)total_elapsed_tsc * 100.0) : 100.0; - printf(", %.2f%% w/children", percent_w_children); - } - printf(")\n"); - } -} - -typedef struct ProfilerZone { - u64 parent_index; - uint32_t index; - HAV_Str8 label; - u64 elapsed_tsc_inclusive; - u64 tsc; -} ProfilerZone; - -#define Profiler_BeginZone(label) Profiler_BeginZone_(HAV_STR8(label), __COUNTER__ + 1) - -static ProfilerZone Profiler_BeginZone_(HAV_Str8 label, uint32_t index) -{ - ProfilerZone result = {0}; - #if defined(HAV_PROFILER) - result.index = index; - result.label = label; - result.tsc = ReadCPUTimer(); - result.elapsed_tsc_inclusive = g_profiler.anchors[index].elapsed_tsc_inclusive; - result.parent_index = g_profiler.parent_index; - g_profiler.parent_index = index; - #else - (void)label; (void)index; - #endif - return result; -} - -static void Profiler_EndZone(ProfilerZone zone) -{ - #if defined(HAV_PROFILER) - u64 elapsed_tsc = ReadCPUTimer() - zone.tsc; - ProfilerAnchor* anchor = g_profiler.anchors + zone.index; - ProfilerAnchor* parent = g_profiler.anchors + zone.parent_index; - - anchor->elapsed_tsc_exclusive += elapsed_tsc; - anchor->elapsed_tsc_inclusive = zone.elapsed_tsc_inclusive + elapsed_tsc; - anchor->label = zone.label; - anchor->hits++; - parent->elapsed_tsc_exclusive -= elapsed_tsc; - g_profiler.parent_index = zone.parent_index; - #else - (void)zone; - #endif -} +#include "haversine_stdlib.c" typedef struct Str8FindResult { bool found; @@ -174,20 +92,17 @@ int main(int argc, char **argv) if (argc == 3) arg_answers = (HAV_Str8){.data = argv[2], .size = strlen(argv[2])}; - ProfilerZone prof_file_read_zone = Profiler_BeginZone("File Read"); HAV_Buffer json_buffer = HAV_FileRead(arg_json.data); - Profiler_EndZone(prof_file_read_zone); - if (!HAV_BufferIsValid(json_buffer)) return 0; - ProfilerZone prof_parse_and_sum_zone = Profiler_BeginZone("Parse&Hav Sum"); + HAV_ProfilerZone prof_parse_and_sum_zone = HAV_Profiler_BeginZone("Parse&Hav Sum"); f64 
haversine_sum = 0; size_t pair_count = 0; HAV_Str8 json_it = (HAV_Str8){.data = json_buffer.data, .size = json_buffer.size}; for (;; pair_count++) { - ProfilerZone prof_json_parse_zone = Profiler_BeginZone("Parse"); f64 x0 = 0.f, y0 = 0.f, x1 = 0.f, y1 = 0.f; + HAV_ProfilerZone prof_json_parse_zone = HAV_Profiler_BeginZoneBandwidth("Parse", json_it.size); HAV_Str8BinarySplitResult x0_key = HAV_Str8_BinarySplit(json_it, HAV_STR8("x0")); if (x0_key.rhs.size) { Str8FindResult x0_find_value = FindFirstCharThatLooksLikeANumber(x0_key.rhs); @@ -220,16 +135,16 @@ int main(int argc, char **argv) HAV_STR8_FMT(y1_value.lhs), y1); #endif - Profiler_EndZone(prof_json_parse_zone); + HAV_Profiler_EndZone(prof_json_parse_zone); if (!x0_key.rhs.size) break; - ProfilerZone prof_haversine_sum_zone = Profiler_BeginZone("Hav Sum"); + HAV_ProfilerZone prof_haversine_sum_zone = HAV_Profiler_BeginZoneBandwidth("Hav Sum", sizeof(x0) + sizeof(y0) + sizeof(x1) + sizeof(y1)); f64 haversine_dist = ReferenceHaversine(x0, y0, x1, y1, /*EarthRadius*/ 6372.8); haversine_sum += haversine_dist; - Profiler_EndZone(prof_haversine_sum_zone); + HAV_Profiler_EndZone(prof_haversine_sum_zone); } - Profiler_EndZone(prof_parse_and_sum_zone); + HAV_Profiler_EndZone(prof_parse_and_sum_zone); haversine_sum /= pair_count; size_t input_size = json_buffer.size; @@ -253,6 +168,6 @@ int main(int argc, char **argv) } g_profiler.end_tsc = ReadCPUTimer(); - Profiler_Dump(); + HAV_Profiler_Dump(); return 0; } diff --git a/part2/haversine_generator.c b/part2/haversine_generator.c index 6ea5baf..190dabc 100644 --- a/part2/haversine_generator.c +++ b/part2/haversine_generator.c @@ -3,10 +3,12 @@ #include #include #include -#include "haversine_stdlib.h" -#include "haversine_stdlib.c" #include + +#include "haversine_stdlib.h" +#include "listing_0074_platform_metrics.cpp" #include "listing_0065_haversine_formula.cpp" +#include "haversine_stdlib.c" #define PRINT_USAGE HAV_PrintLnFmt("Usage: %s [uniform/cluster] [random seed] 
[number of coordinate pairs to generate]", argv[0]) int main(int argc, char **argv) diff --git a/part2/haversine_stdlib.c b/part2/haversine_stdlib.c index 8a93d01..97022b9 100644 --- a/part2/haversine_stdlib.c +++ b/part2/haversine_stdlib.c @@ -55,6 +55,73 @@ bool HAV_CharIsDigit(char ch) return result; }
+void HAV_Profiler_Dump(void) // Prints the total time (when a CPU frequency estimate exists), then one line per anchor from index 1 up to the first anchor with no inclusive time
+{
+    u64 total_elapsed_tsc = g_profiler.end_tsc - g_profiler.begin_tsc;
+    u64 cpu_frequency = EstimateCPUTimerFreq();
+    if (cpu_frequency)
+        printf("\nTotal time: %0.4fms (CPU freq %llu)\n", 1000.0 * (f64)total_elapsed_tsc / (f64)cpu_frequency, cpu_frequency);
+
+    for (uint32_t index = 1; index < HAV_ARRAY_UCOUNT(g_profiler.anchors); index++) {
+        HAV_ProfilerAnchor const *anchor = g_profiler.anchors + index;
+        if (!anchor->elapsed_tsc_inclusive)
+            break;
+
+        f64 percent = total_elapsed_tsc ? (f64)anchor->elapsed_tsc_exclusive / (f64)total_elapsed_tsc * 100.0 : 100.0;
+        printf(" %.*s[%llu]: %llu (%.2f%%", HAV_STR8_FMT(anchor->label), anchor->hits, anchor->elapsed_tsc_exclusive, percent); // hits is a u64, printed with %llu like the other u64 values here
+        if (anchor->elapsed_tsc_inclusive != anchor->elapsed_tsc_exclusive) {
+            f64 percent_w_children = total_elapsed_tsc ? ((f64)anchor->elapsed_tsc_inclusive / (f64)total_elapsed_tsc * 100.0) : 100.0;
+            printf(", %.2f%% w/children", percent_w_children);
+        }
+        printf(")");
+
+        if (anchor->byte_count && cpu_frequency) { // Bandwidth needs a non-zero frequency estimate, otherwise elapsed_s divides by zero
+            f64 megabytes_processed = anchor->byte_count / (1024.f * 1024.f);
+            f64 elapsed_s = anchor->elapsed_tsc_inclusive / HAV_CAST(f64)cpu_frequency;
+            f64 bytes_per_s = anchor->byte_count / elapsed_s;
+            f64 gigabytes_bandwidth = bytes_per_s / (1024.f * 1024.f * 1024.f);
+            printf(" %.3fmb at %.2fgb/s", megabytes_processed, gigabytes_bandwidth);
+        }
+        printf("\n");
+    }
+}
+
+HAV_ProfilerZone HAV_Profiler_BeginZone_(HAV_Str8 label, uint32_t index, u64 byte_count) // Opens a zone on anchor `index`; returns a zeroed zone when HAV_PROFILER is not defined
+{
+    HAV_ProfilerZone result = {0};
+    #if defined(HAV_PROFILER)
+    result.index = index;
+    result.label = label;
+    result.tsc = ReadCPUTimer();
+    result.elapsed_tsc_inclusive = g_profiler.anchors[index].elapsed_tsc_inclusive; // Snapshot of the anchor's inclusive total, re-applied in EndZone
+    result.byte_count = byte_count;
+    result.parent_index = g_profiler.parent_index;
+    g_profiler.parent_index = index; // Zones opened after this one see it as their parent
+    #else
+    (void)label; (void)index; (void)byte_count;
+    #endif
+    return result;
+}
+
+void HAV_Profiler_EndZone(HAV_ProfilerZone zone) // Closes a zone: folds its elapsed TSC, bytes and a hit into its anchor; does nothing when HAV_PROFILER is not defined
+{
+    #if defined(HAV_PROFILER)
+    u64 elapsed_tsc = ReadCPUTimer() - zone.tsc;
+    HAV_ProfilerAnchor* anchor = g_profiler.anchors + zone.index;
+    HAV_ProfilerAnchor* parent = g_profiler.anchors + zone.parent_index;
+
+    anchor->elapsed_tsc_exclusive += elapsed_tsc;
+    anchor->elapsed_tsc_inclusive = zone.elapsed_tsc_inclusive + elapsed_tsc; // Assigned (not +=) from the value captured at BeginZone
+    anchor->label = zone.label;
+    anchor->byte_count += zone.byte_count;
+    anchor->hits++;
+    parent->elapsed_tsc_exclusive -= elapsed_tsc; // Time spent in this zone is removed from the parent's exclusive total
+    g_profiler.parent_index = zone.parent_index;
+    #else
+    (void)zone;
+    #endif
+}
+
#pragma warning(push) #pragma warning(disable: 4146) // warning C4146: unary minus operator applied to unsigned type, result still unsigned uint32_t HAV_PCG32_Pie (uint64_t *state) @@ -152,6 +219,7 @@ HAV_Buffer HAV_FileRead(char const *file_path) // NOTE: Read file to buffer // 
========================================================================= DWORD bytes_read = 0; + HAV_ProfilerZone prof_file_read_zone = HAV_Profiler_BeginZoneBandwidth("File Read", file_size); BOOL read_file_result = ReadFile( /*HANDLE hFile*/ file_handle, /*LPVOID lpBuffer*/ buffer, @@ -159,6 +227,7 @@ HAV_Buffer HAV_FileRead(char const *file_path) /*LPDWORD lpNumberOfBytesRead*/ &bytes_read, /*LPOVERLAPPED lpOverlapped*/ NULL ); + HAV_Profiler_EndZone(prof_file_read_zone); // NOTE: Handle read result // ========================================================================= diff --git a/part2/haversine_stdlib.h b/part2/haversine_stdlib.h index 7d3fe66..3b6e502 100644 --- a/part2/haversine_stdlib.h +++ b/part2/haversine_stdlib.h @@ -60,6 +60,41 @@ HAV_Str8BinarySplitResult HAV_Str8_BinarySplit(HAV_Str8 buffer, HAV_Str8 find); bool HAV_CharIsWhiteSpace(char ch); bool HAV_CharIsDigit(char ch);
+// NOTE: Profiler
+// ============================================================================
+typedef struct HAV_ProfilerAnchor { // Accumulated totals for one zone index
+    HAV_Str8 label;
+    u64 elapsed_tsc_exclusive; // Does not include children
+    u64 elapsed_tsc_inclusive; // Includes children
+    u64 byte_count; // Sum of the byte counts of every zone ended on this anchor
+    u64 hits; // Number of zones ended on this anchor
+} HAV_ProfilerAnchor;
+
+typedef struct HAV_Profiler {
+    HAV_ProfilerAnchor anchors[4096]; // Indexed by the zone index passed to HAV_Profiler_BeginZone_
+    u64 begin_tsc; // HAV_Profiler_Dump reports end_tsc - begin_tsc as the total
+    u64 end_tsc;
+    u64 parent_index; // Index of the most recently opened zone that has not ended yet
+} HAV_Profiler;
+
+typedef struct HAV_ProfilerZone { // Per-call state returned by BeginZone and consumed by EndZone
+    u64 parent_index; // g_profiler.parent_index at BeginZone, restored by EndZone
+    uint32_t index; // Anchor this zone accumulates into
+    HAV_Str8 label;
+    u64 elapsed_tsc_inclusive; // Anchor's inclusive total captured at BeginZone
+    u64 tsc; // ReadCPUTimer() value at BeginZone
+    u64 byte_count; // Bytes added to the anchor when the zone ends
+} HAV_ProfilerZone;
+
+static HAV_Profiler g_profiler;
+
+#define HAV_Profiler_BeginZone(label) HAV_Profiler_BeginZone_(HAV_STR8(label), __COUNTER__ + 1, 0)
+#define HAV_Profiler_BeginZoneBandwidth(label, byte_count) HAV_Profiler_BeginZone_(HAV_STR8(label), __COUNTER__ + 1, (byte_count))
+
+static void HAV_Profiler_Dump(void);
+static HAV_ProfilerZone HAV_Profiler_BeginZone_(HAV_Str8 label, uint32_t index, u64 byte_count);
+static void HAV_Profiler_EndZone(HAV_ProfilerZone zone);
+
// 
NOTE: PCG32 // ============================================================================ // NOTE: PCG RNG from Demetri Spanos: https://github.com/demetri/scribbles