perfaware/part2: Do profiling of nested blocks

This commit is contained in:
doyle 2023-07-08 00:44:38 +10:00
parent b91869a49e
commit 3e9d6980df
2 changed files with 109 additions and 53 deletions

View File

@ -10,6 +10,77 @@
#include "listing_0065_haversine_formula.cpp" #include "listing_0065_haversine_formula.cpp"
#include "listing_0074_platform_metrics.cpp" #include "listing_0074_platform_metrics.cpp"
typedef struct ProfilerRecord {
HAV_Str8 label;
u64 elapsed_tsc;
u64 elapsed_tsc_child;
u64 hits;
} ProfilerRecord;
typedef struct ProfilerAnchor {
u64 parent_index;
uint32_t index;
HAV_Str8 label;
u64 tsc;
} ProfilerAnchor;
typedef struct Profiler {
ProfilerRecord records[4096];
u64 begin_tsc;
u64 end_tsc;
u64 parent_index;
} Profiler;
static Profiler g_profiler;
#define Profiler_BeginAnchor(label) Profiler_BeginAnchor_(HAV_STR8(label), __COUNTER__ + 1)
static ProfilerAnchor Profiler_BeginAnchor_(HAV_Str8 label, uint32_t index)
{
ProfilerAnchor result = {0};
result.index = index;
result.label = label;
result.tsc = ReadCPUTimer();
result.parent_index = g_profiler.parent_index;
g_profiler.parent_index = index;
return result;
}
static void Profiler_EndAnchor(ProfilerAnchor anchor)
{
u64 elapsed_tsc = ReadCPUTimer() - anchor.tsc;
ProfilerRecord* record = g_profiler.records + anchor.index;
record->elapsed_tsc += elapsed_tsc;
record->label = anchor.label;
record->hits++;
ProfilerRecord* parent = g_profiler.records + anchor.parent_index;
parent->elapsed_tsc_child += elapsed_tsc;
g_profiler.parent_index = anchor.parent_index;
}
static void Profiler_Dump()
{
u64 total_elapsed_tsc = g_profiler.end_tsc - g_profiler.begin_tsc;
u64 cpu_frequency = EstimateCPUTimerFreq();
if (cpu_frequency)
printf("\nTotal time: %0.4fms (CPU freq %llu)\n", 1000.0 * (f64)total_elapsed_tsc / (f64)cpu_frequency, cpu_frequency);
for (uint32_t index = 1; index < HAV_ARRAY_UCOUNT(g_profiler.records); index++) {
ProfilerRecord const *record = g_profiler.records + index;
if (!record->elapsed_tsc)
break;
u64 record_exclusive_tsc = record->elapsed_tsc - record->elapsed_tsc_child;
f64 percent = total_elapsed_tsc ? (f64)record_exclusive_tsc / (f64)total_elapsed_tsc * 100.0 : 100.0;
printf(" %.*s[%zu]: %llu (%.2f%%", HAV_STR8_FMT(record->label), record->hits, record_exclusive_tsc, percent);
if (record->elapsed_tsc_child) {
f64 percent_w_children = total_elapsed_tsc ? ((f64)record->elapsed_tsc / (f64)total_elapsed_tsc * 100.0) : 100.0;
printf(", %.2f%% w/children", percent_w_children);
}
printf(")\n");
}
}
typedef struct Str8FindResult { typedef struct Str8FindResult {
bool found; bool found;
HAV_Str8 match; HAV_Str8 match;
@ -86,54 +157,50 @@ int main(int argc, char **argv)
return -1; return -1;
} }
u64 cpu_start_time = ReadCPUTimer(); g_profiler.begin_tsc = ReadCPUTimer();
HAV_Str8 arg_json = {argv[1], strlen(argv[1])}; HAV_Str8 arg_json = {argv[1], strlen(argv[1])};
HAV_Str8 arg_answers = {0}; HAV_Str8 arg_answers = {0};
if (argc == 3) if (argc == 3)
arg_answers = (HAV_Str8){.data = argv[2], .size = strlen(argv[2])}; arg_answers = (HAV_Str8){.data = argv[2], .size = strlen(argv[2])};
ProfilerAnchor prof_file_read_anchor = Profiler_BeginAnchor("File Read");
HAV_Buffer json_buffer = HAV_FileRead(arg_json.data); HAV_Buffer json_buffer = HAV_FileRead(arg_json.data);
Profiler_EndAnchor(prof_file_read_anchor);
if (!HAV_BufferIsValid(json_buffer)) if (!HAV_BufferIsValid(json_buffer))
return 0; return 0;
u64 cpu_misc_setup_time = ReadCPUTimer(); ProfilerAnchor prof_parse_and_sum_anchor = Profiler_BeginAnchor("Parse&Hav Sum");
f64 haversine_sum = 0;
u64 cpu_elapsed_parse_time = 0; size_t pair_count = 0;
u64 cpu_elapsed_haversine_sum_time = 0; HAV_Str8 json_it = (HAV_Str8){.data = json_buffer.data, .size = json_buffer.size};
f64 haversine_sum = 0;
size_t pair_count = 0;
HAV_Str8 json_it = (HAV_Str8){.data = json_buffer.data, .size = json_buffer.size};
u64 cpu_begin_parse_time = ReadCPUTimer();
for (;; pair_count++) { for (;; pair_count++) {
ProfilerAnchor prof_json_parse_anchor = Profiler_BeginAnchor("Parse");
f64 x0 = 0.f, y0 = 0.f, x1 = 0.f, y1 = 0.f;
HAV_Str8BinarySplitResult x0_key = HAV_Str8_BinarySplit(json_it, HAV_STR8("x0")); HAV_Str8BinarySplitResult x0_key = HAV_Str8_BinarySplit(json_it, HAV_STR8("x0"));
if (!x0_key.rhs.size) if (x0_key.rhs.size) {
break; Str8FindResult x0_find_value = FindFirstCharThatLooksLikeANumber(x0_key.rhs);
HAV_Str8BinarySplitResult x0_value = HAV_Str8_BinarySplit(x0_find_value.match_to_end_of_buffer, HAV_STR8(","));
Str8FindResult x0_find_value = FindFirstCharThatLooksLikeANumber(x0_key.rhs); HAV_Str8BinarySplitResult y0_key = HAV_Str8_BinarySplit(x0_value.rhs, HAV_STR8("y0"));
HAV_Str8BinarySplitResult x0_value = HAV_Str8_BinarySplit(x0_find_value.match_to_end_of_buffer, HAV_STR8(",")); Str8FindResult y0_find_value = FindFirstCharThatLooksLikeANumber(y0_key.rhs);
HAV_Str8BinarySplitResult y0_value = HAV_Str8_BinarySplit(y0_find_value.match_to_end_of_buffer, HAV_STR8(","));
HAV_Str8BinarySplitResult y0_key = HAV_Str8_BinarySplit(x0_value.rhs, HAV_STR8("y0")); HAV_Str8BinarySplitResult x1_key = HAV_Str8_BinarySplit(y0_value.rhs, HAV_STR8("x1"));
Str8FindResult y0_find_value = FindFirstCharThatLooksLikeANumber(y0_key.rhs); Str8FindResult x1_find_value = FindFirstCharThatLooksLikeANumber(x1_key.rhs);
HAV_Str8BinarySplitResult y0_value = HAV_Str8_BinarySplit(y0_find_value.match_to_end_of_buffer, HAV_STR8(",")); HAV_Str8BinarySplitResult x1_value = HAV_Str8_BinarySplit(x1_find_value.match_to_end_of_buffer, HAV_STR8(","));
HAV_Str8BinarySplitResult x1_key = HAV_Str8_BinarySplit(y0_value.rhs, HAV_STR8("x1")); HAV_Str8BinarySplitResult y1_key = HAV_Str8_BinarySplit(x1_value.rhs, HAV_STR8("y1"));
Str8FindResult x1_find_value = FindFirstCharThatLooksLikeANumber(x1_key.rhs); Str8FindResult y1_find_value = FindFirstCharThatLooksLikeANumber(y1_key.rhs);
HAV_Str8BinarySplitResult x1_value = HAV_Str8_BinarySplit(x1_find_value.match_to_end_of_buffer, HAV_STR8(",")); HAV_Str8BinarySplitResult y1_value = HAV_Str8_BinarySplit(y1_find_value.match_to_end_of_buffer, HAV_STR8("}"));
HAV_Str8BinarySplitResult y1_key = HAV_Str8_BinarySplit(x1_value.rhs, HAV_STR8("y1")); x0 = StringToF64(x0_value.lhs);
Str8FindResult y1_find_value = FindFirstCharThatLooksLikeANumber(y1_key.rhs); y0 = StringToF64(y0_value.lhs);
HAV_Str8BinarySplitResult y1_value = HAV_Str8_BinarySplit(y1_find_value.match_to_end_of_buffer, HAV_STR8("}")); x1 = StringToF64(x1_value.lhs);
y1 = StringToF64(y1_value.lhs);
f64 x0 = StringToF64(x0_value.lhs);
f64 y0 = StringToF64(y0_value.lhs);
f64 x1 = StringToF64(x1_value.lhs);
f64 y1 = StringToF64(y1_value.lhs);
u64 cpu_end_parse_time = ReadCPUTimer();
cpu_elapsed_parse_time += cpu_end_parse_time - cpu_begin_parse_time;
json_it = y1_value.rhs;
}
#if 0 #if 0
HAV_PrintLnFmt("{x0: %.*s (%f), y0: %.*s (%f), x1 %.*s (%f), y1: %.*s (%f)}", HAV_PrintLnFmt("{x0: %.*s (%f), y0: %.*s (%f), x1 %.*s (%f), y1: %.*s (%f)}",
HAV_STR8_FMT(x0_value.lhs), x0, HAV_STR8_FMT(x0_value.lhs), x0,
@ -142,16 +209,16 @@ int main(int argc, char **argv)
HAV_STR8_FMT(y1_value.lhs), y1); HAV_STR8_FMT(y1_value.lhs), y1);
#endif #endif
Profiler_EndAnchor(prof_json_parse_anchor);
if (!x0_key.rhs.size)
break;
ProfilerAnchor prof_haversine_sum_anchor = Profiler_BeginAnchor("Hav Sum");
f64 haversine_dist = ReferenceHaversine(x0, y0, x1, y1, /*EarthRadius*/ 6372.8); f64 haversine_dist = ReferenceHaversine(x0, y0, x1, y1, /*EarthRadius*/ 6372.8);
haversine_sum += haversine_dist; haversine_sum += haversine_dist;
u64 cpu_end_sum_time = ReadCPUTimer(); Profiler_EndAnchor(prof_haversine_sum_anchor);
cpu_elapsed_haversine_sum_time += cpu_end_sum_time - cpu_end_parse_time;
cpu_begin_parse_time = cpu_end_sum_time;
json_it = y1_value.rhs;
} }
Profiler_EndAnchor(prof_parse_and_sum_anchor);
u64 cpu_end_parse_and_sum_time = ReadCPUTimer();
haversine_sum /= pair_count; haversine_sum /= pair_count;
size_t input_size = json_buffer.size; size_t input_size = json_buffer.size;
@ -174,19 +241,7 @@ int main(int argc, char **argv)
} }
} }
u64 cpu_end_time = ReadCPUTimer(); g_profiler.end_tsc = ReadCPUTimer();
u64 cpu_elapsed_time = cpu_end_time - cpu_start_time; Profiler_Dump();
u64 cpu_frequency = EstimateCPUTimerFreq();
if (cpu_frequency)
printf("\nTotal time: %0.4fms (CPU freq %llu)\n", 1000.0 * (f64)cpu_elapsed_time / (f64)cpu_frequency, cpu_frequency);
u64 cpu_elapsed_setup_time = cpu_misc_setup_time - cpu_start_time;
u64 cpu_elapsed_verify_time = cpu_end_time - cpu_end_parse_and_sum_time;
printf(" Setup: %llu (%.2f%%)\n", cpu_elapsed_setup_time, (f64)cpu_elapsed_setup_time / (f64)cpu_elapsed_time * 100.0);
printf(" Parse: %llu (%.2f%%)\n", cpu_elapsed_parse_time, (f64)cpu_elapsed_parse_time / (f64)cpu_elapsed_time * 100.0);
printf(" Sum: %llu (%.2f%%)\n", cpu_elapsed_haversine_sum_time, (f64)cpu_elapsed_haversine_sum_time / (f64)cpu_elapsed_time * 100.0);
printf(" Verify: %llu (%.2f%%)\n", cpu_elapsed_verify_time, (f64)cpu_elapsed_verify_time / (f64)cpu_elapsed_time * 100.0);
return 0; return 0;
} }

View File

@ -18,6 +18,7 @@
#define HAV_ARRAY_UCOUNT(array) sizeof((array)) / sizeof((array)[0]) #define HAV_ARRAY_UCOUNT(array) sizeof((array)) / sizeof((array)[0])
#define HAV_CAST(Type) (Type) #define HAV_CAST(Type) (Type)
#define HAV_MIN(a, b) ((a) < (b) ? (a) : (b)) #define HAV_MIN(a, b) ((a) < (b) ? (a) : (b))
#define HAV_MAX(a, b) ((a) > (b) ? (a) : (b))
typedef float f32; typedef float f32;
typedef double f64; typedef double f64;