diff --git a/build.bat b/build.bat index 282f053..18205e8 100644 --- a/build.bat +++ b/build.bat @@ -324,4 +324,8 @@ cl %part2_dir%\haversine_generator.c /W4 /WX /Z7 /nologo /O2 /Fe:haversine_gener cl %part2_dir%\haversine.c /W4 /WX /Z7 /nologo /Fe:haversine_debug || exit /b 1 cl %part2_dir%\haversine.c /W4 /WX /Z7 /nologo /O2 /Fe:haversine_release || exit /b 1 + +cl %part2_dir%\listing_0071_os_timer_main.cpp /W4 /WX /Z7 /O2 /nologo /Fe:listing_0071_os_timer_main_release || exit /b 1 +cl %part2_dir%\listing_0072_cpu_timer_main.cpp /W4 /WX /Z7 /O2 /nologo /Fe:listing_0072_cpu_timer_main_release || exit /b 1 +cl %part2_dir%\listing_0073_cpu_timer_guessfreq_main.cpp /W4 /WX /Z7 /O2 /nologo /Fe:listing_0073_cpu_timer_guessfreq_release || exit /b 1 popd diff --git a/part2/haversine.c b/part2/haversine.c index ce99249..c4960cc 100644 --- a/part2/haversine.c +++ b/part2/haversine.c @@ -6,7 +6,9 @@ #include "haversine_stdlib.h" #include "haversine_stdlib.c" #include + #include "listing_0065_haversine_formula.cpp" +#include "listing_0074_platform_metrics.cpp" typedef struct Str8FindResult { bool found; @@ -84,6 +86,8 @@ int main(int argc, char **argv) return -1; } + u64 cpu_start_time = ReadCPUTimer(); + HAV_Str8 arg_json = {argv[1], strlen(argv[1])}; HAV_Str8 arg_answers = {0}; if (argc == 3) @@ -93,9 +97,15 @@ int main(int argc, char **argv) if (!HAV_BufferIsValid(json_buffer)) return 0; - f64 haversine_sum = 0; - size_t pair_count = 0; - HAV_Str8 json_it = (HAV_Str8){.data = json_buffer.data, .size = json_buffer.size}; + u64 cpu_misc_setup_time = ReadCPUTimer(); + + u64 cpu_elapsed_parse_time = 0; + u64 cpu_elapsed_haversine_sum_time = 0; + + f64 haversine_sum = 0; + size_t pair_count = 0; + HAV_Str8 json_it = (HAV_Str8){.data = json_buffer.data, .size = json_buffer.size}; + u64 cpu_begin_parse_time = ReadCPUTimer(); for (;; pair_count++) { HAV_Str8BinarySplitResult x0_key = HAV_Str8_BinarySplit(json_it, HAV_STR8("x0")); if (!x0_key.rhs.size) @@ -121,6 +131,9 @@ int main(int argc, char **argv) f64 x1 = StringToF64(x1_value.lhs); f64 y1 = StringToF64(y1_value.lhs); + u64 cpu_end_parse_time = ReadCPUTimer(); + cpu_elapsed_parse_time += cpu_end_parse_time - cpu_begin_parse_time; + #if 0 HAV_PrintLnFmt("{x0: %.*s (%f), y0: %.*s (%f), x1 %.*s (%f), y1: %.*s (%f)}", HAV_STR8_FMT(x0_value.lhs), x0, @@ -129,12 +142,17 @@ int main(int argc, char **argv) HAV_STR8_FMT(y1_value.lhs), y1); #endif - json_it = y1_value.rhs; - f64 haversine_dist = ReferenceHaversine(x0, y0, x1, y1, /*EarthRadius*/ 6372.8); haversine_sum += haversine_dist; + u64 cpu_end_sum_time = ReadCPUTimer(); + cpu_elapsed_haversine_sum_time += cpu_end_sum_time - cpu_end_parse_time; + + cpu_begin_parse_time = cpu_end_sum_time; + json_it = y1_value.rhs; } + u64 cpu_end_parse_and_sum_time = ReadCPUTimer(); + haversine_sum /= pair_count; size_t input_size = json_buffer.size; HAV_PrintLnFmt("Input size: %zu", input_size); @@ -154,7 +172,21 @@ int main(int argc, char **argv) HAV_PrintLnFmt("Reference sum: %f", reference_haversine_sum); HAV_PrintLnFmt("Difference: %f", difference); } - } + + u64 cpu_end_time = ReadCPUTimer(); + u64 cpu_elapsed_time = cpu_end_time - cpu_start_time; + u64 cpu_frequency = EstimateCPUTimerFreq(); + if (cpu_frequency) + printf("\nTotal time: %0.4fms (CPU freq %llu)\n", 1000.0 * (f64)cpu_elapsed_time / (f64)cpu_frequency, cpu_frequency); + + u64 cpu_elapsed_setup_time = cpu_misc_setup_time - cpu_start_time; + u64 cpu_elapsed_verify_time = cpu_end_time - cpu_end_parse_and_sum_time; + + printf(" Setup: %llu (%.2f%%)\n", cpu_elapsed_setup_time, (f64)cpu_elapsed_setup_time / (f64)cpu_elapsed_time * 100.0); + printf(" Parse: %llu (%.2f%%)\n", cpu_elapsed_parse_time, (f64)cpu_elapsed_parse_time / (f64)cpu_elapsed_time * 100.0); + printf(" Sum: %llu (%.2f%%)\n", cpu_elapsed_haversine_sum_time, (f64)cpu_elapsed_haversine_sum_time / (f64)cpu_elapsed_time * 100.0); + printf(" Verify: %llu (%.2f%%)\n", cpu_elapsed_verify_time, (f64)cpu_elapsed_verify_time / (f64)cpu_elapsed_time * 100.0); + return 0; } diff --git a/part2/haversine_stdlib.h b/part2/haversine_stdlib.h index 9fd43c3..cf45069 100644 --- a/part2/haversine_stdlib.h +++ b/part2/haversine_stdlib.h @@ -1,3 +1,5 @@ +#include + // NOTE: Macros // ============================================================================ #define HAV_STRINGIFY2(token) #token @@ -19,6 +21,7 @@ typedef float f32; typedef double f64; +typedef uint64_t u64; // NOTE: Globals // ============================================================================ diff --git a/part2/listing_0070_platform_metrics.cpp b/part2/listing_0070_platform_metrics.cpp new file mode 100644 index 0000000..a6f2dc0 --- /dev/null +++ b/part2/listing_0070_platform_metrics.cpp @@ -0,0 +1,70 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* ======================================================================== + LISTING 70 + ======================================================================== */ + +#if _WIN32 + +#include +#include + +static u64 GetOSTimerFreq(void) +{ + LARGE_INTEGER Freq; + QueryPerformanceFrequency(&Freq); + return Freq.QuadPart; +} + +static u64 ReadOSTimer(void) +{ + LARGE_INTEGER Value; + QueryPerformanceCounter(&Value); + return Value.QuadPart; +} + +#else + +#include +#include + +static u64 GetOSTimerFreq(void) +{ + return 1000000; +} + +static u64 ReadOSTimer(void) +{ + // NOTE(casey): The "struct" keyword is not necessary here when compiling in C++, + // but just in case anyone is using this file from C, I include it. + struct timeval Value; + gettimeofday(&Value, 0); + + u64 Result = GetOSTimerFreq()*(u64)Value.tv_sec + (u64)Value.tv_usec; + return Result; +} + +#endif + +/* NOTE(casey): This does not need to be "inline", it could just be "static" + because compilers will inline it anyway. But compilers will warn about + static functions that aren't used. So "inline" is just the simplest way + to tell them to stop complaining about that. */ +inline u64 ReadCPUTimer(void) +{ + // NOTE(casey): If you were on ARM, you would need to replace __rdtsc + // with one of their performance counter read instructions, depending + // on which ones are available on your platform. + + return __rdtsc(); +} diff --git a/part2/listing_0071_os_timer_main.cpp b/part2/listing_0071_os_timer_main.cpp new file mode 100644 index 0000000..cc68fdc --- /dev/null +++ b/part2/listing_0071_os_timer_main.cpp @@ -0,0 +1,43 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* ======================================================================== + LISTING 71 + ======================================================================== */ + +#include +#include + +typedef uint64_t u64; +typedef double f64; + +#include "listing_0070_platform_metrics.cpp" + +int main(void) +{ + u64 OSFreq = GetOSTimerFreq(); + printf(" OS Freq: %llu\n", OSFreq); + + u64 OSStart = ReadOSTimer(); + u64 OSEnd = 0; + u64 OSElapsed = 0; + while(OSElapsed < OSFreq) + { + OSEnd = ReadOSTimer(); + OSElapsed = OSEnd - OSStart; + } + + printf(" OS Timer: %llu -> %llu = %llu elapsed\n", OSStart, OSEnd, OSElapsed); + printf(" OS Seconds: %.4f\n", (f64)OSElapsed/(f64)OSFreq); + + return 0; +} diff --git a/part2/listing_0072_cpu_timer_main.cpp b/part2/listing_0072_cpu_timer_main.cpp new file mode 100644 index 0000000..4b5e814 --- /dev/null +++ b/part2/listing_0072_cpu_timer_main.cpp @@ -0,0 +1,49 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* ======================================================================== + LISTING 72 + ======================================================================== */ + +#include +#include + +typedef uint64_t u64; +typedef double f64; + +#include "listing_0070_platform_metrics.cpp" + +int main(void) +{ + u64 OSFreq = GetOSTimerFreq(); + printf(" OS Freq: %llu\n", OSFreq); + + u64 CPUStart = ReadCPUTimer(); + u64 OSStart = ReadOSTimer(); + u64 OSEnd = 0; + u64 OSElapsed = 0; + while(OSElapsed < OSFreq) + { + OSEnd = ReadOSTimer(); + OSElapsed = OSEnd - OSStart; + } + + u64 CPUEnd = ReadCPUTimer(); + u64 CPUElapsed = CPUEnd - CPUStart; + + printf(" OS Timer: %llu -> %llu = %llu elapsed\n", OSStart, OSEnd, OSElapsed); + printf(" OS Seconds: %.4f\n", (f64)OSElapsed/(f64)OSFreq); + + printf(" CPU Timer: %llu -> %llu = %llu elapsed\n", CPUStart, CPUEnd, CPUElapsed); + + return 0; +} diff --git a/part2/listing_0073_cpu_timer_guessfreq_main.cpp b/part2/listing_0073_cpu_timer_guessfreq_main.cpp new file mode 100644 index 0000000..5577e06 --- /dev/null +++ b/part2/listing_0073_cpu_timer_guessfreq_main.cpp @@ -0,0 +1,63 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* ======================================================================== + LISTING 73 + ======================================================================== */ + +#include +#include +#include + +typedef uint64_t u64; +typedef double f64; + +#include "listing_0070_platform_metrics.cpp" + +int main(int ArgCount, char **Args) +{ + u64 MillisecondsToWait = 1000; + if(ArgCount == 2) + { + MillisecondsToWait = atol(Args[1]); + } + + u64 OSFreq = GetOSTimerFreq(); + printf(" OS Freq: %llu (reported)\n", OSFreq); + + u64 CPUStart = ReadCPUTimer(); + u64 OSStart = ReadOSTimer(); + u64 OSEnd = 0; + u64 OSElapsed = 0; + u64 OSWaitTime = OSFreq * MillisecondsToWait / 1000; + while(OSElapsed < OSWaitTime) + { + OSEnd = ReadOSTimer(); + OSElapsed = OSEnd - OSStart; + } + + u64 CPUEnd = ReadCPUTimer(); + u64 CPUElapsed = CPUEnd - CPUStart; + u64 CPUFreq = 0; + if(OSElapsed) + { + CPUFreq = OSFreq * CPUElapsed / OSElapsed; + } + + printf(" OS Timer: %llu -> %llu = %llu elapsed\n", OSStart, OSEnd, OSElapsed); + printf(" OS Seconds: %.4f\n", (f64)OSElapsed/(f64)OSFreq); + + printf(" CPU Timer: %llu -> %llu = %llu elapsed\n", CPUStart, CPUEnd, CPUElapsed); + printf(" CPU Freq: %llu (guessed)\n", CPUFreq); + + return 0; +} diff --git a/part2/listing_0074_platform_metrics.cpp b/part2/listing_0074_platform_metrics.cpp new file mode 100644 index 0000000..43c32d6 --- /dev/null +++ b/part2/listing_0074_platform_metrics.cpp @@ -0,0 +1,98 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* ======================================================================== + LISTING 74 + ======================================================================== */ + +#if _WIN32 + +#include +#include + +static u64 GetOSTimerFreq(void) +{ + LARGE_INTEGER Freq; + QueryPerformanceFrequency(&Freq); + return Freq.QuadPart; +} + +static u64 ReadOSTimer(void) +{ + LARGE_INTEGER Value; + QueryPerformanceCounter(&Value); + return Value.QuadPart; +} + +#else + +#include +#include + +static u64 GetOSTimerFreq(void) +{ + return 1000000; +} + +static u64 ReadOSTimer(void) +{ + // NOTE(casey): The "struct" keyword is not necessary here when compiling in C++, + // but just in case anyone is using this file from C, I include it. + struct timeval Value; + gettimeofday(&Value, 0); + + u64 Result = GetOSTimerFreq()*(u64)Value.tv_sec + (u64)Value.tv_usec; + return Result; +} + +#endif + +/* NOTE(casey): This does not need to be "inline", it could just be "static" + because compilers will inline it anyway. But compilers will warn about + static functions that aren't used. So "inline" is just the simplest way + to tell them to stop complaining about that. */ +inline u64 ReadCPUTimer(void) +{ + // NOTE(casey): If you were on ARM, you would need to replace __rdtsc + // with one of their performance counter read instructions, depending + // on which ones are available on your platform. + + return __rdtsc(); +} + +static u64 EstimateCPUTimerFreq(void) +{ + u64 MillisecondsToWait = 100; + u64 OSFreq = GetOSTimerFreq(); + + u64 CPUStart = ReadCPUTimer(); + u64 OSStart = ReadOSTimer(); + u64 OSEnd = 0; + u64 OSElapsed = 0; + u64 OSWaitTime = OSFreq * MillisecondsToWait / 1000; + while(OSElapsed < OSWaitTime) + { + OSEnd = ReadOSTimer(); + OSElapsed = OSEnd - OSStart; + } + + u64 CPUEnd = ReadCPUTimer(); + u64 CPUElapsed = CPUEnd - CPUStart; + + u64 CPUFreq = 0; + if(OSElapsed) + { + CPUFreq = OSFreq * CPUElapsed / OSElapsed; + } + + return CPUFreq; +} diff --git a/part2/listing_0075_timed_haversine_main.cpp b/part2/listing_0075_timed_haversine_main.cpp new file mode 100644 index 0000000..7100ae2 --- /dev/null +++ b/part2/listing_0075_timed_haversine_main.cpp @@ -0,0 +1,209 @@ +/* ======================================================================== + + (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Please see https://computerenhance.com for more information + + ======================================================================== */ + +/* ======================================================================== + LISTING 75 + ======================================================================== */ + +/* NOTE(casey): _CRT_SECURE_NO_WARNINGS is here because otherwise we cannot + call fopen(). If we replace fopen() with fopen_s() to avoid the warning, + then the code doesn't compile on Linux anymore, since fopen_s() does not + exist there. + + What exactly the CRT maintainers were thinking when they made this choice, + I have no idea. */ +#define _CRT_SECURE_NO_WARNINGS + +#include +#include +#include +#include +#include + +typedef uint8_t u8; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int32_t b32; + +typedef float f32; +typedef double f64; + +struct haversine_pair +{ + f64 X0, Y0; + f64 X1, Y1; +}; + +#include "listing_0074_platform_metrics.cpp" +#include "listing_0065_haversine_formula.cpp" +#include "listing_0068_buffer.cpp" +#include "listing_0069_lookup_json_parser.cpp" + +static buffer ReadEntireFile(char *FileName) +{ + buffer Result = {}; + + FILE *File = fopen(FileName, "rb"); + if(File) + { +#if _WIN32 + struct __stat64 Stat; + _stat64(FileName, &Stat); +#else + struct stat Stat; + stat(FileName, &Stat); +#endif + + Result = AllocateBuffer(Stat.st_size); + if(Result.Data) + { + if(fread(Result.Data, Result.Count, 1, File) != 1) + { + fprintf(stderr, "ERROR: Unable to read \"%s\".\n", FileName); + FreeBuffer(&Result); + } + } + } + else + { + fprintf(stderr, "ERROR: Unable to open \"%s\".\n", FileName); + } + + return Result; +} + +static f64 SumHaversineDistances(u64 PairCount, haversine_pair *Pairs) +{ + f64 Sum = 0; + + f64 SumCoef = 1 / (f64)PairCount; + for(u64 PairIndex = 0; PairIndex < PairCount; ++PairIndex) + { + haversine_pair Pair = Pairs[PairIndex]; + f64 EarthRadius = 6372.8; + f64 Dist = ReferenceHaversine(Pair.X0, Pair.Y0, Pair.X1, Pair.Y1, EarthRadius); + Sum += SumCoef*Dist; + } + + return Sum; +} + +static void PrintTimeElapsed(char const *Label, u64 TotalTSCElapsed, u64 Begin, u64 End) +{ + u64 Elapsed = End - Begin; + f64 Percent = 100.0 * ((f64)Elapsed / (f64)TotalTSCElapsed); + printf(" %s: %llu (%.2f%%)\n", Label, Elapsed, Percent); +} + +int main(int ArgCount, char **Args) +{ + u64 Prof_Begin = 0; + u64 Prof_Read = 0; + u64 Prof_MiscSetup = 0; + u64 Prof_Parse = 0; + u64 Prof_Sum = 0; + u64 Prof_MiscOutput = 0; + u64 Prof_End = 0; + + Prof_Begin = ReadCPUTimer(); + + int Result = 1; + + if((ArgCount == 2) || (ArgCount == 3)) + { + Prof_Read = ReadCPUTimer(); + buffer InputJSON = ReadEntireFile(Args[1]); + Prof_MiscSetup = ReadCPUTimer(); + + u32 MinimumJSONPairEncoding = 6*4; + u64 MaxPairCount = InputJSON.Count / MinimumJSONPairEncoding; + if(MaxPairCount) + { + buffer ParsedValues = AllocateBuffer(MaxPairCount * sizeof(haversine_pair)); + if(ParsedValues.Count) + { + haversine_pair *Pairs = (haversine_pair *)ParsedValues.Data; + + Prof_Parse = ReadCPUTimer(); + u64 PairCount = ParseHaversinePairs(InputJSON, MaxPairCount, Pairs); + Prof_Sum = ReadCPUTimer(); + f64 Sum = SumHaversineDistances(PairCount, Pairs); + Prof_MiscOutput = ReadCPUTimer(); + + Result = 0; + + fprintf(stdout, "Input size: %llu\n", InputJSON.Count); + fprintf(stdout, "Pair count: %llu\n", PairCount); + fprintf(stdout, "Haversine sum: %.16f\n", Sum); + + if(ArgCount == 3) + { + buffer AnswersF64 = ReadEntireFile(Args[2]); + if(AnswersF64.Count >= sizeof(f64)) + { + f64 *AnswerValues = (f64 *)AnswersF64.Data; + + fprintf(stdout, "\nValidation:\n"); + + u64 RefAnswerCount = (AnswersF64.Count - sizeof(f64)) / sizeof(f64); + if(PairCount != RefAnswerCount) + { + fprintf(stdout, "FAILED - pair count doesn't match %llu.\n", RefAnswerCount); + } + + f64 RefSum = AnswerValues[RefAnswerCount]; + fprintf(stdout, "Reference sum: %.16f\n", RefSum); + fprintf(stdout, "Difference: %.16f\n", Sum - RefSum); + + fprintf(stdout, "\n"); + } + } + } + + FreeBuffer(&ParsedValues); + } + else + { + fprintf(stderr, "ERROR: Malformed input JSON\n"); + } + + FreeBuffer(&InputJSON); + } + else + { + fprintf(stderr, "Usage: %s [haversine_input.json]\n", Args[0]); + fprintf(stderr, " %s [haversine_input.json] [answers.f64]\n", Args[0]); + } + + Prof_End = ReadCPUTimer(); + + if(Result == 0) + { + u64 TotalCPUElapsed = Prof_End - Prof_Begin; + + u64 CPUFreq = EstimateCPUTimerFreq(); + if(CPUFreq) + { + printf("\nTotal time: %0.4fms (CPU freq %llu)\n", 1000.0 * (f64)TotalCPUElapsed / (f64)CPUFreq, CPUFreq); + } + + PrintTimeElapsed("Startup", TotalCPUElapsed, Prof_Begin, Prof_Read); + PrintTimeElapsed("Read", TotalCPUElapsed, Prof_Read, Prof_MiscSetup); + PrintTimeElapsed("MiscSetup", TotalCPUElapsed, Prof_MiscSetup, Prof_Parse); + PrintTimeElapsed("Parse", TotalCPUElapsed, Prof_Parse, Prof_Sum); + PrintTimeElapsed("Sum", TotalCPUElapsed, Prof_Sum, Prof_MiscOutput); + PrintTimeElapsed("MiscOutput", TotalCPUElapsed, Prof_MiscOutput, Prof_End); + } + + return Result; +} diff --git a/project.rdbg b/project.rdbg index 48a7f59..9f4fff7 100644 Binary files a/project.rdbg and b/project.rdbg differ