perfaware/part2: Do RDTSC homework

This commit is contained in:
doyle 2023-07-04 21:40:08 +10:00
parent 662f5afd9b
commit b91869a49e
10 changed files with 577 additions and 6 deletions

View File

@ -324,4 +324,8 @@ cl %part2_dir%\haversine_generator.c /W4 /WX /Z7 /nologo /O2 /Fe:haversine_gener
cl %part2_dir%\haversine.c /W4 /WX /Z7 /nologo /Fe:haversine_debug || exit /b 1 cl %part2_dir%\haversine.c /W4 /WX /Z7 /nologo /Fe:haversine_debug || exit /b 1
cl %part2_dir%\haversine.c /W4 /WX /Z7 /nologo /O2 /Fe:haversine_release || exit /b 1 cl %part2_dir%\haversine.c /W4 /WX /Z7 /nologo /O2 /Fe:haversine_release || exit /b 1
cl %part2_dir%\listing_0071_os_timer_main.cpp /W4 /WX /Z7 /O2 /nologo /Fe:listing_0071_os_timer_main_release || exit /b 1
cl %part2_dir%\listing_0072_cpu_timer_main.cpp /W4 /WX /Z7 /O2 /nologo /Fe:listing_0072_cpu_timer_main_release || exit /b 1
cl %part2_dir%\listing_0073_cpu_timer_guessfreq_main.cpp /W4 /WX /Z7 /O2 /nologo /Fe:listing_0073_cpu_timer_guessfreq_release || exit /b 1
popd popd

View File

@ -6,7 +6,9 @@
#include "haversine_stdlib.h" #include "haversine_stdlib.h"
#include "haversine_stdlib.c" #include "haversine_stdlib.c"
#include <math.h> #include <math.h>
#include "listing_0065_haversine_formula.cpp" #include "listing_0065_haversine_formula.cpp"
#include "listing_0074_platform_metrics.cpp"
typedef struct Str8FindResult { typedef struct Str8FindResult {
bool found; bool found;
@ -84,6 +86,8 @@ int main(int argc, char **argv)
return -1; return -1;
} }
u64 cpu_start_time = ReadCPUTimer();
HAV_Str8 arg_json = {argv[1], strlen(argv[1])}; HAV_Str8 arg_json = {argv[1], strlen(argv[1])};
HAV_Str8 arg_answers = {0}; HAV_Str8 arg_answers = {0};
if (argc == 3) if (argc == 3)
@ -93,9 +97,15 @@ int main(int argc, char **argv)
if (!HAV_BufferIsValid(json_buffer)) if (!HAV_BufferIsValid(json_buffer))
return 0; return 0;
u64 cpu_misc_setup_time = ReadCPUTimer();
u64 cpu_elapsed_parse_time = 0;
u64 cpu_elapsed_haversine_sum_time = 0;
f64 haversine_sum = 0; f64 haversine_sum = 0;
size_t pair_count = 0; size_t pair_count = 0;
HAV_Str8 json_it = (HAV_Str8){.data = json_buffer.data, .size = json_buffer.size}; HAV_Str8 json_it = (HAV_Str8){.data = json_buffer.data, .size = json_buffer.size};
u64 cpu_begin_parse_time = ReadCPUTimer();
for (;; pair_count++) { for (;; pair_count++) {
HAV_Str8BinarySplitResult x0_key = HAV_Str8_BinarySplit(json_it, HAV_STR8("x0")); HAV_Str8BinarySplitResult x0_key = HAV_Str8_BinarySplit(json_it, HAV_STR8("x0"));
if (!x0_key.rhs.size) if (!x0_key.rhs.size)
@ -121,6 +131,9 @@ int main(int argc, char **argv)
f64 x1 = StringToF64(x1_value.lhs); f64 x1 = StringToF64(x1_value.lhs);
f64 y1 = StringToF64(y1_value.lhs); f64 y1 = StringToF64(y1_value.lhs);
u64 cpu_end_parse_time = ReadCPUTimer();
cpu_elapsed_parse_time += cpu_end_parse_time - cpu_begin_parse_time;
#if 0 #if 0
HAV_PrintLnFmt("{x0: %.*s (%f), y0: %.*s (%f), x1 %.*s (%f), y1: %.*s (%f)}", HAV_PrintLnFmt("{x0: %.*s (%f), y0: %.*s (%f), x1 %.*s (%f), y1: %.*s (%f)}",
HAV_STR8_FMT(x0_value.lhs), x0, HAV_STR8_FMT(x0_value.lhs), x0,
@ -129,12 +142,17 @@ int main(int argc, char **argv)
HAV_STR8_FMT(y1_value.lhs), y1); HAV_STR8_FMT(y1_value.lhs), y1);
#endif #endif
json_it = y1_value.rhs;
f64 haversine_dist = ReferenceHaversine(x0, y0, x1, y1, /*EarthRadius*/ 6372.8); f64 haversine_dist = ReferenceHaversine(x0, y0, x1, y1, /*EarthRadius*/ 6372.8);
haversine_sum += haversine_dist; haversine_sum += haversine_dist;
u64 cpu_end_sum_time = ReadCPUTimer();
cpu_elapsed_haversine_sum_time += cpu_end_sum_time - cpu_end_parse_time;
cpu_begin_parse_time = cpu_end_sum_time;
json_it = y1_value.rhs;
} }
u64 cpu_end_parse_and_sum_time = ReadCPUTimer();
haversine_sum /= pair_count; haversine_sum /= pair_count;
size_t input_size = json_buffer.size; size_t input_size = json_buffer.size;
HAV_PrintLnFmt("Input size: %zu", input_size); HAV_PrintLnFmt("Input size: %zu", input_size);
@ -154,7 +172,21 @@ int main(int argc, char **argv)
HAV_PrintLnFmt("Reference sum: %f", reference_haversine_sum); HAV_PrintLnFmt("Reference sum: %f", reference_haversine_sum);
HAV_PrintLnFmt("Difference: %f", difference); HAV_PrintLnFmt("Difference: %f", difference);
} }
} }
u64 cpu_end_time = ReadCPUTimer();
u64 cpu_elapsed_time = cpu_end_time - cpu_start_time;
u64 cpu_frequency = EstimateCPUTimerFreq();
if (cpu_frequency)
printf("\nTotal time: %0.4fms (CPU freq %llu)\n", 1000.0 * (f64)cpu_elapsed_time / (f64)cpu_frequency, cpu_frequency);
u64 cpu_elapsed_setup_time = cpu_misc_setup_time - cpu_start_time;
u64 cpu_elapsed_verify_time = cpu_end_time - cpu_end_parse_and_sum_time;
printf(" Setup: %llu (%.2f%%)\n", cpu_elapsed_setup_time, (f64)cpu_elapsed_setup_time / (f64)cpu_elapsed_time * 100.0);
printf(" Parse: %llu (%.2f%%)\n", cpu_elapsed_parse_time, (f64)cpu_elapsed_parse_time / (f64)cpu_elapsed_time * 100.0);
printf(" Sum: %llu (%.2f%%)\n", cpu_elapsed_haversine_sum_time, (f64)cpu_elapsed_haversine_sum_time / (f64)cpu_elapsed_time * 100.0);
printf(" Verify: %llu (%.2f%%)\n", cpu_elapsed_verify_time, (f64)cpu_elapsed_verify_time / (f64)cpu_elapsed_time * 100.0);
return 0; return 0;
} }

View File

@ -1,3 +1,5 @@
#include <stdint.h>
// NOTE: Macros // NOTE: Macros
// ============================================================================ // ============================================================================
#define HAV_STRINGIFY2(token) #token #define HAV_STRINGIFY2(token) #token
@ -19,6 +21,7 @@
typedef float f32; typedef float f32;
typedef double f64; typedef double f64;
typedef uint64_t u64;
// NOTE: Globals // NOTE: Globals
// ============================================================================ // ============================================================================

View File

@ -0,0 +1,70 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
/* ========================================================================
LISTING 70
======================================================================== */
#if _WIN32
#include <intrin.h>
#include <windows.h>
// Returns the frequency of the Windows performance counter, i.e. the number
// of ticks ReadOSTimer() advances per second.
static u64 GetOSTimerFreq(void)
{
    LARGE_INTEGER TicksPerSecond;
    QueryPerformanceFrequency(&TicksPerSecond);

    u64 Result = TicksPerSecond.QuadPart;
    return Result;
}
// Returns the current value of the Windows performance counter, in ticks
// (divide by GetOSTimerFreq() to convert to seconds).
static u64 ReadOSTimer(void)
{
    LARGE_INTEGER Counter;
    QueryPerformanceCounter(&Counter);

    u64 Result = Counter.QuadPart;
    return Result;
}
#else
#include <x86intrin.h>
#include <sys/time.h>
// Returns the number of OS timer ticks per second. The non-Windows
// ReadOSTimer() reports time in microseconds, hence one million.
static u64 GetOSTimerFreq(void)
{
    u64 MicrosecondsPerSecond = 1000000;
    return MicrosecondsPerSecond;
}
// Returns the current wall-clock time from gettimeofday() as a single tick
// count in units of 1/GetOSTimerFreq() seconds (microseconds).
static u64 ReadOSTimer(void)
{
    // NOTE: "struct" is redundant in C++ but keeps this file usable from C.
    struct timeval Now;
    gettimeofday(&Now, 0);

    u64 Seconds = (u64)Now.tv_sec;
    u64 Microseconds = (u64)Now.tv_usec;
    return GetOSTimerFreq()*Seconds + Microseconds;
}
#endif
/* Reads the CPU's time-stamp counter.

   NOTE: "inline" rather than "static" is deliberate. The compiler would
   inline a static function just as readily, but it would also warn whenever
   an including file never calls it; "inline" keeps those builds quiet. */
inline u64 ReadCPUTimer(void)
{
    // NOTE: __rdtsc is x86-specific. An ARM port would read one of that
    // architecture's performance counters here instead, depending on which
    // are available on the platform.
    u64 Timestamp = __rdtsc();
    return Timestamp;
}

View File

@ -0,0 +1,43 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
/* ========================================================================
LISTING 71
======================================================================== */
#include <stdint.h>
#include <stdio.h>
typedef uint64_t u64;
typedef double f64;
#include "listing_0070_platform_metrics.cpp"
// Spins on the OS timer for one second's worth of ticks, then reports the
// raw start/end/elapsed tick values and the elapsed time in seconds.
//
// u64 is uint64_t, which is not "unsigned long long" on every platform
// (it is "unsigned long" on LP64 Linux), so every value passed to %llu is
// cast explicitly to keep the format string and argument types in agreement.
int main(void)
{
    u64 OSFreq = GetOSTimerFreq();
    printf(" OS Freq: %llu\n", (unsigned long long)OSFreq);

    u64 OSStart = ReadOSTimer();
    u64 OSEnd = 0;
    u64 OSElapsed = 0;

    // Busy-wait until OSFreq ticks (one second) have passed.
    while(OSElapsed < OSFreq)
    {
        OSEnd = ReadOSTimer();
        OSElapsed = OSEnd - OSStart;
    }

    printf(" OS Timer: %llu -> %llu = %llu elapsed\n",
           (unsigned long long)OSStart,
           (unsigned long long)OSEnd,
           (unsigned long long)OSElapsed);
    printf(" OS Seconds: %.4f\n", (f64)OSElapsed/(f64)OSFreq);

    return 0;
}

View File

@ -0,0 +1,49 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
/* ========================================================================
LISTING 72
======================================================================== */
#include <stdint.h>
#include <stdio.h>
typedef uint64_t u64;
typedef double f64;
#include "listing_0070_platform_metrics.cpp"
// Spins on the OS timer for one second's worth of ticks while bracketing the
// wait with CPU timer reads, then reports both the OS tick values and the
// number of CPU timer ticks that elapsed over the same interval.
//
// u64 is uint64_t, which is not "unsigned long long" on every platform
// (it is "unsigned long" on LP64 Linux), so every value passed to %llu is
// cast explicitly to keep the format string and argument types in agreement.
int main(void)
{
    u64 OSFreq = GetOSTimerFreq();
    printf(" OS Freq: %llu\n", (unsigned long long)OSFreq);

    // The CPU timer is read first so that its interval encloses the OS one.
    u64 CPUStart = ReadCPUTimer();
    u64 OSStart = ReadOSTimer();
    u64 OSEnd = 0;
    u64 OSElapsed = 0;

    // Busy-wait until OSFreq ticks (one second) have passed.
    while(OSElapsed < OSFreq)
    {
        OSEnd = ReadOSTimer();
        OSElapsed = OSEnd - OSStart;
    }

    u64 CPUEnd = ReadCPUTimer();
    u64 CPUElapsed = CPUEnd - CPUStart;

    printf(" OS Timer: %llu -> %llu = %llu elapsed\n",
           (unsigned long long)OSStart,
           (unsigned long long)OSEnd,
           (unsigned long long)OSElapsed);
    printf(" OS Seconds: %.4f\n", (f64)OSElapsed/(f64)OSFreq);
    printf(" CPU Timer: %llu -> %llu = %llu elapsed\n",
           (unsigned long long)CPUStart,
           (unsigned long long)CPUEnd,
           (unsigned long long)CPUElapsed);

    return 0;
}

View File

@ -0,0 +1,63 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
/* ========================================================================
LISTING 73
======================================================================== */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
typedef uint64_t u64;
typedef double f64;
#include "listing_0070_platform_metrics.cpp"
// Estimates the CPU timer frequency by spinning on the OS timer for a given
// number of milliseconds and scaling the CPU ticks observed over that span.
//
// Usage: <program> [milliseconds_to_wait]   (default: 1000)
//
// The optional argument must be a non-negative decimal integer. Anything
// else (non-numeric text, trailing characters, a negative value) is rejected
// with a warning and the default is used; a negative value would otherwise
// wrap to an enormous unsigned wait time.
//
// u64 is uint64_t, which is not "unsigned long long" on every platform, so
// every value passed to %llu is cast explicitly.
int main(int ArgCount, char **Args)
{
    u64 MillisecondsToWait = 1000;
    if(ArgCount == 2)
    {
        char *ParseEnd = 0;
        long long Requested = strtoll(Args[1], &ParseEnd, 10);
        if((ParseEnd != Args[1]) && (*ParseEnd == '\0') && (Requested >= 0))
        {
            MillisecondsToWait = (u64)Requested;
        }
        else
        {
            fprintf(stderr, "WARNING: Ignoring invalid wait time \"%s\"; using %llu ms.\n",
                    Args[1], (unsigned long long)MillisecondsToWait);
        }
    }

    u64 OSFreq = GetOSTimerFreq();
    printf(" OS Freq: %llu (reported)\n", (unsigned long long)OSFreq);

    // The CPU timer is read first so that its interval encloses the OS one.
    u64 CPUStart = ReadCPUTimer();
    u64 OSStart = ReadOSTimer();
    u64 OSEnd = 0;
    u64 OSElapsed = 0;
    u64 OSWaitTime = OSFreq * MillisecondsToWait / 1000;

    // Busy-wait until the requested number of OS ticks have passed.
    while(OSElapsed < OSWaitTime)
    {
        OSEnd = ReadOSTimer();
        OSElapsed = OSEnd - OSStart;
    }

    u64 CPUEnd = ReadCPUTimer();
    u64 CPUElapsed = CPUEnd - CPUStart;

    // CPU ticks per second = CPU ticks observed * (OS ticks per second / OS
    // ticks observed). Left at 0 when no OS time elapsed (e.g. a 0 ms wait)
    // to avoid dividing by zero.
    u64 CPUFreq = 0;
    if(OSElapsed)
    {
        CPUFreq = OSFreq * CPUElapsed / OSElapsed;
    }

    printf(" OS Timer: %llu -> %llu = %llu elapsed\n",
           (unsigned long long)OSStart,
           (unsigned long long)OSEnd,
           (unsigned long long)OSElapsed);
    printf(" OS Seconds: %.4f\n", (f64)OSElapsed/(f64)OSFreq);
    printf(" CPU Timer: %llu -> %llu = %llu elapsed\n",
           (unsigned long long)CPUStart,
           (unsigned long long)CPUEnd,
           (unsigned long long)CPUElapsed);
    printf(" CPU Freq: %llu (guessed)\n", (unsigned long long)CPUFreq);

    return 0;
}

View File

@ -0,0 +1,98 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
/* ========================================================================
LISTING 74
======================================================================== */
#if _WIN32
#include <intrin.h>
#include <windows.h>
// Returns the frequency of the Windows performance counter, i.e. the number
// of ticks ReadOSTimer() advances per second.
static u64 GetOSTimerFreq(void)
{
    LARGE_INTEGER TicksPerSecond;
    QueryPerformanceFrequency(&TicksPerSecond);

    u64 Result = TicksPerSecond.QuadPart;
    return Result;
}
// Returns the current value of the Windows performance counter, in ticks
// (divide by GetOSTimerFreq() to convert to seconds).
static u64 ReadOSTimer(void)
{
    LARGE_INTEGER Counter;
    QueryPerformanceCounter(&Counter);

    u64 Result = Counter.QuadPart;
    return Result;
}
#else
#include <x86intrin.h>
#include <sys/time.h>
// Returns the number of OS timer ticks per second. The non-Windows
// ReadOSTimer() reports time in microseconds, hence one million.
static u64 GetOSTimerFreq(void)
{
    u64 MicrosecondsPerSecond = 1000000;
    return MicrosecondsPerSecond;
}
// Returns the current wall-clock time from gettimeofday() as a single tick
// count in units of 1/GetOSTimerFreq() seconds (microseconds).
static u64 ReadOSTimer(void)
{
    // NOTE: "struct" is redundant in C++ but keeps this file usable from C.
    struct timeval Now;
    gettimeofday(&Now, 0);

    u64 Seconds = (u64)Now.tv_sec;
    u64 Microseconds = (u64)Now.tv_usec;
    return GetOSTimerFreq()*Seconds + Microseconds;
}
#endif
/* Reads the CPU's time-stamp counter.

   NOTE: "inline" rather than "static" is deliberate. The compiler would
   inline a static function just as readily, but it would also warn whenever
   an including file never calls it; "inline" keeps those builds quiet. */
inline u64 ReadCPUTimer(void)
{
    // NOTE: __rdtsc is x86-specific. An ARM port would read one of that
    // architecture's performance counters here instead, depending on which
    // are available on the platform.
    u64 Timestamp = __rdtsc();
    return Timestamp;
}
// Estimates how many CPU timer ticks occur per second by busy-waiting on the
// OS timer for 100 milliseconds and scaling the CPU ticks seen over that span.
// Returns 0 if no OS time was observed to elapse.
static u64 EstimateCPUTimerFreq(void)
{
    u64 WaitMilliseconds = 100;
    u64 OSTicksPerSecond = GetOSTimerFreq();

    // The CPU timer is sampled first so its interval encloses the OS one.
    u64 CPUBegin = ReadCPUTimer();
    u64 OSBegin = ReadOSTimer();

    u64 OSTicksToWait = OSTicksPerSecond * WaitMilliseconds / 1000;
    u64 OSNow = 0;
    u64 OSTicksWaited = 0;
    while(OSTicksWaited < OSTicksToWait)
    {
        OSNow = ReadOSTimer();
        OSTicksWaited = OSNow - OSBegin;
    }

    u64 CPUTicksWaited = ReadCPUTimer() - CPUBegin;

    // Guard against dividing by zero when the wait loop never ran.
    if(OSTicksWaited == 0)
    {
        return 0;
    }

    return OSTicksPerSecond * CPUTicksWaited / OSTicksWaited;
}

View File

@ -0,0 +1,209 @@
/* ========================================================================
(C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Please see https://computerenhance.com for more information
======================================================================== */
/* ========================================================================
LISTING 75
======================================================================== */
/* NOTE(casey): _CRT_SECURE_NO_WARNINGS is here because otherwise we cannot
call fopen(). If we replace fopen() with fopen_s() to avoid the warning,
then the code doesn't compile on Linux anymore, since fopen_s() does not
exist there.
What exactly the CRT maintainers were thinking when they made this choice,
I have no idea. */
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <sys/stat.h>
typedef uint8_t u8;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int32_t b32;
typedef float f32;
typedef double f64;
struct haversine_pair
{
f64 X0, Y0;
f64 X1, Y1;
};
#include "listing_0074_platform_metrics.cpp"
#include "listing_0065_haversine_formula.cpp"
#include "listing_0068_buffer.cpp"
#include "listing_0069_lookup_json_parser.cpp"
// Reads the whole of FileName into a newly allocated buffer.
//
// Returns the filled buffer on success. On any failure (cannot open, cannot
// query the size, short read) an error is printed to stderr where one is
// defined below and the buffer returned is whatever `buffer Result = {}` /
// FreeBuffer() leave behind. The caller releases the result with FreeBuffer().
//
// The file handle is always closed before returning, and the size query is
// checked so the allocation is never sized from an uninitialized stat struct.
static buffer ReadEntireFile(char *FileName)
{
    buffer Result = {};

    FILE *File = fopen(FileName, "rb");
    if(File)
    {
#if _WIN32
        struct __stat64 Stat;
        int StatSucceeded = (_stat64(FileName, &Stat) == 0);
#else
        struct stat Stat;
        int StatSucceeded = (stat(FileName, &Stat) == 0);
#endif

        if(StatSucceeded)
        {
            Result = AllocateBuffer(Stat.st_size);
            if(Result.Data)
            {
                // One item of Result.Count bytes: anything but 1 means the
                // file could not be read in full.
                if(fread(Result.Data, Result.Count, 1, File) != 1)
                {
                    fprintf(stderr, "ERROR: Unable to read \"%s\".\n", FileName);
                    FreeBuffer(&Result);
                }
            }
        }
        else
        {
            fprintf(stderr, "ERROR: Unable to determine size of \"%s\".\n", FileName);
        }

        fclose(File);
    }
    else
    {
        fprintf(stderr, "ERROR: Unable to open \"%s\".\n", FileName);
    }

    return Result;
}
// Returns the mean of ReferenceHaversine() over the first PairCount entries
// of Pairs, using an Earth radius of 6372.8. Each distance is scaled by
// 1/PairCount as it is accumulated rather than dividing the total at the end.
static f64 SumHaversineDistances(u64 PairCount, haversine_pair *Pairs)
{
    f64 const EarthRadius = 6372.8;
    f64 Weight = 1 / (f64)PairCount;

    f64 Total = 0;
    haversine_pair *End = Pairs + PairCount;
    for(haversine_pair *At = Pairs; At != End; ++At)
    {
        f64 Distance = ReferenceHaversine(At->X0, At->Y0, At->X1, At->Y1, EarthRadius);
        Total += Weight*Distance;
    }

    return Total;
}
// Prints one profile line: the label, the number of CPU timer ticks between
// Begin and End, and that span as a percentage of TotalTSCElapsed.
//
// u64 is uint64_t, which is not "unsigned long long" on every platform, so
// the tick count is cast explicitly to match the %llu conversion.
static void PrintTimeElapsed(char const *Label, u64 TotalTSCElapsed, u64 Begin, u64 End)
{
    u64 Elapsed = End - Begin;
    f64 Percent = 100.0 * ((f64)Elapsed / (f64)TotalTSCElapsed);
    printf(" %s: %llu (%.2f%%)\n", Label, (unsigned long long)Elapsed, Percent);
}
// Haversine processor with coarse CPU-timer profiling.
//
// Usage: <program> haversine_input.json [answers.f64]
//
// Reads and parses the JSON input, computes the mean haversine distance,
// optionally compares it against a reference answers file, and finally
// prints how many CPU timer ticks each phase took. Returns 0 when the sum was
// computed, 1 otherwise (bad usage, unreadable or malformed input).
//
// Each Prof_* variable holds the CPU timer value at the START of the phase it
// names; a phase's cost is the difference to the next marker. The breakdown is
// printed only when Result == 0, which guarantees every marker was written.
//
// Integer values passed to %llu are cast explicitly, because uint64_t is not
// "unsigned long long" on every platform.
int main(int ArgCount, char **Args)
{
    u64 Prof_Begin = 0;
    u64 Prof_Read = 0;
    u64 Prof_MiscSetup = 0;
    u64 Prof_Parse = 0;
    u64 Prof_Sum = 0;
    u64 Prof_MiscOutput = 0;
    u64 Prof_End = 0;

    Prof_Begin = ReadCPUTimer();

    int Result = 1;

    if((ArgCount == 2) || (ArgCount == 3))
    {
        Prof_Read = ReadCPUTimer();
        buffer InputJSON = ReadEntireFile(Args[1]);

        Prof_MiscSetup = ReadCPUTimer();

        // Lower bound on the bytes one encoded pair occupies, used to size
        // the output array so it can never be too small.
        u32 MinimumJSONPairEncoding = 6*4;
        u64 MaxPairCount = InputJSON.Count / MinimumJSONPairEncoding;
        if(MaxPairCount)
        {
            buffer ParsedValues = AllocateBuffer(MaxPairCount * sizeof(haversine_pair));
            if(ParsedValues.Count)
            {
                haversine_pair *Pairs = (haversine_pair *)ParsedValues.Data;

                Prof_Parse = ReadCPUTimer();
                u64 PairCount = ParseHaversinePairs(InputJSON, MaxPairCount, Pairs);

                Prof_Sum = ReadCPUTimer();
                f64 Sum = SumHaversineDistances(PairCount, Pairs);

                Prof_MiscOutput = ReadCPUTimer();

                Result = 0;

                fprintf(stdout, "Input size: %llu\n", (unsigned long long)InputJSON.Count);
                fprintf(stdout, "Pair count: %llu\n", (unsigned long long)PairCount);
                fprintf(stdout, "Haversine sum: %.16f\n", Sum);

                if(ArgCount == 3)
                {
                    buffer AnswersF64 = ReadEntireFile(Args[2]);
                    if(AnswersF64.Count >= sizeof(f64))
                    {
                        f64 *AnswerValues = (f64 *)AnswersF64.Data;

                        fprintf(stdout, "\nValidation:\n");

                        // The last f64 in the file is read as the reference
                        // sum; the values before it are counted as the
                        // per-pair answers.
                        u64 RefAnswerCount = (AnswersF64.Count - sizeof(f64)) / sizeof(f64);
                        if(PairCount != RefAnswerCount)
                        {
                            fprintf(stdout, "FAILED - pair count doesn't match %llu.\n",
                                    (unsigned long long)RefAnswerCount);
                        }

                        f64 RefSum = AnswerValues[RefAnswerCount];
                        fprintf(stdout, "Reference sum: %.16f\n", RefSum);
                        fprintf(stdout, "Difference: %.16f\n", Sum - RefSum);

                        fprintf(stdout, "\n");
                    }

                    // Release the answers file like the other buffers.
                    FreeBuffer(&AnswersF64);
                }
            }

            FreeBuffer(&ParsedValues);
        }
        else
        {
            fprintf(stderr, "ERROR: Malformed input JSON\n");
        }

        FreeBuffer(&InputJSON);
    }
    else
    {
        fprintf(stderr, "Usage: %s [haversine_input.json]\n", Args[0]);
        fprintf(stderr, " %s [haversine_input.json] [answers.f64]\n", Args[0]);
    }

    Prof_End = ReadCPUTimer();

    if(Result == 0)
    {
        u64 TotalCPUElapsed = Prof_End - Prof_Begin;

        // Estimated after Prof_End so the calibration wait is not counted in
        // the profile itself.
        u64 CPUFreq = EstimateCPUTimerFreq();
        if(CPUFreq)
        {
            printf("\nTotal time: %0.4fms (CPU freq %llu)\n",
                   1000.0 * (f64)TotalCPUElapsed / (f64)CPUFreq,
                   (unsigned long long)CPUFreq);
        }

        PrintTimeElapsed("Startup", TotalCPUElapsed, Prof_Begin, Prof_Read);
        PrintTimeElapsed("Read", TotalCPUElapsed, Prof_Read, Prof_MiscSetup);
        PrintTimeElapsed("MiscSetup", TotalCPUElapsed, Prof_MiscSetup, Prof_Parse);
        PrintTimeElapsed("Parse", TotalCPUElapsed, Prof_Parse, Prof_Sum);
        PrintTimeElapsed("Sum", TotalCPUElapsed, Prof_Sum, Prof_MiscOutput);
        PrintTimeElapsed("MiscOutput", TotalCPUElapsed, Prof_MiscOutput, Prof_End);
    }

    return Result;
}

Binary file not shown.