Pull in bug fixes from upstream

This commit is contained in:
doylet 2024-04-18 22:59:11 +10:00
parent 243a214b43
commit 765b8255f7
39 changed files with 2055 additions and 882 deletions

View File

@ -13,7 +13,7 @@ pushd Build
REM O2 Optimisation Level 2
REM Oi Use CPU Intrinsics
REM Z7 Combine multi-debug files to one debug file
set common_flags=-D DQN_UNIT_TESTS_WITH_MAIN -D DQN_UNIT_TESTS_WITH_KECCAK -D DQN_IMPLEMENTATION -D DQN_USE_STD_PRINTF /Tp %script_dir%\dqn.h
set common_flags=-D DQN_WITH_UNIT_TESTS -D DQN_UNIT_TESTS_WITH_MAIN -D DQN_UNIT_TESTS_WITH_KECCAK -D DQN_USE_STD_PRINTF %script_dir%\dqn.cpp
set msvc_driver_flags=%common_flags% -MT -EHa -GR- -Od -Oi -Z7 -wd4201 -W4 -nologo

View File

@ -11,10 +11,9 @@ pushd Build
-Werror \
-fsanitize=address \
-std=c++17 \
-D DQN_IMPLEMENTATION \
-D DQN_UNIT_TESTS_WITH_MAIN \
-D DQN_UNIT_TESTS_WITH_KECCAK \
-x c++ ${code_dir}/dqn.h \
-x ${code_dir}/dqn.cpp \
-g \
-o dqn_unit_tests
popd

62
dqn.cpp Normal file
View File

@ -0,0 +1,62 @@
#include "dqn.h"
/*
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// /$$$$$$\ $$\ $$\ $$$$$$$\ $$\
// \_$$ _|$$$\ $$$ |$$ __$$\ $$ |
// $$ | $$$$\ $$$$ |$$ | $$ |$$ |
// $$ | $$\$$\$$ $$ |$$$$$$$ |$$ |
// $$ | $$ \$$$ $$ |$$ ____/ $$ |
// $$ | $$ |\$ /$$ |$$ | $$ |
// $$$$$$\ $$ | \_/ $$ |$$ | $$$$$$$$\
// \______|\__| \__|\__| \________|
//
// Implementation
//
////////////////////////////////////////////////////////////////////////////////////////////////////
*/
#if defined(DQN_WITH_CGEN)
#if !defined(DQN_NO_METADESK)
DQN_MSVC_WARNING_PUSH
DQN_MSVC_WARNING_DISABLE(4505) // warning C4505: '<function>': unreferenced function with internal linkage has been removed
#include "External/metadesk/md.c"
DQN_MSVC_WARNING_POP
#endif
#define DQN_CPP_FILE_IMPLEMENTATION
#include "Standalone/dqn_cpp_file.h"
#include "dqn_cgen.cpp"
#endif
#if defined(DQN_WITH_JSON)
#include "dqn_json.cpp"
#endif
#include "dqn_base.cpp"
#include "dqn_thread_context.cpp"
#include "dqn_external.cpp"
#include "dqn_allocator.cpp"
#include "dqn_debug.cpp"
#include "dqn_string.cpp"
#include "dqn_containers.cpp"
#include "dqn_type_info.cpp"
#if defined(DQN_PLATFORM_EMSCRIPTEN) || defined(DQN_PLATFORM_POSIX) || defined(DQN_PLATFORM_ARM64)
#include "dqn_os_posix.cpp"
#elif defined(DQN_PLATFORM_WIN32)
#include "dqn_os_win32.cpp"
#else
#error Please define a platform e.g. 'DQN_PLATFORM_WIN32' to enable the correct implementation for platform APIs
#endif
#include "dqn_os.cpp"
#include "dqn_math.cpp"
#include "dqn_hash.cpp"
#include "dqn_helpers.cpp"
#if defined(DQN_WITH_UNIT_TESTS)
#include "dqn_unit_tests.cpp"
#endif
#include "dqn_docs.cpp"

79
dqn.h
View File

@ -1,4 +1,4 @@
#if !defined(DQN_H)
#pragma once
#define DQN_H
/*
@ -15,6 +15,7 @@
// \___|
//
// dqn.h -- Personal standard library -- MIT License -- git.doylet.dev/dqn
// ASCII -- BigMoney-NW by Nathan Bloomfild
//
////////////////////////////////////////////////////////////////////////////////////////////////////
//
@ -42,11 +43,7 @@
//
// -- Compiling --
//
// Define DQN_IMPLEMENTATION macro in one and only one translation unit to
// enable the implementation of this library, for example:
//
// #define DQN_IMEPLEMENTATION
// #include "dqn.h"
// Compile dqn.cpp or include it into one of your translation units.
//
// Additionally, this library supports including/excluding specific sections
// of the library by using #define on the name of the section. These names are
@ -208,6 +205,14 @@
// library from being included. This might be useful if you are including the
// library in your project yourself. The library must still be defined and
// visible before this header.
//
// - Enable compilation of unit tests with the library.
//
// #define DQN_WITH_UNIT_TESTS
//
// - Increase the capacity of the job queue, default is 128.
//
// #define DQN_JOB_QUEUE_SPMC_SIZE 128
*/
#if defined(DQN_ONLY_VARRAY) || \
@ -343,65 +348,3 @@
#endif
#include "dqn_json.h"
#endif
#endif // DQN_H
#if defined(DQN_IMPLEMENTATION)
/*
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// /$$$$$$\ $$\ $$\ $$$$$$$\ $$\
// \_$$ _|$$$\ $$$ |$$ __$$\ $$ |
// $$ | $$$$\ $$$$ |$$ | $$ |$$ |
// $$ | $$\$$\$$ $$ |$$$$$$$ |$$ |
// $$ | $$ \$$$ $$ |$$ ____/ $$ |
// $$ | $$ |\$ /$$ |$$ | $$ |
// $$$$$$\ $$ | \_/ $$ |$$ | $$$$$$$$\
// \______|\__| \__|\__| \________|
//
// Implementation
//
////////////////////////////////////////////////////////////////////////////////////////////////////
*/
#if defined(DQN_WITH_CGEN)
#if !defined(DQN_NO_METADESK)
DQN_MSVC_WARNING_PUSH
DQN_MSVC_WARNING_DISABLE(4505) // warning C4505: '<function>': unreferenced function with internal linkage has been removed
#include "External/metadesk/md.c"
DQN_MSVC_WARNING_POP
#endif
#define DQN_CPP_FILE_IMPLEMENTATION
#include "Standalone/dqn_cpp_file.h"
#include "dqn_cgen.cpp"
#endif
#if defined(DQN_WITH_JSON)
#define DQN_JSON_IMPLEMENTATION
#include "dqn_json.h"
#endif
#include "dqn_base.cpp"
#include "dqn_thread_context.cpp"
#include "dqn_external.cpp"
#include "dqn_allocator.cpp"
#include "dqn_debug.cpp"
#include "dqn_string.cpp"
#include "dqn_containers.cpp"
#include "dqn_type_info.cpp"
#if defined(DQN_PLATFORM_EMSCRIPTEN) || defined(DQN_PLATFORM_POSIX) || defined(DQN_PLATFORM_ARM64)
#include "dqn_os_posix.cpp"
#elif defined(DQN_PLATFORM_WIN32)
#include "dqn_os_win32.cpp"
#else
#error Please define a platform e.g. 'DQN_PLATFORM_WIN32' to enable the correct implementation for platform APIs
#endif
#include "dqn_os.cpp"
#include "dqn_math.cpp"
#include "dqn_hash.cpp"
#include "dqn_helpers.cpp"
#include "dqn_unit_tests.cpp"
#include "dqn_docs.cpp"
#endif // DQN_IMPLEMENTATION

View File

@ -345,7 +345,8 @@ DQN_API void *Dqn_ChunkPool_Alloc(Dqn_ChunkPool *pool, Dqn_usize size)
// NOTE: Store the offset to the original pointer behind the user's
// pointer.
DQN_MEMCPY(&(DQN_CAST(char *)slot->data)[-1], &offset_to_original_ptr, 1);
char *offset_to_original_storage = DQN_CAST(char *)slot->data - 1;
DQN_MEMCPY(offset_to_original_storage, &offset_to_original_ptr, 1);
}
// NOTE: Smuggle the slot type in the next pointer so that we know, when the
@ -427,6 +428,7 @@ DQN_API void Dqn_ArenaCatalog_Init(Dqn_ArenaCatalog *catalog, Dqn_ChunkPool *poo
DQN_API Dqn_ArenaCatalogItem *Dqn_ArenaCatalog_Find(Dqn_ArenaCatalog *catalog, Dqn_Str8 label)
{
Dqn_TicketMutex_Begin(&catalog->ticket_mutex);
Dqn_ArenaCatalogItem *result = &catalog->sentinel;
for (Dqn_ArenaCatalogItem *item = catalog->sentinel.next; item != &catalog->sentinel; item = item->next) {
if (item->label == label) {
@ -434,6 +436,7 @@ DQN_API Dqn_ArenaCatalogItem *Dqn_ArenaCatalog_Find(Dqn_ArenaCatalog *catalog, D
break;
}
}
Dqn_TicketMutex_End(&catalog->ticket_mutex);
return result;
}
@ -462,7 +465,9 @@ DQN_API void Dqn_ArenaCatalog_AddLabelRef(Dqn_ArenaCatalog *catalog, Dqn_Arena *
DQN_API void Dqn_ArenaCatalog_AddLabelCopy(Dqn_ArenaCatalog *catalog, Dqn_Arena *arena, Dqn_Str8 label)
{
Dqn_TicketMutex_Begin(&catalog->ticket_mutex);
Dqn_Str8 label_copy = Dqn_ChunkPool_AllocStr8Copy(catalog->pool, label);
Dqn_TicketMutex_End(&catalog->ticket_mutex);
Dqn_ArenaCatalog_AddLabelRef(catalog, arena, label_copy);
}
@ -470,14 +475,18 @@ DQN_API void Dqn_ArenaCatalog_AddF(Dqn_ArenaCatalog *catalog, Dqn_Arena *arena,
{
va_list args;
va_start(args, fmt);
Dqn_TicketMutex_Begin(&catalog->ticket_mutex);
Dqn_Str8 label = Dqn_ChunkPool_AllocStr8FV(catalog->pool, fmt, args);
Dqn_TicketMutex_End(&catalog->ticket_mutex);
va_end(args);
Dqn_ArenaCatalog_AddLabelRef(catalog, arena, label);
}
DQN_API void Dqn_ArenaCatalog_AddFV(Dqn_ArenaCatalog *catalog, Dqn_Arena *arena, DQN_FMT_ATTRIB char const *fmt, va_list args)
{
Dqn_TicketMutex_Begin(&catalog->ticket_mutex);
Dqn_Str8 label = Dqn_ChunkPool_AllocStr8FV(catalog->pool, fmt, args);
Dqn_TicketMutex_End(&catalog->ticket_mutex);
Dqn_ArenaCatalog_AddLabelRef(catalog, arena, label);
}
@ -494,14 +503,18 @@ DQN_API Dqn_Arena *Dqn_ArenaCatalog_AllocLabelRef(Dqn_ArenaCatalog *catalog, Dqn
DQN_API Dqn_Arena *Dqn_ArenaCatalog_AllocLabelCopy(Dqn_ArenaCatalog *catalog, Dqn_usize reserve, Dqn_usize commit, uint8_t arena_flags, Dqn_Str8 label)
{
Dqn_TicketMutex_Begin(&catalog->ticket_mutex);
Dqn_Str8 label_copy = Dqn_ChunkPool_AllocStr8Copy(catalog->pool, label);
Dqn_TicketMutex_End(&catalog->ticket_mutex);
Dqn_Arena *result = Dqn_ArenaCatalog_AllocLabelRef(catalog, reserve, commit, arena_flags, label_copy);
return result;
}
DQN_API Dqn_Arena *Dqn_ArenaCatalog_AllocFV(Dqn_ArenaCatalog *catalog, Dqn_usize reserve, Dqn_usize commit, uint8_t arena_flags, DQN_FMT_ATTRIB char const *fmt, va_list args)
{
Dqn_TicketMutex_Begin(&catalog->ticket_mutex);
Dqn_Str8 label = Dqn_ChunkPool_AllocStr8FV(catalog->pool, fmt, args);
Dqn_TicketMutex_End(&catalog->ticket_mutex);
Dqn_Arena *result = Dqn_ArenaCatalog_AllocLabelRef(catalog, reserve, commit, arena_flags, label);
return result;
}
@ -510,7 +523,9 @@ DQN_API Dqn_Arena *Dqn_ArenaCatalog_AllocF(Dqn_ArenaCatalog *catalog, Dqn_usize
{
va_list args;
va_start(args, fmt);
Dqn_TicketMutex_Begin(&catalog->ticket_mutex);
Dqn_Str8 label = Dqn_ChunkPool_AllocStr8FV(catalog->pool, fmt, args);
Dqn_TicketMutex_End(&catalog->ticket_mutex);
Dqn_Arena *result = Dqn_ArenaCatalog_AllocLabelRef(catalog, reserve, commit, arena_flags, label);
va_end(args);
return result;

View File

@ -1,3 +1,6 @@
#pragma once
#include "dqn.h"
/*
////////////////////////////////////////////////////////////////////////////////////////////////////
//
@ -51,6 +54,7 @@ struct Dqn_Arena
{
Dqn_ArenaBlock *curr;
uint8_t flags;
Dqn_TicketMutex mutex; // For user code to lock the arena, the arena itself does not use.
};
struct Dqn_ArenaTempMem

286
dqn_avx512f.cpp Normal file
View File

@ -0,0 +1,286 @@
#pragma once
#include "dqn.h"
#include <immintrin.h>
/*
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// /$$$$$$ /$$ /$$ /$$ /$$ /$$$$$$$ /$$ /$$$$$$ /$$$$$$$$
// /$$__ $$| $$ | $$| $$ / $$ | $$____/ /$$$$ /$$__ $$| $$_____/
// | $$ \ $$| $$ | $$| $$/ $$/ | $$ |_ $$ |__/ \ $$| $$
// | $$$$$$$$| $$ / $$/ \ $$$$/ /$$$$$$| $$$$$$$ | $$ /$$$$$$/| $$$$$
// | $$__ $$ \ $$ $$/ >$$ $$|______/|_____ $$ | $$ /$$____/ | $$__/
// | $$ | $$ \ $$$/ /$$/\ $$ /$$ \ $$ | $$ | $$ | $$
// | $$ | $$ \ $/ | $$ \ $$ | $$$$$$//$$$$$$| $$$$$$$$| $$
// |__/ |__/ \_/ |__/ |__/ \______/|______/|________/|__/
//
// dqn_avx512f.h
//
////////////////////////////////////////////////////////////////////////////////////////////////////
*/
DQN_API Dqn_Str8FindResult Dqn_Str8_FindStr8AVX512F(Dqn_Str8 string, Dqn_Str8 find)
{
// NOTE: Algorithm as described in http://0x80.pl/articles/simd-strfind.html
Dqn_Str8FindResult result = {};
if (!Dqn_Str8_HasData(string) || !Dqn_Str8_HasData(find) || find.size > string.size)
return result;
__m512i const find_first_ch = _mm512_set1_epi8(find.data[0]);
__m512i const find_last_ch = _mm512_set1_epi8(find.data[find.size - 1]);
Dqn_usize const search_size = string.size - find.size;
Dqn_usize simd_iterations = search_size / sizeof(__m512i);
char const *ptr = string.data;
while (simd_iterations--) {
__m512i find_first_ch_block = _mm512_loadu_si512(ptr);
__m512i find_last_ch_block = _mm512_loadu_si512(ptr + find.size - 1);
// NOTE: AVX512F does not have a cmpeq so we use XOR to place a 0 bit
// where matches are found.
__m512i first_ch_matches = _mm512_xor_si512(find_first_ch_block, find_first_ch);
// NOTE: We can combine the 2nd XOR and merge the 2 XOR results into one
// operation using the ternarylogic intrinsic.
//
// A = first_ch_matches (find_first_ch_block ^ find_first_ch)
// B = find_last_ch_block
// C = find_last_ch
//
// ternarylogic op => A | (B ^ C) => 0b1111'0110 => 0xf6
//
// / A / B / C / B ^ C / A | (B ^ C) /
// | 0 | 0 | 0 | 0 | 0 |
// | 0 | 0 | 1 | 1 | 1 |
// | 0 | 1 | 0 | 1 | 1 |
// | 0 | 1 | 1 | 0 | 0 |
// | 1 | 0 | 0 | 0 | 1 |
// | 1 | 0 | 1 | 1 | 1 |
// | 1 | 1 | 0 | 1 | 1 |
// | 1 | 1 | 1 | 0 | 1 |
__m512i ch_matches = _mm512_ternarylogic_epi32(first_ch_matches, find_last_ch_block, find_last_ch, 0xf6);
// NOTE: Matches were XOR-ed and are hence indicated as zero so we mask
// out which 32 bit elements in the vector had zero bytes. This uses a
// bit twiddling trick
// https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
__mmask16 zero_byte_mask = {};
{
const __m512i v01 = _mm512_set1_epi32(0x01010101u);
const __m512i v80 = _mm512_set1_epi32(0x80808080u);
const __m512i v1 = _mm512_sub_epi32(ch_matches, v01);
const __m512i tmp1 = _mm512_ternarylogic_epi32(v1, ch_matches, v80, 0x20);
zero_byte_mask = _mm512_test_epi32_mask(tmp1, tmp1);
}
while (zero_byte_mask) {
uint64_t const lsb_zero_pos = _tzcnt_u64(zero_byte_mask);
char const *base_ptr = ptr + (4 * lsb_zero_pos);
if (DQN_MEMCMP(base_ptr + 0, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data;
} else if (DQN_MEMCMP(base_ptr + 1, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 1;
} else if (DQN_MEMCMP(base_ptr + 2, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 2;
} else if (DQN_MEMCMP(base_ptr + 3, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 3;
}
if (result.found) {
result.start_to_before_match = Dqn_Str8_Init(string.data, result.index);
result.match = Dqn_Str8_Init(string.data + result.index, find.size);
result.match_to_end_of_buffer = Dqn_Str8_Init(result.match.data, string.size - result.index);
return result;
}
zero_byte_mask = Dqn_Bit_ClearNextLSB(zero_byte_mask);
}
ptr += sizeof(__m512i);
}
for (Dqn_usize index = ptr - string.data; index < string.size; index++) {
Dqn_Str8 string_slice = Dqn_Str8_Slice(string, index, find.size);
if (Dqn_Str8_Eq(string_slice, find)) {
result.found = true;
result.index = index;
result.start_to_before_match = Dqn_Str8_Init(string.data, index);
result.match = Dqn_Str8_Init(string.data + index, find.size);
result.match_to_end_of_buffer = Dqn_Str8_Init(result.match.data, string.size - index);
return result;
}
}
return result;
}
DQN_API Dqn_Str8FindResult Dqn_Str8_FindLastStr8AVX512F(Dqn_Str8 string, Dqn_Str8 find)
{
// NOTE: Algorithm as described in http://0x80.pl/articles/simd-strfind.html
Dqn_Str8FindResult result = {};
if (!Dqn_Str8_HasData(string) || !Dqn_Str8_HasData(find) || find.size > string.size)
return result;
__m512i const find_first_ch = _mm512_set1_epi8(find.data[0]);
__m512i const find_last_ch = _mm512_set1_epi8(find.data[find.size - 1]);
Dqn_usize const search_size = string.size - find.size;
Dqn_usize simd_iterations = search_size / sizeof(__m512i);
char const *ptr = string.data + search_size + 1;
while (simd_iterations--) {
ptr -= sizeof(__m512i);
__m512i find_first_ch_block = _mm512_loadu_si512(ptr);
__m512i find_last_ch_block = _mm512_loadu_si512(ptr + find.size - 1);
// NOTE: AVX512F does not have a cmpeq so we use XOR to place a 0 bit
// where matches are found.
__m512i first_ch_matches = _mm512_xor_si512(find_first_ch_block, find_first_ch);
// NOTE: We can combine the 2nd XOR and merge the 2 XOR results into one
// operation using the ternarylogic intrinsic.
//
// A = first_ch_matches (find_first_ch_block ^ find_first_ch)
// B = find_last_ch_block
// C = find_last_ch
//
// ternarylogic op => A | (B ^ C) => 0b1111'0110 => 0xf6
//
// / A / B / C / B ^ C / A | (B ^ C) /
// | 0 | 0 | 0 | 0 | 0 |
// | 0 | 0 | 1 | 1 | 1 |
// | 0 | 1 | 0 | 1 | 1 |
// | 0 | 1 | 1 | 0 | 0 |
// | 1 | 0 | 0 | 0 | 1 |
// | 1 | 0 | 1 | 1 | 1 |
// | 1 | 1 | 0 | 1 | 1 |
// | 1 | 1 | 1 | 0 | 1 |
__m512i ch_matches = _mm512_ternarylogic_epi32(first_ch_matches, find_last_ch_block, find_last_ch, 0xf6);
// NOTE: Matches were XOR-ed and are hence indicated as zero so we mask
// out which 32 bit elements in the vector had zero bytes. This uses a
// bit twiddling trick
// https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
__mmask16 zero_byte_mask = {};
{
const __m512i v01 = _mm512_set1_epi32(0x01010101u);
const __m512i v80 = _mm512_set1_epi32(0x80808080u);
const __m512i v1 = _mm512_sub_epi32(ch_matches, v01);
const __m512i tmp1 = _mm512_ternarylogic_epi32(v1, ch_matches, v80, 0x20);
zero_byte_mask = _mm512_test_epi32_mask(tmp1, tmp1);
}
while (zero_byte_mask) {
uint64_t const lsb_zero_pos = _tzcnt_u64(zero_byte_mask);
char const *base_ptr = ptr + (4 * lsb_zero_pos);
if (DQN_MEMCMP(base_ptr + 0, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data;
} else if (DQN_MEMCMP(base_ptr + 1, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 1;
} else if (DQN_MEMCMP(base_ptr + 2, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 2;
} else if (DQN_MEMCMP(base_ptr + 3, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 3;
}
if (result.found) {
result.start_to_before_match = Dqn_Str8_Init(string.data, result.index);
result.match = Dqn_Str8_Init(string.data + result.index, find.size);
result.match_to_end_of_buffer = Dqn_Str8_Init(result.match.data, string.size - result.index);
return result;
}
zero_byte_mask = Dqn_Bit_ClearNextLSB(zero_byte_mask);
}
}
for (Dqn_usize index = ptr - string.data - 1; index < string.size; index--) {
Dqn_Str8 string_slice = Dqn_Str8_Slice(string, index, find.size);
if (Dqn_Str8_Eq(string_slice, find)) {
result.found = true;
result.index = index;
result.start_to_before_match = Dqn_Str8_Init(string.data, index);
result.match = Dqn_Str8_Init(string.data + index, find.size);
result.match_to_end_of_buffer = Dqn_Str8_Init(result.match.data, string.size - index);
return result;
}
}
return result;
}
DQN_API Dqn_Str8BinarySplitResult Dqn_Str8_BinarySplitAVX512F(Dqn_Str8 string, Dqn_Str8 find)
{
Dqn_Str8BinarySplitResult result = {};
Dqn_Str8FindResult find_result = Dqn_Str8_FindStr8AVX512F(string, find);
if (find_result.found) {
result.lhs.data = string.data;
result.lhs.size = find_result.index;
result.rhs = Dqn_Str8_Advance(find_result.match_to_end_of_buffer, find.size);
} else {
result.lhs = string;
}
return result;
}
DQN_API Dqn_Str8BinarySplitResult Dqn_Str8_BinarySplitLastAVX512F(Dqn_Str8 string, Dqn_Str8 find)
{
Dqn_Str8BinarySplitResult result = {};
Dqn_Str8FindResult find_result = Dqn_Str8_FindLastStr8AVX512F(string, find);
if (find_result.found) {
result.lhs.data = string.data;
result.lhs.size = find_result.index;
result.rhs = Dqn_Str8_Advance(find_result.match_to_end_of_buffer, find.size);
} else {
result.lhs = string;
}
return result;
}
DQN_API Dqn_usize Dqn_Str8_SplitAVX512F(Dqn_Str8 string, Dqn_Str8 delimiter, Dqn_Str8 *splits, Dqn_usize splits_count, Dqn_Str8SplitIncludeEmptyStrings mode)
{
Dqn_usize result = 0; // The number of splits in the actual string.
if (!Dqn_Str8_HasData(string) || !Dqn_Str8_HasData(delimiter) || delimiter.size <= 0)
return result;
Dqn_Str8BinarySplitResult split = {};
Dqn_Str8 first = string;
do {
split = Dqn_Str8_BinarySplitAVX512F(first, delimiter);
if (split.lhs.size || mode == Dqn_Str8SplitIncludeEmptyStrings_Yes) {
if (splits && result < splits_count)
splits[result] = split.lhs;
result++;
}
first = split.rhs;
} while (first.size);
return result;
}
DQN_API Dqn_Slice<Dqn_Str8> Dqn_Str8_SplitAllocAVX512F(Dqn_Arena *arena, Dqn_Str8 string, Dqn_Str8 delimiter, Dqn_Str8SplitIncludeEmptyStrings mode)
{
Dqn_Slice<Dqn_Str8> result = {};
Dqn_usize splits_required = Dqn_Str8_SplitAVX512F(string, delimiter, /*splits*/ nullptr, /*count*/ 0, mode);
result.data = Dqn_Arena_NewArray(arena, Dqn_Str8, splits_required, Dqn_ZeroMem_No);
if (result.data) {
result.size = Dqn_Str8_SplitAVX512F(string, delimiter, result.data, splits_required, mode);
DQN_ASSERT(splits_required == result.size);
}
return result;
}

30
dqn_avx512f.h Normal file
View File

@ -0,0 +1,30 @@
#if !defined(DQN_AVX512F_H)
#define DQN_AVX512F_H
/*
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// $$$$$$\ $$\ $$\ $$\ $$\ $$$$$$$\ $$\ $$$$$$\ $$$$$$$$\
// $$ __$$\ $$ | $$ |$$ | $$ | $$ ____| $$$$ | $$ __$$\ $$ _____|
// $$ / $$ |$$ | $$ |\$$\ $$ | $$ | \_$$ | \__/ $$ |$$ |
// $$$$$$$$ |\$$\ $$ | \$$$$ /$$$$$$\ $$$$$$$\ $$ | $$$$$$ |$$$$$\
// $$ __$$ | \$$\$$ / $$ $$< \______|\_____$$\ $$ | $$ ____/ $$ __|
// $$ | $$ | \$$$ / $$ /\$$\ $$\ $$ | $$ | $$ | $$ |
// $$ | $$ | \$ / $$ / $$ | \$$$$$$ |$$$$$$\ $$$$$$$$\ $$ |
// \__| \__| \_/ \__| \__| \______/ \______|\________|\__|
//
// dqn_avx512f.h -- Functions implemented w/ AVX512
//
////////////////////////////////////////////////////////////////////////////////////////////////////
*/
#include "dqn.h"
DQN_API Dqn_Str8FindResult Dqn_Str8_FindStr8AVX512F (Dqn_Str8 string, Dqn_Str8 find);
DQN_API Dqn_Str8FindResult Dqn_Str8_FindLastStr8AVX512F (Dqn_Str8 string, Dqn_Str8 find);
DQN_API Dqn_Str8BinarySplitResult Dqn_Str8_BinarySplitAVX512F (Dqn_Str8 string, Dqn_Str8 find);
DQN_API Dqn_Str8BinarySplitResult Dqn_Str8_BinarySplitLastAVX512F(Dqn_Str8 string, Dqn_Str8 find);
DQN_API Dqn_usize Dqn_Str8_SplitAVX512F (Dqn_Str8 string, Dqn_Str8 delimiter, Dqn_Str8 *splits, Dqn_usize splits_count, Dqn_Str8SplitIncludeEmptyStrings mode);
DQN_API Dqn_Slice<Dqn_Str8> Dqn_Str8_SplitAllocAVX512F (Dqn_Arena *arena, Dqn_Str8 string, Dqn_Str8 delimiter, Dqn_Str8SplitIncludeEmptyStrings mode);
#endif // DQN_AVX512F_H

View File

@ -1,3 +1,6 @@
#pragma once
#include "dqn.h"
/*
////////////////////////////////////////////////////////////////////////////////////////////////////
//
@ -21,16 +24,196 @@
#include <cpuid.h>
#endif
DQN_API Dqn_CPUIDRegisters Dqn_CPUID(int function_id)
Dqn_CPUFeatureDecl g_dqn_cpu_feature_decl[Dqn_CPUFeature_Count];
DQN_API Dqn_CPUIDResult Dqn_CPU_ID(Dqn_CPUIDArgs args)
{
Dqn_CPUIDRegisters result = {};
#if defined(DQN_COMPILER_MSVC)
__cpuid(DQN_CAST(int *)result.array, function_id);
#elif defined(DQN_COMPILER_GCC) || defined(DQN_COMPILER_CLANG)
__get_cpuid(function_id, &result.array[0] /*eax*/, &result.array[1] /*ebx*/, &result.array[2] /*ecx*/ , &result.array[3] /*edx*/);
#else
#error "Compiler not supported"
#endif
Dqn_CPUIDResult result = {};
__cpuidex(result.values, args.eax, args.ecx);
return result;
}
DQN_API Dqn_usize Dqn_CPU_HasFeatureArray(Dqn_CPUReport const *report, Dqn_CPUFeatureQuery *features, Dqn_usize features_size)
{
Dqn_usize result = 0;
Dqn_usize const BITS = sizeof(report->features[0]) * 8;
DQN_FOR_UINDEX(feature_index, features_size) {
Dqn_CPUFeatureQuery *query = features + feature_index;
Dqn_usize chunk_index = query->feature / BITS;
Dqn_usize chunk_bit = query->feature % BITS;
uint64_t chunk = report->features[chunk_index];
query->available = chunk & (1ULL << chunk_bit);
result += DQN_CAST(int)query->available;
}
return result;
}
DQN_API bool Dqn_CPU_HasFeature(Dqn_CPUReport const *report, Dqn_CPUFeature feature)
{
Dqn_CPUFeatureQuery query = {};
query.feature = feature;
bool result = Dqn_CPU_HasFeatureArray(report, &query, 1) == 1;
return result;
}
DQN_API bool Dqn_CPU_HasAllFeatures(Dqn_CPUReport const *report, Dqn_CPUFeature const *features, Dqn_usize features_size)
{
bool result = true;
for (Dqn_usize index = 0; result && index < features_size; index++)
result &= Dqn_CPU_HasFeature(report, features[index]);
return result;
}
DQN_API void Dqn_CPU_SetFeature(Dqn_CPUReport *report, Dqn_CPUFeature feature)
{
DQN_ASSERT(feature < Dqn_CPUFeature_Count);
Dqn_usize const BITS = sizeof(report->features[0]) * 8;
Dqn_usize chunk_index = feature / BITS;
Dqn_usize chunk_bit = feature % BITS;
report->features[chunk_index] |= (1ULL << chunk_bit);
}
DQN_API Dqn_CPUReport Dqn_CPU_Report()
{
Dqn_CPUReport result = {};
Dqn_CPUIDResult fn_0000_[16] = {};
Dqn_CPUIDResult fn_8000_[64] = {};
int const EXTENDED_FUNC_BASE_EAX = 0x8000'0000;
int const REGISTER_SIZE = sizeof(fn_0000_[0].reg.eax);
// NOTE: Query standard/extended numbers ///////////////////////////////////////////////////////
{
Dqn_CPUIDArgs args = {};
// NOTE: Query standard function (e.g. eax = 0x0) for function count + cpu vendor
args = {};
fn_0000_[0] = Dqn_CPU_ID(args);
// NOTE: Query extended function (e.g. eax = 0x8000'0000) for function count + cpu vendor
args = {};
args.eax = DQN_CAST(int) EXTENDED_FUNC_BASE_EAX;
fn_8000_[0] = Dqn_CPU_ID(args);
}
// NOTE: Extract function count ////////////////////////////////////////////////////////////////
int const STANDARD_FUNC_MAX_EAX = fn_0000_[0x0000].reg.eax;
int const EXTENDED_FUNC_MAX_EAX = fn_8000_[0x0000].reg.eax;
// NOTE: Enumerate all CPUID results for the known function counts /////////////////////////////
{
DQN_ASSERT((STANDARD_FUNC_MAX_EAX + 1) <= DQN_ARRAY_ICOUNT(fn_0000_));
DQN_ASSERT((DQN_CAST(Dqn_isize)EXTENDED_FUNC_MAX_EAX - EXTENDED_FUNC_BASE_EAX + 1) <= DQN_ARRAY_ICOUNT(fn_8000_));
for (int eax = 1; eax <= STANDARD_FUNC_MAX_EAX; eax++) {
Dqn_CPUIDArgs args = {};
args.eax = eax;
fn_0000_[eax] = Dqn_CPU_ID(args);
}
for (int eax = EXTENDED_FUNC_BASE_EAX + 1, index = 1; eax <= EXTENDED_FUNC_MAX_EAX; eax++, index++) {
Dqn_CPUIDArgs args = {};
args.eax = eax;
fn_8000_[index] = Dqn_CPU_ID(args);
}
}
// NOTE: Query CPU vendor //////////////////////////////////////////////////////////////////////
{
DQN_MEMCPY(result.vendor + 0, &fn_8000_[0x0000].reg.ebx, REGISTER_SIZE);
DQN_MEMCPY(result.vendor + 4, &fn_8000_[0x0000].reg.edx, REGISTER_SIZE);
DQN_MEMCPY(result.vendor + 8, &fn_8000_[0x0000].reg.ecx, REGISTER_SIZE);
}
// NOTE: Query CPU brand ///////////////////////////////////////////////////////////////////////
if (EXTENDED_FUNC_MAX_EAX >= (EXTENDED_FUNC_BASE_EAX + 4)) {
DQN_MEMCPY(result.brand + 0, &fn_8000_[0x0002].reg.eax, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 4, &fn_8000_[0x0002].reg.ebx, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 8, &fn_8000_[0x0002].reg.ecx, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 12, &fn_8000_[0x0002].reg.edx, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 16, &fn_8000_[0x0003].reg.eax, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 20, &fn_8000_[0x0003].reg.ebx, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 24, &fn_8000_[0x0003].reg.ecx, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 28, &fn_8000_[0x0003].reg.edx, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 32, &fn_8000_[0x0004].reg.eax, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 36, &fn_8000_[0x0004].reg.ebx, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 40, &fn_8000_[0x0004].reg.ecx, REGISTER_SIZE);
DQN_MEMCPY(result.brand + 44, &fn_8000_[0x0004].reg.edx, REGISTER_SIZE);
DQN_ASSERT(result.brand[sizeof(result.brand) - 1] == 0);
}
// NOTE: Query CPU features //////////////////////////////////////////////////////////////////
for (Dqn_usize ext_index = 0; ext_index < Dqn_CPUFeature_Count; ext_index++) {
bool available = false;
// NOTE: Mask bits taken from various manuals
// - AMD64 Architecture Programmer's Manual, Volumes 1-5
// - https://en.wikipedia.org/wiki/CPUID#Calling_CPUID
switch (DQN_CAST(Dqn_CPUFeature)ext_index) {
case Dqn_CPUFeature_3DNow: available = (fn_8000_[0x0001].reg.edx & (1 << 31)); break;
case Dqn_CPUFeature_3DNowExt: available = (fn_8000_[0x0001].reg.edx & (1 << 30)); break;
case Dqn_CPUFeature_ABM: available = (fn_8000_[0x0001].reg.ecx & (1 << 5)); break;
case Dqn_CPUFeature_AES: available = (fn_0000_[0x0001].reg.ecx & (1 << 25)); break;
case Dqn_CPUFeature_AVX: available = (fn_0000_[0x0001].reg.ecx & (1 << 28)); break;
case Dqn_CPUFeature_AVX2: available = (fn_0000_[0x0007].reg.ebx & (1 << 0)); break;
case Dqn_CPUFeature_AVX512F: available = (fn_0000_[0x0007].reg.ebx & (1 << 16)); break;
case Dqn_CPUFeature_AVX512DQ: available = (fn_0000_[0x0007].reg.ebx & (1 << 17)); break;
case Dqn_CPUFeature_AVX512IFMA: available = (fn_0000_[0x0007].reg.ebx & (1 << 21)); break;
case Dqn_CPUFeature_AVX512PF: available = (fn_0000_[0x0007].reg.ebx & (1 << 26)); break;
case Dqn_CPUFeature_AVX512ER: available = (fn_0000_[0x0007].reg.ebx & (1 << 27)); break;
case Dqn_CPUFeature_AVX512CD: available = (fn_0000_[0x0007].reg.ebx & (1 << 28)); break;
case Dqn_CPUFeature_AVX512BW: available = (fn_0000_[0x0007].reg.ebx & (1 << 30)); break;
case Dqn_CPUFeature_AVX512VL: available = (fn_0000_[0x0007].reg.ebx & (1 << 31)); break;
case Dqn_CPUFeature_AVX512VBMI: available = (fn_0000_[0x0007].reg.ecx & (1 << 1)); break;
case Dqn_CPUFeature_AVX512VBMI2: available = (fn_0000_[0x0007].reg.ecx & (1 << 6)); break;
case Dqn_CPUFeature_AVX512VNNI: available = (fn_0000_[0x0007].reg.ecx & (1 << 11)); break;
case Dqn_CPUFeature_AVX512BITALG: available = (fn_0000_[0x0007].reg.ecx & (1 << 12)); break;
case Dqn_CPUFeature_AVX512VPOPCNTDQ: available = (fn_0000_[0x0007].reg.ecx & (1 << 14)); break;
case Dqn_CPUFeature_AVX5124VNNIW: available = (fn_0000_[0x0007].reg.edx & (1 << 2)); break;
case Dqn_CPUFeature_AVX5124FMAPS: available = (fn_0000_[0x0007].reg.edx & (1 << 3)); break;
case Dqn_CPUFeature_AVX512VP2INTERSECT: available = (fn_0000_[0x0007].reg.edx & (1 << 8)); break;
case Dqn_CPUFeature_AVX512FP16: available = (fn_0000_[0x0007].reg.edx & (1 << 23)); break;
case Dqn_CPUFeature_CLZERO: available = (fn_8000_[0x0008].reg.ebx & (1 << 0)); break;
case Dqn_CPUFeature_CMPXCHG8B: available = (fn_0000_[0x0001].reg.edx & (1 << 8)); break;
case Dqn_CPUFeature_CMPXCHG16B: available = (fn_0000_[0x0001].reg.ecx & (1 << 13)); break;
case Dqn_CPUFeature_F16C: available = (fn_0000_[0x0001].reg.ecx & (1 << 29)); break;
case Dqn_CPUFeature_FMA: available = (fn_0000_[0x0001].reg.ecx & (1 << 12)); break;
case Dqn_CPUFeature_FMA4: available = (fn_8000_[0x0001].reg.ecx & (1 << 16)); break;
case Dqn_CPUFeature_FP128: available = (fn_8000_[0x001A].reg.eax & (1 << 0)); break;
case Dqn_CPUFeature_FP256: available = (fn_8000_[0x001A].reg.eax & (1 << 2)); break;
case Dqn_CPUFeature_FPU: available = (fn_0000_[0x0001].reg.edx & (1 << 0)); break;
case Dqn_CPUFeature_MMX: available = (fn_0000_[0x0001].reg.edx & (1 << 23)); break;
case Dqn_CPUFeature_MONITOR: available = (fn_0000_[0x0001].reg.ecx & (1 << 3)); break;
case Dqn_CPUFeature_MOVBE: available = (fn_0000_[0x0001].reg.ecx & (1 << 22)); break;
case Dqn_CPUFeature_MOVU: available = (fn_8000_[0x001A].reg.eax & (1 << 1)); break;
case Dqn_CPUFeature_MmxExt: available = (fn_8000_[0x0001].reg.edx & (1 << 22)); break;
case Dqn_CPUFeature_PCLMULQDQ: available = (fn_0000_[0x0001].reg.ecx & (1 << 1)); break;
case Dqn_CPUFeature_POPCNT: available = (fn_0000_[0x0001].reg.ecx & (1 << 23)); break;
case Dqn_CPUFeature_RDRAND: available = (fn_0000_[0x0001].reg.ecx & (1 << 30)); break;
case Dqn_CPUFeature_RDSEED: available = (fn_0000_[0x0007].reg.ebx & (1 << 18)); break;
case Dqn_CPUFeature_RDTSCP: available = (fn_8000_[0x0001].reg.edx & (1 << 27)); break;
case Dqn_CPUFeature_SHA: available = (fn_0000_[0x0007].reg.ebx & (1 << 29)); break;
case Dqn_CPUFeature_SSE: available = (fn_0000_[0x0001].reg.edx & (1 << 25)); break;
case Dqn_CPUFeature_SSE2: available = (fn_0000_[0x0001].reg.edx & (1 << 26)); break;
case Dqn_CPUFeature_SSE3: available = (fn_0000_[0x0001].reg.ecx & (1 << 0)); break;
case Dqn_CPUFeature_SSE41: available = (fn_0000_[0x0001].reg.ecx & (1 << 19)); break;
case Dqn_CPUFeature_SSE42: available = (fn_0000_[0x0001].reg.ecx & (1 << 20)); break;
case Dqn_CPUFeature_SSE4A: available = (fn_8000_[0x0001].reg.ecx & (1 << 6)); break;
case Dqn_CPUFeature_SSSE3: available = (fn_0000_[0x0001].reg.ecx & (1 << 9)); break;
case Dqn_CPUFeature_TSC: available = (fn_0000_[0x0001].reg.edx & (1 << 4)); break;
case Dqn_CPUFeature_TscInvariant: available = (fn_8000_[0x0007].reg.edx & (1 << 8)); break;
case Dqn_CPUFeature_VAES: available = (fn_0000_[0x0007].reg.ecx & (1 << 9)); break;
case Dqn_CPUFeature_VPCMULQDQ: available = (fn_0000_[0x0007].reg.ecx & (1 << 10)); break;
case Dqn_CPUFeature_Count: DQN_INVALID_CODE_PATH; break;
}
if (available)
Dqn_CPU_SetFeature(&result, DQN_CAST(Dqn_CPUFeature)ext_index);
}
return result;
}
#endif // !defined(DQN_PLATFORM_ARM64) && !defined(DQN_PLATFORM_EMSCRIPTEN)

View File

@ -73,6 +73,7 @@
#include <stdio.h>
#include <stdint.h>
#include <limits.h>
#include <inttypes.h> // PRIu64...
#if !defined(DQN_OS_WIN32)
#include <stdlib.h> // exit()
@ -455,10 +456,110 @@ struct Dqn_ErrorSink
#endif
#if !defined(DQN_PLATFORM_ARM64)
struct Dqn_CPUIDRegisters
struct Dqn_CPURegisters
{
Dqn_uint array[4]; // Values from 'CPUID' instruction for each register (EAX, EBX, ECX, EDX)
int eax;
int ebx;
int ecx;
int edx;
};
union Dqn_CPUIDResult
{
Dqn_CPURegisters reg;
int values[4];
};
struct Dqn_CPUIDArgs
{
int eax;
int ecx;
};
#define DQN_CPU_FEAT_XMACRO \
DQN_CPU_FEAT_XENTRY(3DNow) \
DQN_CPU_FEAT_XENTRY(3DNowExt) \
DQN_CPU_FEAT_XENTRY(ABM) \
DQN_CPU_FEAT_XENTRY(AES) \
DQN_CPU_FEAT_XENTRY(AVX) \
DQN_CPU_FEAT_XENTRY(AVX2) \
DQN_CPU_FEAT_XENTRY(AVX512F) \
DQN_CPU_FEAT_XENTRY(AVX512DQ) \
DQN_CPU_FEAT_XENTRY(AVX512IFMA) \
DQN_CPU_FEAT_XENTRY(AVX512PF) \
DQN_CPU_FEAT_XENTRY(AVX512ER) \
DQN_CPU_FEAT_XENTRY(AVX512CD) \
DQN_CPU_FEAT_XENTRY(AVX512BW) \
DQN_CPU_FEAT_XENTRY(AVX512VL) \
DQN_CPU_FEAT_XENTRY(AVX512VBMI) \
DQN_CPU_FEAT_XENTRY(AVX512VBMI2) \
DQN_CPU_FEAT_XENTRY(AVX512VNNI) \
DQN_CPU_FEAT_XENTRY(AVX512BITALG) \
DQN_CPU_FEAT_XENTRY(AVX512VPOPCNTDQ) \
DQN_CPU_FEAT_XENTRY(AVX5124VNNIW) \
DQN_CPU_FEAT_XENTRY(AVX5124FMAPS) \
DQN_CPU_FEAT_XENTRY(AVX512VP2INTERSECT) \
DQN_CPU_FEAT_XENTRY(AVX512FP16) \
DQN_CPU_FEAT_XENTRY(CLZERO) \
DQN_CPU_FEAT_XENTRY(CMPXCHG8B) \
DQN_CPU_FEAT_XENTRY(CMPXCHG16B) \
DQN_CPU_FEAT_XENTRY(F16C) \
DQN_CPU_FEAT_XENTRY(FMA) \
DQN_CPU_FEAT_XENTRY(FMA4) \
DQN_CPU_FEAT_XENTRY(FP128) \
DQN_CPU_FEAT_XENTRY(FP256) \
DQN_CPU_FEAT_XENTRY(FPU) \
DQN_CPU_FEAT_XENTRY(MMX) \
DQN_CPU_FEAT_XENTRY(MONITOR) \
DQN_CPU_FEAT_XENTRY(MOVBE) \
DQN_CPU_FEAT_XENTRY(MOVU) \
DQN_CPU_FEAT_XENTRY(MmxExt) \
DQN_CPU_FEAT_XENTRY(PCLMULQDQ) \
DQN_CPU_FEAT_XENTRY(POPCNT) \
DQN_CPU_FEAT_XENTRY(RDRAND) \
DQN_CPU_FEAT_XENTRY(RDSEED) \
DQN_CPU_FEAT_XENTRY(RDTSCP) \
DQN_CPU_FEAT_XENTRY(SHA) \
DQN_CPU_FEAT_XENTRY(SSE) \
DQN_CPU_FEAT_XENTRY(SSE2) \
DQN_CPU_FEAT_XENTRY(SSE3) \
DQN_CPU_FEAT_XENTRY(SSE41) \
DQN_CPU_FEAT_XENTRY(SSE42) \
DQN_CPU_FEAT_XENTRY(SSE4A) \
DQN_CPU_FEAT_XENTRY(SSSE3) \
DQN_CPU_FEAT_XENTRY(TSC) \
DQN_CPU_FEAT_XENTRY(TscInvariant) \
DQN_CPU_FEAT_XENTRY(VAES) \
DQN_CPU_FEAT_XENTRY(VPCMULQDQ)
enum Dqn_CPUFeature
{
#define DQN_CPU_FEAT_XENTRY(label) Dqn_CPUFeature_##label,
DQN_CPU_FEAT_XMACRO
#undef DQN_CPU_FEAT_XENTRY
Dqn_CPUFeature_Count,
};
struct Dqn_CPUFeatureDecl
{
Dqn_CPUFeature value;
Dqn_Str8 label;
};
struct Dqn_CPUFeatureQuery
{
Dqn_CPUFeature feature;
bool available;
};
struct Dqn_CPUReport
{
char vendor [4 /*bytes*/ * 3 /*EDX, ECX, EBX*/ + 1 /*null*/];
char brand [48];
uint64_t features[(Dqn_CPUFeature_Count / (sizeof(uint64_t) * 8)) + 1];
};
extern Dqn_CPUFeatureDecl g_dqn_cpu_feature_decl[Dqn_CPUFeature_Count];
#endif // DQN_PLATFORM_ARM64
// NOTE: [$TMUT] Dqn_TicketMutex ///////////////////////////////////////////////////////////////////
@ -468,6 +569,17 @@ struct Dqn_TicketMutex
unsigned int volatile serving; // The ticket ID to block the mutex on until it is returned
};
// NOTE: [$MUTX] Dqn_OSMutex ///////////////////////////////////////////////////////////////////////
struct Dqn_OSMutex
{
#if defined(DQN_OS_WIN32) && !defined(DQN_OS_WIN32_USE_PTHREADS)
char win32_handle[48];
#else
pthread_mutex_t posix_handle;
pthread_mutexattr_t posix_attribs;
#endif
};
// NOTE: [$PRIN] Dqn_Print /////////////////////////////////////////////////////////////////////////
enum Dqn_PrintStd
{
@ -516,7 +628,13 @@ typedef void Dqn_LogProc(Dqn_Str8 type,
DQN_FORCE_INLINE uint64_t Dqn_Atomic_SetValue64 (uint64_t volatile *target, uint64_t value);
DQN_FORCE_INLINE long Dqn_Atomic_SetValue32 (long volatile *target, long value);
#if !defined(DQN_PLATFORM_ARM64)
DQN_API Dqn_CPUIDRegisters Dqn_CPUID (int function_id);
DQN_API Dqn_CPUIDResult Dqn_CPU_ID (Dqn_CPUIDArgs args);
DQN_API Dqn_usize Dqn_CPU_HasFeatureArray (Dqn_CPUReport const *report, Dqn_CPUFeatureQuery *features, Dqn_usize features_size);
DQN_API bool Dqn_CPU_HasFeature (Dqn_CPUReport const *report, Dqn_CPUFeature feature);
DQN_API bool Dqn_CPU_HasAllFeatures (Dqn_CPUReport const *report, Dqn_CPUFeature const *features, Dqn_usize features_size);
template <Dqn_usize N> bool Dqn_CPU_HasAllFeaturesCArray (Dqn_CPUReport const *report, Dqn_CPUFeature const (&features)[N]);
DQN_API void Dqn_CPU_SetFeature (Dqn_CPUReport *report, Dqn_CPUFeature feature);
DQN_API Dqn_CPUReport Dqn_CPU_Report ();
#endif
// NOTE: [$TMUT] Dqn_TicketMutex ///////////////////////////////////////////////////////////////////
@ -671,3 +789,9 @@ DQN_FORCE_INLINE long Dqn_Atomic_SetValue32(long volatile *target, long value)
#error Unsupported compiler
#endif
}
template <Dqn_usize N> bool Dqn_CPU_HasAllFeaturesCArray(Dqn_CPUReport const *report, Dqn_CPUFeature const (&features)[N])
{
bool result = Dqn_CPU_HasAllFeatures(report, features, N);
return result;
}

View File

@ -45,6 +45,7 @@ Dqn_CGenTableHeaderType const DQN_CGEN_TABLE_CODE_GEN_STRUCT_HEADER_LIST[] =
Dqn_CGenTableHeaderType_CppType,
Dqn_CGenTableHeaderType_CppName,
Dqn_CGenTableHeaderType_CppIsPtr,
Dqn_CGenTableHeaderType_CppOpEquals,
Dqn_CGenTableHeaderType_CppArraySize,
Dqn_CGenTableHeaderType_CppArraySizeField,
Dqn_CGenTableHeaderType_GenTypeInfo,
@ -320,6 +321,7 @@ DQN_API Dqn_Str8 Dqn_CGen_TableHeaderTypeToDeclStr8(Dqn_CGenTableHeaderType type
case Dqn_CGenTableHeaderType_CppName: result = DQN_STR8("cpp_name"); break;
case Dqn_CGenTableHeaderType_CppValue: result = DQN_STR8("cpp_value"); break;
case Dqn_CGenTableHeaderType_CppIsPtr: result = DQN_STR8("cpp_is_ptr"); break;
case Dqn_CGenTableHeaderType_CppOpEquals: result = DQN_STR8("cpp_op_equals"); break;
case Dqn_CGenTableHeaderType_CppArraySize: result = DQN_STR8("cpp_array_size"); break;
case Dqn_CGenTableHeaderType_CppArraySizeField: result = DQN_STR8("cpp_array_size_field"); break;
case Dqn_CGenTableHeaderType_GenTypeInfo: result = DQN_STR8("gen_type_info"); break;
@ -626,6 +628,46 @@ DQN_API void Dqn_CGen_EmitCodeForTables(Dqn_CGen *cgen, Dqn_CGenEmit emit, Dqn_C
} break;
}
}
// NOTE: Str8 to enum conversion ////////////////////////////////////////////////////////////
for (Dqn_CGenTable *table = cgen->first_table; table != 0; table = table->next) {
if (table->type != Dqn_CGenTableType_CodeGenEnum)
continue;
for (Dqn_CGenLookupTableIterator it = {}; Dqn_CGen_LookupNextTableInCodeGenTable(cgen, table, &it);) {
Dqn_Str8 type_name = it.cgen_table_column[Dqn_CGenTableHeaderType_Name].string;
Dqn_CppStructBlock(cpp, "%.*sStr8ToEnumResult", DQN_STR_FMT(type_name)) {
Dqn_CppLine(cpp, "bool success;");
Dqn_CppLine(cpp, "%.*s value;", DQN_STR_FMT(type_name));
}
}
}
for (Dqn_CGenTable *table = cgen->first_table; table != 0; table = table->next) {
if (table->type != Dqn_CGenTableType_CodeGenEnum)
continue;
for (Dqn_CGenLookupTableIterator it = {}; Dqn_CGen_LookupNextTableInCodeGenTable(cgen, table, &it);) {
Dqn_Str8 type_name = it.cgen_table_column[Dqn_CGenTableHeaderType_Name].string;
Dqn_CppLine(cpp, "%.*sStr8ToEnumResult %.*s_Str8ToEnum(Dqn_Str8 string);", DQN_STR_FMT(type_name), DQN_STR_FMT(type_name));
}
}
// NOTE: Operator == and != ////////////////////////////////////////////////////////////////
for (Dqn_CGenTable *table = cgen->first_table; table != 0; table = table->next) {
if (table->type != Dqn_CGenTableType_CodeGenStruct)
continue;
for (Dqn_CGenLookupTableIterator it = {}; Dqn_CGen_LookupNextTableInCodeGenTable(cgen, table, &it);) {
Dqn_Str8 cpp_op_equals = it.cgen_table_column[Dqn_CGenTableHeaderType_CppOpEquals].string;
if (cpp_op_equals != DQN_STR8("true"))
continue;
Dqn_Str8 type_name = it.cgen_table_column[Dqn_CGenTableHeaderType_Name].string;
Dqn_CppLine(cpp, "bool operator==(%.*s const &lhs, %.*s const &rhs);", DQN_STR_FMT(type_name), DQN_STR_FMT(type_name));
Dqn_CppLine(cpp, "bool operator!=(%.*s const &lhs, %.*s const &rhs);", DQN_STR_FMT(type_name), DQN_STR_FMT(type_name));
}
}
}
if (emit & Dqn_CGenEmit_Implementation) {
@ -685,7 +727,7 @@ DQN_API void Dqn_CGen_EmitCodeForTables(Dqn_CGen *cgen, Dqn_CGenEmit emit, Dqn_C
Dqn_usize cpp_type_enum_padding = cpp_type_padding + (orig_cpp_type_info.size - cpp_type_info.size);
Dqn_CppLine(cpp,
"{%2d, DQN_STR8(\"%.*s\"),%*s/*value*/ 0, offsetof(%.*s, %.*s),%*ssizeof(((%.*s*)0)->%.*s),%*sDQN_STR8(\"%.*s\"),%*s%.*s,%*s/*is_pointer*/ %s,%s /*array_size*/ %.*s, /*array_size_field*/ %.*s},",
"{%2d, DQN_STR8(\"%.*s\"),%*s/*value*/ 0, offsetof(%.*s, %.*s),%*ssizeof(((%.*s*)0)->%.*s),%*salignof(%.*s),%*sDQN_STR8(\"%.*s\"),%*s%.*s,%*s/*is_pointer*/ %s,%s /*array_size*/ %.*s, /*array_size_field*/ %.*s},",
row_index,
DQN_STR_FMT(cpp_name.column.string),
cpp_name_padding, "",
@ -700,6 +742,10 @@ DQN_API void Dqn_CGen_EmitCodeForTables(Dqn_CGen *cgen, Dqn_CGenEmit emit, Dqn_C
DQN_STR_FMT(cpp_name.column.string),
cpp_name_padding, "",
// NOTE: alignof(a->b)
DQN_STR_FMT(cpp_type.column.string),
cpp_type_padding, "",
// NOTE: Type string
DQN_STR_FMT(cpp_type.column.string),
cpp_type_padding, "",
@ -726,7 +772,7 @@ DQN_API void Dqn_CGen_EmitCodeForTables(Dqn_CGen *cgen, Dqn_CGenEmit emit, Dqn_C
Dqn_usize cpp_name_padding = 1 + it.table->headers[cpp_name.index].longest_string - cpp_name.column.string.size;
Dqn_Str8 cpp_value_str8 = Dqn_Str8_HasData(cpp_value.column.string) ? cpp_value.column.string : Dqn_Str8_InitF(scratch.arena, "%zu", row_index);
Dqn_CppLine(cpp,
"{%2d, DQN_STR8(\"%.*s\"),%*s/*value*/ %.*s, /*offset_of*/ 0, sizeof(%.*s), DQN_STR8(\"\"), %.*s_Type_%.*s, /*is_pointer*/ false, /*array_size*/ 0, /*array_size_field*/ NULL},",
"{%2d, DQN_STR8(\"%.*s\"),%*s/*value*/ %.*s, /*offset_of*/ 0, sizeof(%.*s), alignof(%.*s), DQN_STR8(\"\"), %.*s_Type_%.*s, /*is_pointer*/ false, /*array_size*/ 0, /*array_size_field*/ NULL},",
row_index,
// NOTE: Name string
@ -739,6 +785,9 @@ DQN_API void Dqn_CGen_EmitCodeForTables(Dqn_CGen *cgen, Dqn_CGenEmit emit, Dqn_C
// NOTE: sizeof(a)
DQN_STR_FMT(struct_or_enum_name),
// NOTE: alignof(a)
DQN_STR_FMT(struct_or_enum_name),
// NOTE: ..._Type_...
DQN_STR_FMT(emit_prefix),
DQN_STR_FMT(struct_or_enum_name));
@ -812,17 +861,13 @@ DQN_API void Dqn_CGen_EmitCodeForTables(Dqn_CGen *cgen, Dqn_CGenEmit emit, Dqn_C
}
}
// NOTE: Str8 to enum conversion ////////////////////////////////////////////////////////////
for (Dqn_CGenTable *table = cgen->first_table; table != 0; table = table->next) {
if (table->type != Dqn_CGenTableType_CodeGenEnum)
continue;
for (Dqn_CGenLookupTableIterator it = {}; Dqn_CGen_LookupNextTableInCodeGenTable(cgen, table, &it);) {
Dqn_Str8 type_name = it.cgen_table_column[Dqn_CGenTableHeaderType_Name].string;
Dqn_CppStructBlock(cpp, "%.*sStr8ToEnumResult", DQN_STR_FMT(type_name)) {
Dqn_CppLine(cpp, "bool success;");
Dqn_CppLine(cpp, "%.*s value;", DQN_STR_FMT(type_name));
}
Dqn_CppFuncBlock(cpp, "%.*sStr8ToEnumResult %.*s_Str8ToEnum(Dqn_Str8 string)", DQN_STR_FMT(type_name), DQN_STR_FMT(type_name)) {
Dqn_CppLine(cpp, "%.*sStr8ToEnumResult result = {};", DQN_STR_FMT(type_name));
Dqn_CppForBlock(cpp, "Dqn_usize index = 0; !result.success && index < DQN_ARRAY_UCOUNT(g_%.*s_type_fields); index++", DQN_STR_FMT(type_name)) {
@ -838,5 +883,84 @@ DQN_API void Dqn_CGen_EmitCodeForTables(Dqn_CGen *cgen, Dqn_CGenEmit emit, Dqn_C
}
}
}
// NOTE: Operator == and != ////////////////////////////////////////////////////////////////
for (Dqn_CGenTable *table = cgen->first_table; table != 0; table = table->next) {
if (table->type != Dqn_CGenTableType_CodeGenStruct)
continue;
for (Dqn_CGenLookupTableIterator it = {}; Dqn_CGen_LookupNextTableInCodeGenTable(cgen, table, &it);) {
Dqn_Str8 cpp_op_equals = it.cgen_table_column[Dqn_CGenTableHeaderType_CppOpEquals].string;
if (cpp_op_equals != DQN_STR8("true"))
continue;
Dqn_Str8 type_name = it.cgen_table_column[Dqn_CGenTableHeaderType_Name].string;
Dqn_CppFuncBlock(cpp, "bool operator==(%.*s const &lhs, %.*s const &rhs)", DQN_STR_FMT(type_name), DQN_STR_FMT(type_name)) {
for (Dqn_usize row_index = 0; row_index < it.table->row_count; row_index++) {
Dqn_CGenTableRow const *row = it.table->rows + row_index;
Dqn_CGenLookupColumnAtHeader cpp_name = Dqn_CGen_LookupColumnAtHeader(it.table, it.cgen_table_column[Dqn_CGenTableHeaderType_CppName].string, row);
Dqn_CGenLookupColumnAtHeader cpp_is_ptr = Dqn_CGen_LookupColumnAtHeader(it.table, it.cgen_table_column[Dqn_CGenTableHeaderType_CppIsPtr].string, row);
Dqn_CGenLookupColumnAtHeader cpp_array_size = Dqn_CGen_LookupColumnAtHeader(it.table, it.cgen_table_column[Dqn_CGenTableHeaderType_CppArraySize].string, row);
Dqn_CGenLookupColumnAtHeader cpp_array_size_field = Dqn_CGen_LookupColumnAtHeader(it.table, it.cgen_table_column[Dqn_CGenTableHeaderType_CppArraySizeField].string, row);
// TODO(doyle): Check if we're an integral type or not to double check if we
// can use memcmp or operator==
if (Dqn_Str8_HasData(cpp_array_size_field.column.string)) {
Dqn_CppIfChain(cpp) {
Dqn_CppIfOrElseIfBlock(cpp,
"lhs.%.*s != rhs.%.*s",
DQN_STR_FMT(cpp_array_size_field.column.string),
DQN_STR_FMT(cpp_array_size_field.column.string)) {
Dqn_CppLine(cpp, "return false;");
}
}
Dqn_CppIfChain(cpp) {
Dqn_CppIfOrElseIfBlock(cpp,
"DQN_MEMCMP(lhs.%.*s, rhs.%.*s, lhs.%.*s) != 0",
DQN_STR_FMT(cpp_name.column.string),
DQN_STR_FMT(cpp_name.column.string),
DQN_STR_FMT(cpp_array_size_field.column.string)) {
Dqn_CppLine(cpp, "return false;");
}
}
} else if (Dqn_Str8_HasData(cpp_array_size.column.string)) {
Dqn_CppIfChain(cpp) {
Dqn_CppIfOrElseIfBlock(cpp,
"DQN_MEMCMP(lhs.%.*s, rhs.%.*s, %.*s) != 0",
DQN_STR_FMT(cpp_name.column.string),
DQN_STR_FMT(cpp_name.column.string),
DQN_STR_FMT(cpp_array_size.column.string)) {
Dqn_CppLine(cpp, "return false;");
}
}
} else if (cpp_is_ptr.column.string == DQN_STR8("true")) {
Dqn_CppIfChain(cpp) {
Dqn_CppIfOrElseIfBlock(cpp,
"*lhs.%.*s != *rhs.%.*s",
DQN_STR_FMT(cpp_name.column.string),
DQN_STR_FMT(cpp_name.column.string)) {
Dqn_CppLine(cpp, "return false;");
}
}
} else {
Dqn_CppIfChain(cpp) {
Dqn_CppIfOrElseIfBlock(cpp,
"lhs.%.*s != rhs.%.*s",
DQN_STR_FMT(cpp_name.column.string),
DQN_STR_FMT(cpp_name.column.string)) {
Dqn_CppLine(cpp, "return false;");
}
}
}
}
Dqn_CppLine(cpp, "return true;");
}
Dqn_CppFuncBlock(cpp, "bool operator!=(%.*s const &lhs, %.*s const &rhs)", DQN_STR_FMT(type_name), DQN_STR_FMT(type_name)) {
Dqn_CppLine(cpp, "bool result = !(lhs == rhs);");
Dqn_CppLine(cpp, "return result;");
}
}
}
}
}

View File

@ -67,6 +67,7 @@ enum Dqn_CGenTableHeaderType
Dqn_CGenTableHeaderType_CppName,
Dqn_CGenTableHeaderType_CppValue,
Dqn_CGenTableHeaderType_CppIsPtr,
Dqn_CGenTableHeaderType_CppOpEquals,
Dqn_CGenTableHeaderType_CppArraySize,
Dqn_CGenTableHeaderType_CppArraySizeField,
Dqn_CGenTableHeaderType_GenTypeInfo,

View File

@ -1,3 +1,6 @@
#pragma once
#include "dqn.h"
/*
////////////////////////////////////////////////////////////////////////////////////////////////////
//

View File

@ -1,3 +1,6 @@
#pragma once
#include "dqn.h"
/*
////////////////////////////////////////////////////////////////////////////////////////////////////
//
@ -281,6 +284,8 @@ template <typename T, Dqn_usize N> void Dqn_FArra
#define DQN_TO_SLICE(val) Dqn_Slice_Init((val)->data, (val)->size)
template <typename T> Dqn_Slice<T> Dqn_Slice_Init (T* const data, Dqn_usize size);
template <typename T, Dqn_usize N> Dqn_Slice<T> Dqn_Slice_InitCArray (Dqn_Arena *arena, T const (&array)[N]);
template <typename T> Dqn_Slice<T> Dqn_Slice_Copy (Dqn_Arena *arena, Dqn_Slice<T> slice);
template <typename T> Dqn_Slice<T> Dqn_Slice_CopyPtr (Dqn_Arena *arena, T* const data, Dqn_usize size);
template <typename T> Dqn_Slice<T> Dqn_Slice_Alloc (Dqn_Arena *arena, Dqn_usize size, Dqn_ZeroMem zero_mem);
Dqn_Str8 Dqn_Slice_Str8Render (Dqn_Arena *arena, Dqn_Slice<Dqn_Str8> array, Dqn_Str8 separator);
Dqn_Str8 Dqn_Slice_Str8RenderSpaceSeparated (Dqn_Arena *arena, Dqn_Slice<Dqn_Str8> array);
@ -622,7 +627,7 @@ template <typename T> bool Dqn_VArray_Reserve(Dqn_VArray<T> *array, Dqn_usize co
#endif // !defined(DQN_NO_VARRAY)
#if !defined(DQN_NO_SARRAY)
// NOTE: [$FARR] Dqn_SArray ////////////////////////////////////////////////////////////////////////
// NOTE: [$SARR] Dqn_SArray ////////////////////////////////////////////////////////////////////////
template <typename T> Dqn_SArray<T> Dqn_SArray_Init(Dqn_Arena *arena, Dqn_usize size, Dqn_ZeroMem zero_mem)
{
Dqn_SArray<T> result = {};
@ -700,7 +705,7 @@ template <typename T> T *Dqn_SArray_Add(Dqn_SArray<T> *array, T const &item)
return result;
}
template <typename T, Dqn_usize N> T *Dqn_SArray_InsertArray(Dqn_SArray<T> *array, Dqn_usize index, T const *items, Dqn_usize count)
template <typename T> T *Dqn_SArray_InsertArray(Dqn_SArray<T> *array, Dqn_usize index, T const *items, Dqn_usize count)
{
T *result = nullptr;
if (!Dqn_SArray_IsValid(array))
@ -715,7 +720,7 @@ template <typename T, Dqn_usize N> T *Dqn_SArray_InsertCArray(Dqn_SArray<T> *arr
return result;
}
template <typename T, Dqn_usize N> T *Dqn_SArray_Insert(Dqn_SArray<T> *array, Dqn_usize index, T const &item)
template <typename T> T *Dqn_SArray_Insert(Dqn_SArray<T> *array, Dqn_usize index, T const &item)
{
T *result = Dqn_SArray_InsertArray(array, index, &item, 1);
return result;
@ -896,6 +901,19 @@ Dqn_Slice<T> Dqn_Slice_InitCArray(Dqn_Arena *arena, T const (&array)[N])
return result;
}
template <typename T> Dqn_Slice<T> Dqn_Slice_CopyPtr(Dqn_Arena *arena, T *const data, Dqn_usize size)
{
T *copy = Dqn_Arena_NewArrayCopy(arena, T, data, size);
Dqn_Slice<T> result = Dqn_Slice_Init(copy, copy ? size : 0);
return result;
}
template <typename T> Dqn_Slice<T> Dqn_Slice_Copy(Dqn_Arena *arena, Dqn_Slice<T> slice)
{
Dqn_Slice<T> result = Dqn_Slice_CopyPtr(arena, slice.data, slice.size);
return result;
}
template <typename T> Dqn_Slice<T> Dqn_Slice_Alloc(Dqn_Arena *arena, Dqn_usize size, Dqn_ZeroMem zero_mem)
{
Dqn_Slice<T> result = {};
@ -1442,7 +1460,7 @@ template <typename T> DQN_API T *Dqn_List_At(Dqn_List<T> *list, Dqn_usize index,
if (!list || !list->chunk_size || index >= list->count)
return nullptr;
Dqn_usize total_chunks = list->count / (list->chunk_size + (list->chunk_size - 1));
Dqn_usize total_chunks = (list->count / list->chunk_size) + ((list->chunk_size % list->count) ? 1 : 0);
Dqn_usize desired_chunk = index / list->chunk_size;
Dqn_usize forward_scan_dist = desired_chunk;