Get latest DQN lib from side projects

This commit is contained in:
2025-02-14 00:27:42 +11:00
parent 6b403eae71
commit a844d2b992
45 changed files with 11343 additions and 10359 deletions
+64 -62
View File
@@ -19,19 +19,19 @@
////////////////////////////////////////////////////////////////////////////////////////////////////
*/
DQN_API Dqn_Str8FindResult Dqn_Str8_FindStr8AVX512F(Dqn_Str8 string, Dqn_Str8 find)
DN_API DN_Str8FindResult DN_Str8_FindStr8AVX512F(DN_Str8 string, DN_Str8 find)
{
// NOTE: Algorithm as described in http://0x80.pl/articles/simd-strfind.html
Dqn_Str8FindResult result = {};
if (!Dqn_Str8_HasData(string) || !Dqn_Str8_HasData(find) || find.size > string.size)
DN_Str8FindResult result = {};
if (!DN_Str8_HasData(string) || !DN_Str8_HasData(find) || find.size > string.size)
return result;
__m512i const find_first_ch = _mm512_set1_epi8(find.data[0]);
__m512i const find_last_ch = _mm512_set1_epi8(find.data[find.size - 1]);
Dqn_usize const search_size = string.size - find.size;
Dqn_usize simd_iterations = search_size / sizeof(__m512i);
char const *ptr = string.data;
DN_USize const search_size = string.size - find.size;
DN_USize simd_iterations = search_size / sizeof(__m512i);
char const *ptr = string.data;
while (simd_iterations--) {
__m512i find_first_ch_block = _mm512_loadu_si512(ptr);
@@ -79,41 +79,43 @@ DQN_API Dqn_Str8FindResult Dqn_Str8_FindStr8AVX512F(Dqn_Str8 string, Dqn_Str8 fi
uint64_t const lsb_zero_pos = _tzcnt_u64(zero_byte_mask);
char const *base_ptr = ptr + (4 * lsb_zero_pos);
if (DQN_MEMCMP(base_ptr + 0, find.data, find.size) == 0) {
if (DN_MEMCMP(base_ptr + 0, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data;
} else if (DQN_MEMCMP(base_ptr + 1, find.data, find.size) == 0) {
} else if (DN_MEMCMP(base_ptr + 1, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 1;
} else if (DQN_MEMCMP(base_ptr + 2, find.data, find.size) == 0) {
} else if (DN_MEMCMP(base_ptr + 2, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 2;
} else if (DQN_MEMCMP(base_ptr + 3, find.data, find.size) == 0) {
} else if (DN_MEMCMP(base_ptr + 3, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 3;
}
if (result.found) {
result.start_to_before_match = Dqn_Str8_Init(string.data, result.index);
result.match = Dqn_Str8_Init(string.data + result.index, find.size);
result.match_to_end_of_buffer = Dqn_Str8_Init(result.match.data, string.size - result.index);
result.start_to_before_match = DN_Str8_Init(string.data, result.index);
result.match = DN_Str8_Init(string.data + result.index, find.size);
result.match_to_end_of_buffer = DN_Str8_Init(result.match.data, string.size - result.index);
result.after_match_to_end_of_buffer = DN_Str8_Advance(result.match_to_end_of_buffer, find.size);
return result;
}
zero_byte_mask = Dqn_Bit_ClearNextLSB(zero_byte_mask);
zero_byte_mask = DN_Bit_ClearNextLSB(zero_byte_mask);
}
ptr += sizeof(__m512i);
}
for (Dqn_usize index = ptr - string.data; index < string.size; index++) {
Dqn_Str8 string_slice = Dqn_Str8_Slice(string, index, find.size);
if (Dqn_Str8_Eq(string_slice, find)) {
result.found = true;
result.index = index;
result.start_to_before_match = Dqn_Str8_Init(string.data, index);
result.match = Dqn_Str8_Init(string.data + index, find.size);
result.match_to_end_of_buffer = Dqn_Str8_Init(result.match.data, string.size - index);
for (DN_USize index = ptr - string.data; index < string.size; index++) {
DN_Str8 string_slice = DN_Str8_Slice(string, index, find.size);
if (DN_Str8_Eq(string_slice, find)) {
result.found = true;
result.index = index;
result.start_to_before_match = DN_Str8_Init(string.data, index);
result.match = DN_Str8_Init(string.data + index, find.size);
result.match_to_end_of_buffer = DN_Str8_Init(result.match.data, string.size - index);
result.after_match_to_end_of_buffer = DN_Str8_Advance(result.match_to_end_of_buffer, find.size);
return result;
}
}
@@ -121,18 +123,18 @@ DQN_API Dqn_Str8FindResult Dqn_Str8_FindStr8AVX512F(Dqn_Str8 string, Dqn_Str8 fi
return result;
}
DQN_API Dqn_Str8FindResult Dqn_Str8_FindLastStr8AVX512F(Dqn_Str8 string, Dqn_Str8 find)
DN_API DN_Str8FindResult DN_Str8_FindLastStr8AVX512F(DN_Str8 string, DN_Str8 find)
{
// NOTE: Algorithm as described in http://0x80.pl/articles/simd-strfind.html
Dqn_Str8FindResult result = {};
if (!Dqn_Str8_HasData(string) || !Dqn_Str8_HasData(find) || find.size > string.size)
DN_Str8FindResult result = {};
if (!DN_Str8_HasData(string) || !DN_Str8_HasData(find) || find.size > string.size)
return result;
__m512i const find_first_ch = _mm512_set1_epi8(find.data[0]);
__m512i const find_last_ch = _mm512_set1_epi8(find.data[find.size - 1]);
Dqn_usize const search_size = string.size - find.size;
Dqn_usize simd_iterations = search_size / sizeof(__m512i);
DN_USize const search_size = string.size - find.size;
DN_USize simd_iterations = search_size / sizeof(__m512i);
char const *ptr = string.data + search_size + 1;
while (simd_iterations--) {
@@ -182,39 +184,39 @@ DQN_API Dqn_Str8FindResult Dqn_Str8_FindLastStr8AVX512F(Dqn_Str8 string, Dqn_Str
uint64_t const lsb_zero_pos = _tzcnt_u64(zero_byte_mask);
char const *base_ptr = ptr + (4 * lsb_zero_pos);
if (DQN_MEMCMP(base_ptr + 0, find.data, find.size) == 0) {
if (DN_MEMCMP(base_ptr + 0, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data;
} else if (DQN_MEMCMP(base_ptr + 1, find.data, find.size) == 0) {
} else if (DN_MEMCMP(base_ptr + 1, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 1;
} else if (DQN_MEMCMP(base_ptr + 2, find.data, find.size) == 0) {
} else if (DN_MEMCMP(base_ptr + 2, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 2;
} else if (DQN_MEMCMP(base_ptr + 3, find.data, find.size) == 0) {
} else if (DN_MEMCMP(base_ptr + 3, find.data, find.size) == 0) {
result.found = true;
result.index = base_ptr - string.data + 3;
}
if (result.found) {
result.start_to_before_match = Dqn_Str8_Init(string.data, result.index);
result.match = Dqn_Str8_Init(string.data + result.index, find.size);
result.match_to_end_of_buffer = Dqn_Str8_Init(result.match.data, string.size - result.index);
result.start_to_before_match = DN_Str8_Init(string.data, result.index);
result.match = DN_Str8_Init(string.data + result.index, find.size);
result.match_to_end_of_buffer = DN_Str8_Init(result.match.data, string.size - result.index);
return result;
}
zero_byte_mask = Dqn_Bit_ClearNextLSB(zero_byte_mask);
zero_byte_mask = DN_Bit_ClearNextLSB(zero_byte_mask);
}
}
for (Dqn_usize index = ptr - string.data - 1; index < string.size; index--) {
Dqn_Str8 string_slice = Dqn_Str8_Slice(string, index, find.size);
if (Dqn_Str8_Eq(string_slice, find)) {
for (DN_USize index = ptr - string.data - 1; index < string.size; index--) {
DN_Str8 string_slice = DN_Str8_Slice(string, index, find.size);
if (DN_Str8_Eq(string_slice, find)) {
result.found = true;
result.index = index;
result.start_to_before_match = Dqn_Str8_Init(string.data, index);
result.match = Dqn_Str8_Init(string.data + index, find.size);
result.match_to_end_of_buffer = Dqn_Str8_Init(result.match.data, string.size - index);
result.start_to_before_match = DN_Str8_Init(string.data, index);
result.match = DN_Str8_Init(string.data + index, find.size);
result.match_to_end_of_buffer = DN_Str8_Init(result.match.data, string.size - index);
return result;
}
}
@@ -222,14 +224,14 @@ DQN_API Dqn_Str8FindResult Dqn_Str8_FindLastStr8AVX512F(Dqn_Str8 string, Dqn_Str
return result;
}
DQN_API Dqn_Str8BinarySplitResult Dqn_Str8_BinarySplitAVX512F(Dqn_Str8 string, Dqn_Str8 find)
DN_API DN_Str8BinarySplitResult DN_Str8_BinarySplitAVX512F(DN_Str8 string, DN_Str8 find)
{
Dqn_Str8BinarySplitResult result = {};
Dqn_Str8FindResult find_result = Dqn_Str8_FindStr8AVX512F(string, find);
DN_Str8BinarySplitResult result = {};
DN_Str8FindResult find_result = DN_Str8_FindStr8AVX512F(string, find);
if (find_result.found) {
result.lhs.data = string.data;
result.lhs.size = find_result.index;
result.rhs = Dqn_Str8_Advance(find_result.match_to_end_of_buffer, find.size);
result.rhs = DN_Str8_Advance(find_result.match_to_end_of_buffer, find.size);
} else {
result.lhs = string;
}
@@ -237,14 +239,14 @@ DQN_API Dqn_Str8BinarySplitResult Dqn_Str8_BinarySplitAVX512F(Dqn_Str8 string, D
return result;
}
DQN_API Dqn_Str8BinarySplitResult Dqn_Str8_BinarySplitLastAVX512F(Dqn_Str8 string, Dqn_Str8 find)
DN_API DN_Str8BinarySplitResult DN_Str8_BinarySplitLastAVX512F(DN_Str8 string, DN_Str8 find)
{
Dqn_Str8BinarySplitResult result = {};
Dqn_Str8FindResult find_result = Dqn_Str8_FindLastStr8AVX512F(string, find);
DN_Str8BinarySplitResult result = {};
DN_Str8FindResult find_result = DN_Str8_FindLastStr8AVX512F(string, find);
if (find_result.found) {
result.lhs.data = string.data;
result.lhs.size = find_result.index;
result.rhs = Dqn_Str8_Advance(find_result.match_to_end_of_buffer, find.size);
result.rhs = DN_Str8_Advance(find_result.match_to_end_of_buffer, find.size);
} else {
result.lhs = string;
}
@@ -252,17 +254,17 @@ DQN_API Dqn_Str8BinarySplitResult Dqn_Str8_BinarySplitLastAVX512F(Dqn_Str8 strin
return result;
}
DQN_API Dqn_usize Dqn_Str8_SplitAVX512F(Dqn_Str8 string, Dqn_Str8 delimiter, Dqn_Str8 *splits, Dqn_usize splits_count, Dqn_Str8SplitIncludeEmptyStrings mode)
DN_API DN_USize DN_Str8_SplitAVX512F(DN_Str8 string, DN_Str8 delimiter, DN_Str8 *splits, DN_USize splits_count, DN_Str8SplitIncludeEmptyStrings mode)
{
Dqn_usize result = 0; // The number of splits in the actual string.
if (!Dqn_Str8_HasData(string) || !Dqn_Str8_HasData(delimiter) || delimiter.size <= 0)
DN_USize result = 0; // The number of splits in the actual string.
if (!DN_Str8_HasData(string) || !DN_Str8_HasData(delimiter) || delimiter.size <= 0)
return result;
Dqn_Str8BinarySplitResult split = {};
Dqn_Str8 first = string;
DN_Str8BinarySplitResult split = {};
DN_Str8 first = string;
do {
split = Dqn_Str8_BinarySplitAVX512F(first, delimiter);
if (split.lhs.size || mode == Dqn_Str8SplitIncludeEmptyStrings_Yes) {
split = DN_Str8_BinarySplitAVX512F(first, delimiter);
if (split.lhs.size || mode == DN_Str8SplitIncludeEmptyStrings_Yes) {
if (splits && result < splits_count)
splits[result] = split.lhs;
result++;
@@ -273,14 +275,14 @@ DQN_API Dqn_usize Dqn_Str8_SplitAVX512F(Dqn_Str8 string, Dqn_Str8 delimiter, Dqn
return result;
}
DQN_API Dqn_Slice<Dqn_Str8> Dqn_Str8_SplitAllocAVX512F(Dqn_Arena *arena, Dqn_Str8 string, Dqn_Str8 delimiter, Dqn_Str8SplitIncludeEmptyStrings mode)
DN_API DN_Slice<DN_Str8> DN_Str8_SplitAllocAVX512F(DN_Arena *arena, DN_Str8 string, DN_Str8 delimiter, DN_Str8SplitIncludeEmptyStrings mode)
{
Dqn_Slice<Dqn_Str8> result = {};
Dqn_usize splits_required = Dqn_Str8_SplitAVX512F(string, delimiter, /*splits*/ nullptr, /*count*/ 0, mode);
result.data = Dqn_Arena_NewArray(arena, Dqn_Str8, splits_required, Dqn_ZeroMem_No);
DN_Slice<DN_Str8> result = {};
DN_USize splits_required = DN_Str8_SplitAVX512F(string, delimiter, /*splits*/ nullptr, /*count*/ 0, mode);
result.data = DN_Arena_NewArray(arena, DN_Str8, splits_required, DN_ZeroMem_No);
if (result.data) {
result.size = Dqn_Str8_SplitAVX512F(string, delimiter, result.data, splits_required, mode);
DQN_ASSERT(splits_required == result.size);
result.size = DN_Str8_SplitAVX512F(string, delimiter, result.data, splits_required, mode);
DN_ASSERT(splits_required == result.size);
}
return result;
}