Fix some issues in INI implementation

This commit is contained in:
doylet 2025-10-11 17:44:03 +11:00
parent 1706c5c12a
commit fd7a543e34
4 changed files with 639 additions and 113 deletions

View File

@ -1,4 +1,4 @@
// Generated by the DN single header generator 2025-09-28 21:52:37
// Generated by the DN single header generator 2025-10-11 17:43:41
#define DN_BASE_INC_CPP

View File

@ -1,4 +1,4 @@
// Generated by the DN single header generator 2025-09-28 21:52:36
// Generated by the DN single header generator 2025-10-11 17:43:41
#if !defined(DN_BASE_INC_H)
#define DN_BASE_INC_H

View File

@ -1,16 +1,111 @@
#include "dn_ini.h"
#if defined(__cplusplus__)
#include <stdbool.h>
#if !defined(__cplusplus__)
#include <stdbool.h>
#endif
#include <stdio.h>
// Linear (bump) allocator state over a caller-supplied byte buffer.
typedef struct DN_INIArena {
    char  *base; // Start of the backing buffer
    size_t used; // Number of bytes already handed out from `base`
    size_t max;  // Total capacity of the backing buffer in bytes
} DN_INIArena;
// Result of splitting a string in two around a delimiter. The split functions in this file fill
// `lhs` with the text before the delimiter and `rhs` with the text after it.
typedef struct DN_INIStr8BSplit {
DN_INIStr8 lhs;
DN_INIStr8 rhs;
} DN_INIStr8BSplit;
// Allocate `size` zero-initialised bytes from the arena.
//
// Returns a pointer to the start of the new allocation, or null if the arena has no backing
// buffer or does not have `size` bytes left. On failure the arena is left unmodified.
//
// Allocations are handed out back-to-back with no padding between them, i.e. two consecutive
// successful calls return adjacent memory.
void *DN_INI_ArenaAlloc(DN_INIArena *arena, size_t size)
{
    void *result = 0;

    // NOTE: Compare against the remaining space rather than computing `used + size`, the sum can
    // wrap around for a very large `size` and incorrectly pass a `<= max` check.
    bool has_space = arena->base && arena->used <= arena->max && size <= (arena->max - arena->used);
    if (has_space) {
        result = arena->base + arena->used;
        arena->used += size;
        DN_INI_Memset(result, 0, size);
    }
    return result;
}
// Returns true if `ch` is a space, carriage return, line feed or horizontal tab.
static bool DN_INI_CharIsWhitespace_(char ch)
{
    switch (ch) {
        case ' ':  /*FALLTHRU*/
        case '\r': /*FALLTHRU*/
        case '\n': /*FALLTHRU*/
        case '\t': return true;
        default:   return false;
    }
}
DN_INITokeniser DN_INI_TokeniserFromPtr(char const *buf, size_t count)
// Wrap a pointer and byte count in a DN_INIStr8. No bytes are copied, the returned string points
// at `data` (with the const qualifier cast away).
static DN_INIStr8 DN_INI_Str8FromPtr(char const *data, uint32_t count)
{
    DN_INIStr8 str8 = {};
    str8.size = count;
    str8.data = (char *)data;
    return str8;
}
// Returns true if both strings have the same size and identical bytes. Two empty strings are
// always equal regardless of their data pointers.
static bool DN_INI_Str8Eq(DN_INIStr8 lhs, DN_INIStr8 rhs)
{
    // NOTE: Empty strings are handled without calling memcmp. A zero-initialised DN_INIStr8 has a
    // null data pointer and passing null to memcmp is undefined behaviour even when the length is 0.
    bool result = lhs.size == rhs.size &&
                  (lhs.size == 0 || DN_INI_Memcmp(lhs.data, rhs.data, lhs.size) == 0);
    return result;
}
// Return the sub-string of `slice` starting at `offset` and spanning at most `size` bytes.
//
// Both the offset and the size are clamped to the bounds of `slice`, so an out-of-range request
// yields a shorter (possibly empty) string instead of reading past the end. A `slice` with a null
// data pointer yields a zero-initialised string.
static DN_INIStr8 DN_INI_Str8Slice(DN_INIStr8 slice, uint32_t offset, uint32_t size)
{
    DN_INIStr8 result = {};
    if (!slice.data)
        return result;

    // NOTE: Clamp the start position to the end of the string
    uint32_t start = offset;
    if (start > slice.size)
        start = slice.size;

    // NOTE: Clamp the length to what is left after the start position
    uint32_t available = slice.size - start;
    result.data        = slice.data + start;
    result.size        = size > available ? available : size;
    return result;
}
// Split `str8` in two around the first occurrence of `find`.
//
// On a match `lhs` is the text before the delimiter and `rhs` is the text after it, the delimiter
// itself is in neither. If there is no match (including when `find` is empty or longer than
// `str8`) `lhs` is the whole of `str8` and `rhs` is empty. No bytes are copied, the results point
// into `str8`.
static DN_INIStr8BSplit DN_INI_Str8BSplit(DN_INIStr8 str8, DN_INIStr8 find)
{
    DN_INIStr8BSplit result = {};
    result.lhs              = str8; // NOTE: Default result when the delimiter is not found
    if (find.size == 0 || find.size > str8.size)
        return result;

    size_t last_index = str8.size - find.size;
    for (size_t index = 0; index <= last_index; index++) {
        DN_INIStr8 slice = DN_INI_Str8FromPtr(str8.data + index, find.size);
        if (DN_INI_Str8Eq(slice, find)) {
            // NOTE: Skip the entire delimiter, not just 1 byte, so that multi-byte delimiters do
            // not leak their tail into the right hand side.
            uint32_t rhs_offset = (uint32_t)index + find.size;
            uint32_t rhs_size   = str8.size - rhs_offset;
            DN_INI_Assert(rhs_size < str8.size);
            result.lhs = DN_INI_Str8FromPtr(str8.data, (uint32_t)index);
            result.rhs = DN_INI_Str8FromPtr(str8.data + rhs_offset, rhs_size);
            break;
        }
    }
    return result;
}
// Split `str8` in two around the last occurrence of `find`.
//
// On a match `lhs` is the text before the delimiter and `rhs` is the text after it, the delimiter
// itself is in neither. If there is no match (including when `find` is empty or longer than
// `str8`) `lhs` is the whole of `str8` and `rhs` is empty. No bytes are copied, the results point
// into `str8`.
static DN_INIStr8BSplit DN_INI_Str8BSplitReverse(DN_INIStr8 str8, DN_INIStr8 find)
{
    DN_INIStr8BSplit result = {};
    result.lhs              = str8; // NOTE: Default result when the delimiter is not found
    if (find.size == 0 || find.size > str8.size)
        return result;

    // NOTE: Walk the candidate positions from the last possible one down to and including 0. The
    // counter is one ahead of the position being tested so that the unsigned index can reach 0
    // without underflowing.
    for (size_t index = (str8.size - find.size) + 1; index-- > 0;) {
        DN_INIStr8 slice = DN_INI_Str8FromPtr(str8.data + index, find.size);
        if (DN_INI_Str8Eq(slice, find)) {
            uint32_t rhs_size = (uint32_t)(str8.size - index - find.size);
            DN_INI_Assert(rhs_size < str8.size);
            result.lhs = DN_INI_Str8FromPtr(str8.data, (uint32_t)index);
            result.rhs = DN_INI_Str8FromPtr(str8.data + index + find.size, rhs_size);
            break;
        }
    }
    return result;
}
DN_INITokeniser DN_INI_TokeniserFromPtr(char const *buf, uint32_t count)
{
DN_INITokeniser result = {};
result.data = (char *)buf;
@ -20,7 +115,7 @@ DN_INITokeniser DN_INI_TokeniserFromPtr(char const *buf, size_t count)
DN_INIToken DN_INI_NextToken(DN_INITokeniser const *tokeniser)
{
size_t pos = tokeniser->pos;
uint32_t pos = tokeniser->pos;
DN_INIToken result = {};
result.line = tokeniser->line;
result.line_start = tokeniser->line_start ? tokeniser->line_start : tokeniser->data;
@ -31,12 +126,19 @@ DN_INIToken DN_INI_NextToken(DN_INITokeniser const *tokeniser)
if (tokeniser->data[pos++] == '\n') {
result.line++;
result.line_start = tokeniser->data + pos;
result.new_line = true;
}
continue;
}
if (pos >= tokeniser->count) {
if (tokeniser->prev_token == DN_INITokenType_Nil || tokeniser->prev_token == DN_INITokenType_Value || tokeniser->prev_token == DN_INITokenType_Comment || tokeniser->prev_token == DN_INITokenType_Value || tokeniser->prev_token == DN_INITokenType_KeyValueSeparator) {
if (tokeniser->prev_token.type == DN_INITokenType_Nil ||
tokeniser->prev_token.type == DN_INITokenType_Value ||
tokeniser->prev_token.type == DN_INITokenType_Comment ||
tokeniser->prev_token.type == DN_INITokenType_Value ||
tokeniser->prev_token.type == DN_INITokenType_KeyValueSeparator ||
tokeniser->prev_token.type == DN_INITokenType_MultilineValue ||
tokeniser->prev_token.type == DN_INITokenType_Section) {
result.type = DN_INITokenType_EndOfStream;
} else {
result.type = DN_INITokenType_Error;
@ -60,12 +162,16 @@ DN_INIToken DN_INI_NextToken(DN_INITokeniser const *tokeniser)
result.type = DN_INITokenType_Section;
result.next_p = pos + 1;
}
result.count = (tokeniser->data + pos) - result.data;
result.count = (uint32_t)((tokeniser->data + pos) - result.data);
}
} break;
case '#': {
if (tokeniser->prev_token != DN_INITokenType_Nil && tokeniser->prev_token != DN_INITokenType_Comment && tokeniser->prev_token != DN_INITokenType_Value && tokeniser->prev_token != DN_INITokenType_Section) {
if (tokeniser->prev_token.type != DN_INITokenType_Nil &&
tokeniser->prev_token.type != DN_INITokenType_Comment &&
tokeniser->prev_token.type != DN_INITokenType_Value &&
tokeniser->prev_token.type != DN_INITokenType_MultilineValue &&
tokeniser->prev_token.type != DN_INITokenType_Section) {
result.data = (char *)tokeniser->data + pos + 1;
result.type = DN_INITokenType_Error;
result.error = DN_INIStr8Lit("Invalid comment that was not preceeded by another comment, section, value or at the start of the file");
@ -76,7 +182,7 @@ DN_INIToken DN_INI_NextToken(DN_INITokeniser const *tokeniser)
pos++;
if (pos >= tokeniser->count || tokeniser->data[pos] == '\n') {
result.type = DN_INITokenType_Comment;
result.count = (tokeniser->data + pos) - result.data;
result.count = (uint32_t)((tokeniser->data + pos) - result.data);
result.next_p = pos;
}
}
@ -84,7 +190,7 @@ DN_INIToken DN_INI_NextToken(DN_INITokeniser const *tokeniser)
} break;
case '=': {
if (tokeniser->prev_token == DN_INITokenType_Key) {
if (tokeniser->prev_token.type == DN_INITokenType_Key) {
result.type = DN_INITokenType_KeyValueSeparator;
} else {
result.type = DN_INITokenType_Error;
@ -95,59 +201,85 @@ DN_INIToken DN_INI_NextToken(DN_INITokeniser const *tokeniser)
result.count = 1;
} break;
case '"': {
result.data = (char *)tokeniser->data + pos + 1;
while (result.type == DN_INITokenType_Nil) {
pos++;
if (pos >= tokeniser->count) {
result.type = DN_INITokenType_EndOfStream;
result.count = (tokeniser->data + pos) - result.data;
} else if (tokeniser->data[pos] == '"') {
if (tokeniser->prev_token == DN_INITokenType_KeyValueSeparator) {
result.type = DN_INITokenType_Value;
} else {
result.type = DN_INITokenType_Error;
result.error = DN_INIStr8Lit("Invalid quoted string, value was not preceeded by a key-value separator");
}
result.count = (tokeniser->data + pos) - result.data;
result.next_p = pos + 1;
}
}
} break;
default: {
bool quoted = tokeniser->data[pos] == '"';
if (quoted)
pos++;
result.data = (char *)tokeniser->data + pos;
while (result.type == DN_INITokenType_Nil) {
pos++;
for (; result.type == DN_INITokenType_Nil; pos++) {
bool end_of_stream = pos >= tokeniser->count;
if (end_of_stream || DN_INI_CharIsWhitespace_(tokeniser->data[pos]) || tokeniser->data[pos] == '#') {
if (result.type == DN_INITokenType_Nil) {
if (tokeniser->prev_token == DN_INITokenType_KeyValueSeparator) {
if (tokeniser->data[pos] == ' ') // Value can have spaces in it without quotes
continue;
result.type = DN_INITokenType_Value;
} else if (tokeniser->prev_token == DN_INITokenType_Key) {
result.type = DN_INITokenType_Error;
result.error = DN_INIStr8Lit("Invalid unquoted string, multiple consecutive keys encountered");
} else {
result.type = DN_INITokenType_Key;
}
bool end_of_quote = !end_of_stream && quoted && tokeniser->data[pos] == '"';
if (end_of_stream ||
DN_INI_CharIsWhitespace_(tokeniser->data[pos]) ||
tokeniser->data[pos] == '#' ||
tokeniser->data[pos] == '\\' ||
tokeniser->data[pos] == '=' ||
end_of_quote) {
uint32_t next_p = pos;
if (end_of_quote) {
next_p = pos + 1;
DN_INI_Assert(!end_of_stream);
DN_INI_Assert(tokeniser->data[pos] == '"');
}
result.count = (tokeniser->data + pos) - result.data;
result.next_p = pos;
if (!end_of_stream && tokeniser->data[pos] == '\\') {
if (tokeniser->prev_token.type != DN_INITokenType_KeyValueSeparator &&
tokeniser->prev_token.type != DN_INITokenType_Value &&
tokeniser->prev_token.type != DN_INITokenType_MultilineValue) {
result.type = DN_INITokenType_Error;
result.error = DN_INIStr8Lit("Invalid unquoted string, escape character '\\' is only allowed in INI values");
result.count = (uint32_t)((tokeniser->data + pos) - result.data);
result.next_p = next_p;
break;
}
if (result.type == DN_INITokenType_Value && tokeniser->data[pos] == '#') {
DN_INIStr8 esc_str8 = DN_INI_Str8Slice(DN_INI_Str8FromPtr(tokeniser->data, tokeniser->count), pos + 1, 1);
if (DN_INI_Str8Eq(esc_str8, DN_INIStr8Lit("\n")))
next_p += 2;
else
next_p += 1;
}
// NOTE: We only have a continuation of a multiline if we didn't have a newline, e.g.:
//
// foo=bar \\n
// baz\n
// next=property
//
// 'baz' is a multiline value that appends to 'bar '. When the tokeniser then reads
// 'next', the previous value is a multiline-value, but, we started a new-line which
// terminates the multi-line value. This means that we know we're starting a new
// key-value pair so we should _not_ append the multi-line value.
bool multiline_value = tokeniser->prev_token.type == DN_INITokenType_MultilineValue && !result.new_line;
if (tokeniser->prev_token.type == DN_INITokenType_KeyValueSeparator || multiline_value || tokeniser->prev_token.type == DN_INITokenType_Value) {
if (tokeniser->data[pos] == ' ') // Value can have spaces in it without quotes
continue;
result.type = tokeniser->prev_token.type == DN_INITokenType_KeyValueSeparator ? DN_INITokenType_Value : DN_INITokenType_MultilineValue;
} else if (tokeniser->prev_token.type == DN_INITokenType_Key) {
result.type = DN_INITokenType_Error;
result.error = DN_INIStr8Lit("Invalid unquoted string, multiple consecutive keys encountered");
} else {
result.type = DN_INITokenType_Key;
}
result.count = (uint32_t)((tokeniser->data + pos) - result.data);
result.next_p = next_p;
if (result.type == DN_INITokenType_Value && tokeniser->data[pos] == '#')
while (result.count && DN_INI_CharIsWhitespace_(result.data[result.count - 1]))
result.count--;
}
}
}
} break;
}
}
result.column = result.data - result.line_start;
result.column = (uint32_t)(result.data - result.line_start);
return result;
}
@ -156,18 +288,84 @@ void DN_INI_EatToken(DN_INITokeniser *tokeniser, DN_INIToken token)
DN_INI_Assert(token.data >= tokeniser->data && token.data <= tokeniser->data + tokeniser->count);
DN_INI_Assert(tokeniser->pos <= tokeniser->count);
tokeniser->pos = token.next_p;
tokeniser->prev_token = token.type;
tokeniser->prev_token = token;
tokeniser->line = token.line;
tokeniser->column = token.column;
tokeniser->line_start = token.line_start;
}
DN_INIParse DN_INI_ParseFromBuffer(char const *buf, size_t count, DN_INISection *sections, size_t sections_count, DN_INIKeyValue *key_values, size_t key_values_count)
// Return a copy of `token` re-tagged as an error token carrying the "Out of memory" message. All
// other fields of the token are carried over from the input.
static DN_INIToken DN_INI_MakeParseOutOfMemoryErrorToken_(DN_INIToken token)
{
    // NOTE: `token` is passed by value so it can be modified in place and returned
    token.type  = DN_INITokenType_Error;
    token.error = DN_INIStr8Lit("Out of memory");
    return token;
}
// Look up a descendant of `section` by a '.' separated path, e.g. "metadata.test" finds the child
// "test" of the child "metadata" of `section`.
//
// Returns the matching section, or null if `section` is null or any component of the path has no
// matching child. An empty path returns `section` itself.
DN_INISection *DN_INI_FindSectionStr8(DN_INISection *section, DN_INIStr8 str8)
{
    DN_INISection *result    = section;
    DN_INIStr8     remaining = str8;
    while (result) {
        // NOTE: Peel the next path component off the front, stop once there are none left
        DN_INIStr8BSplit split = DN_INI_Str8BSplit(remaining, DN_INIStr8Lit("."));
        if (split.lhs.size == 0)
            break;

        // NOTE: Find the first child whose name matches the component
        DN_INISection *match = 0;
        for (DN_INISection *child = result->child_first; child; child = child->next) {
            if (DN_INI_Str8Eq(child->name, split.lhs)) {
                match = child;
                break;
            }
        }

        // NOTE: Descend into the match, a missing child nulls the result and ends the walk
        if (match)
            remaining = split.rhs;
        result = match;
    }
    return result;
}
// Pointer + size convenience wrapper around DN_INI_FindSectionStr8.
DN_INISection *DN_INI_FindSection(DN_INISection *section, char const *name, uint32_t name_size)
{
    DN_INIStr8 path = DN_INI_Str8FromPtr(name, name_size);
    return DN_INI_FindSectionStr8(section, path);
}
// Look up a key-value pair starting from `section`.
//
// `str8` is either a plain key name, which is searched for directly in `section`, or a '.'
// separated path such as "metadata.test.foo". For a path, the text after the final separator is
// the key name and the text before it is resolved to a sub-section with DN_INI_FindSection.
//
// Returns the first key-value whose key matches, or null if `section` is null, the sub-section
// does not exist or the key is not present.
DN_INIKeyValue *DN_INI_KeyFromSectionStr8(DN_INISection *section, DN_INIStr8 str8)
{
    if (!section)
        return 0;

    // NOTE: Default to searching for the whole string as a key in the given section
    DN_INISection *owner    = section;
    DN_INIStr8     key_name = str8;

    DN_INIStr8BSplit path = DN_INI_Str8BSplitReverse(str8, DN_INIStr8Lit("."));
    if (path.rhs.size) {
        owner    = DN_INI_FindSection(section, path.lhs.data, path.lhs.size);
        key_name = path.rhs;
    }

    if (!owner)
        return 0;

    for (DN_INIKeyValue *kv = owner->first_key_value; kv; kv = kv->next) {
        if (DN_INI_Str8Eq(kv->key, key_name))
            return kv;
    }
    return 0;
}
// Pointer + size convenience wrapper around DN_INI_KeyFromSectionStr8.
DN_INIKeyValue *DN_INI_KeyFromSection(DN_INISection *section, char const *key, uint32_t key_size)
{
    DN_INIStr8 key_path = DN_INI_Str8FromPtr(key, key_size);
    return DN_INI_KeyFromSectionStr8(section, key_path);
}
DN_INIParse DN_INI_ParseFromPtr(char const *buf, uint32_t count, char *base, uint32_t base_count)
{
DN_INIParse result = {};
result.sections = sections;
DN_INITokeniser tokeniser = DN_INI_TokeniserFromPtr(buf, count);
DN_INIKeyValue *next_key_value = key_values;
DN_INIArena arena = {};
arena.base = base;
arena.max = base_count;
DN_INIParse result = {};
DN_INISection *curr_section = &result.first_section;
DN_INIKeyValue *key_value = 0;
for (;;) {
DN_INIToken token = DN_INI_NextToken(&tokeniser);
if (token.type == DN_INITokenType_EndOfStream)
@ -179,56 +377,136 @@ DN_INIParse DN_INI_ParseFromBuffer(char const *buf, size_t count, DN_INISection
break;
}
DN_INI_EatToken(&tokeniser, token);
switch (token.type) {
case DN_INITokenType_EndOfStream: /*FALLTHRU*/
case DN_INITokenType_Error: /*FALLTHRU*/
case DN_INITokenType_Nil: DN_InvalidCodePath; break;
case DN_INITokenType_Nil: DN_INI_Assert(!"Invalid code path"); break;
case DN_INITokenType_KeyValueSeparator: break;
case DN_INITokenType_Comment: break;
case DN_INITokenType_Section: {
result.sections_count++;
if (result.sections_count <= sections_count) {
DN_INISection *section = sections + (result.sections_count - 1);
section->name.data = token.data;
section->name.size = token.count;
section->token = token;
section->key_values = next_key_value;
DN_INISection *parent = &result.first_section;
DN_INIStr8 section_name = DN_INI_Str8FromPtr(token.data, token.count);
curr_section = &result.first_section;
for (;;) {
DN_INIStr8BSplit split = DN_INI_Str8BSplit(section_name, DN_INIStr8Lit("."));
if (split.lhs.size == 0)
break;
DN_INISection *next_section = DN_INI_FindSection(parent, split.lhs.data, split.lhs.size);
if (!next_section) {
result.total_sections_count++;
next_section = (DN_INISection *)DN_INI_ArenaAlloc(&arena, sizeof(*parent));
if (next_section) {
if (!parent->child_first)
parent->child_first = next_section;
if (parent->child_last)
parent->child_last->next = next_section;
parent->child_last = next_section;
next_section->name = split.lhs;
next_section->token = token;
next_section->parent = parent;
}
}
if (base && !parent) {
result.error_token = DN_INI_MakeParseOutOfMemoryErrorToken_(token);
return result;
}
section_name = split.rhs;
curr_section = next_section;
parent = curr_section;
}
} break;
case DN_INITokenType_Key: {
result.key_values_count++;
if (result.sections_count <= sections_count && result.key_values_count <= key_values_count) {
DN_INISection *section = sections + result.sections_count - 1;
DN_INIKeyValue *key_value = next_key_value++;
key_value->key.data = token.data;
key_value->key.size = token.count;
section->key_values_count++;
key_value = DN_INI_KeyFromSection(curr_section, token.data, token.count);
if (!key_value) {
result.total_key_values_count++;
key_value = (DN_INIKeyValue *)DN_INI_ArenaAlloc(&arena, sizeof(*key_value));
if (base && !key_value) {
result.error_token = DN_INI_MakeParseOutOfMemoryErrorToken_(token);
return result;
}
if (key_value) {
key_value->key.data = token.data;
key_value->key.size = token.count;
}
if (curr_section) {
if (!curr_section->first_key_value)
curr_section->first_key_value = key_value;
if (curr_section->last_key_value)
curr_section->last_key_value->next = key_value;
curr_section->last_key_value = key_value;
curr_section->key_values_count++;
}
}
} break;
case DN_INITokenType_MultilineValue: {
uint32_t bytes_req = token.count;
if (tokeniser.prev_token.type == DN_INITokenType_Value) {
// NOTE: We saw a value, then the next token was a multiline value, we will merge these
// values into 1 stream, so we need to copy the previous string out as well.
bytes_req += tokeniser.prev_token.count;
}
result.memory_required += bytes_req;
if (curr_section && key_value) {
DN_INI_Assert(curr_section->key_values_count);
DN_INI_Assert(key_value->key.size);
char *string = (char *)DN_INI_ArenaAlloc(&arena, bytes_req);
if (!string) {
result.error_token = DN_INI_MakeParseOutOfMemoryErrorToken_(token);
return result;
}
char *dest = string;
if (tokeniser.prev_token.type == DN_INITokenType_Value) {
DN_INI_Memcpy(dest, tokeniser.prev_token.data, tokeniser.prev_token.count);
dest += tokeniser.prev_token.count;
key_value->value.data = string;
key_value->value.size = tokeniser.prev_token.count;
} else {
// NOTE: If we have a multi-line value we are accumulating onto the same key-value.
// Invariant to this is that the arena is only being used to allocate contiguous memory
// for the string. Essentially each time we visit this branch we're just bumping the
// capacity of the original string we allocated at the start of the multi-line value.
// This is what this assert checks, that we're expanding in place the string and there
// hasn't been some other allocation that took place inbetween that broke continuity.
DN_INI_Assert(key_value->value.data + key_value->value.size == string);
}
DN_INI_Memcpy(dest, token.data, token.count);
key_value->value.size += token.count;
}
} break;
case DN_INITokenType_Value: {
if (result.sections_count <= sections_count && result.key_values_count <= key_values_count) {
DN_INISection *section = sections + result.sections_count - 1;
DN_INIKeyValue *key_value = section->key_values + (section->key_values_count - 1);
DN_INI_Assert(section->key_values_count);
if (curr_section && key_value) {
DN_INI_Assert(curr_section->key_values_count);
DN_INI_Assert(key_value->key.size);
DN_INI_Assert(!key_value->value.data);
key_value->value.data = token.data;
key_value->value.size = token.count;
}
} break;
}
DN_INI_EatToken(&tokeniser, token);
}
result.memory_required += (result.total_sections_count * sizeof(DN_INISection)) + (result.total_key_values_count * sizeof(DN_INIKeyValue));
return result;
}
#if defined(DN_INI_WITH_UNIT_TESTS) || 1
void DN_INI_UnitTests()
{
// NOTE: Section and comments
{
char const EXAMPLE[] =
"[metadata]\n"
@ -237,15 +515,229 @@ void DN_INI_UnitTests()
" version = attr: this8.__version__\n"
"\n";
DN_INIParse parse = DN_INI_ParseFromBuffer(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0, 0, 0);
DN_INI_Assert(parse.sections_count == 1);
DN_INI_Assert(parse.key_values_count == 2);
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 1);
DN_INI_Assert(parse.total_key_values_count == 2);
char parse_memory[sizeof(DN_INIKeyValue) * 2 + sizeof(DN_INISection) * 1];
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
}
// NOTE: Global section
{
char const EXAMPLE[] =
"first=hello\n"
"[metadata]\n"
" name = this8 # test\n"
" version = attr: this8.__version__\n"
"\n";
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 1);
DN_INI_Assert(parse.total_key_values_count == 3);
char parse_memory[sizeof(DN_INIKeyValue) * 3 + sizeof(DN_INISection) * 1];
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
}
// NOTE: Empty section
{
char const EXAMPLE[] =
"first=hello\n"
"[metadata]\n\n";
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 1);
DN_INI_Assert(parse.total_key_values_count == 1);
char parse_memory[sizeof(DN_INIKeyValue) * 1 + sizeof(DN_INISection) * 2];
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
}
// NOTE: Multiple empty sections
{
char const EXAMPLE[] =
"[metadata]\n\n"
"[metadata2]\n\n";
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 2);
DN_INI_Assert(parse.total_key_values_count == 0);
char parse_memory[sizeof(DN_INIKeyValue) * 0 + sizeof(DN_INISection) * 2];
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
}
// NOTE: Repeated section override
{
char const EXAMPLE[] =
"[metadata]\n"
"foo=bar\n"
"[metadata]\n"
"foo=baz\n";
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
// NOTE: Because sections can override each other, when parsing with no memory, e.g. no context
// we can't easily tell if a section is repeated or not without retokenising the entire file
// every time we hit a section. Then the total section count returned in the initial pass is
// an estimate. The same goes with the key-values
DN_INI_Assert(parse.total_sections_count == 2);
DN_INI_Assert(parse.total_key_values_count == 2);
char parse_memory[sizeof(DN_INIKeyValue) * 1 + sizeof(DN_INISection) * 1];
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 1);
DN_INI_Assert(parse.total_key_values_count == 1);
DN_INI_Assert(parse.first_section.child_first);
DN_INI_Assert(parse.first_section.child_first->first_key_value);
DN_INI_Assert(parse.first_section.child_first->first_key_value == parse.first_section.child_first->last_key_value);
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->value, DN_INIStr8Lit("baz")));
}
// NOTE: Out-of-order repeated section override
{
char const EXAMPLE[] =
"[metadata]\n"
"foo=bar\n"
"[surprise]"
"[metadata]\n"
"foo=baz\n";
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 3);
DN_INI_Assert(parse.total_key_values_count == 2);
char parse_memory[sizeof(DN_INIKeyValue) * 1 + sizeof(DN_INISection) * 2];
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 2);
DN_INI_Assert(parse.total_key_values_count == 1);
DN_INI_Assert(parse.first_section.child_first);
DN_INI_Assert(parse.first_section.child_first->first_key_value);
DN_INI_Assert(parse.first_section.child_first->first_key_value == parse.first_section.child_first->last_key_value);
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->value, DN_INIStr8Lit("baz")));
}
// NOTE: Subsection
{
char const EXAMPLE[] =
"[metadata]\n"
"foo=bar\n"
"[metadata.test]\n"
"hello=world\n";
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 3);
DN_INI_Assert(parse.total_key_values_count == 2);
char parse_memory[sizeof(DN_INIKeyValue) * 2 + sizeof(DN_INISection) * 3];
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->name, DN_INIStr8Lit("metadata")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->key, DN_INIStr8Lit("foo")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->value, DN_INIStr8Lit("bar")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->child_first->name, DN_INIStr8Lit("test")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->child_first->first_key_value->key, DN_INIStr8Lit("hello")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->child_first->first_key_value->value, DN_INIStr8Lit("world")));
}
// NOTE: Repeated subsections
{
char const EXAMPLE[] =
"[metadata]\n"
"foo=bar\n"
"[metadata.test]\n"
"foo=bar\n"
"\n"
"[metadata]\n"
"foo=baz\n"
"[metadata.test]\n"
"foo=baz\n";
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 6);
DN_INI_Assert(parse.total_key_values_count == 4);
char parse_memory[sizeof(DN_INIKeyValue) * 2 + sizeof(DN_INISection) * 2];
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INISection sections[128] = {};
DN_INIKeyValue key_values[128] = {};
parse = DN_INI_ParseFromBuffer(EXAMPLE, sizeof(EXAMPLE) - 1, sections, sizeof(sections) / sizeof(sections[0]), key_values, sizeof(key_values) / sizeof(key_values[0]));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->name, DN_INIStr8Lit("metadata")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->key, DN_INIStr8Lit("foo")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->value, DN_INIStr8Lit("baz")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->child_first->name, DN_INIStr8Lit("test")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->child_first->first_key_value->key, DN_INIStr8Lit("foo")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->child_first->first_key_value->value, DN_INIStr8Lit("baz")));
DN_INIKeyValue *key_value = DN_INI_KeyFromSectionStr8(&parse.first_section, DN_INIStr8Lit("metadata.test.foo"));
DN_INI_Assert(DN_INI_Str8Eq(key_value->key, DN_INIStr8Lit("foo")));
DN_INI_Assert(DN_INI_Str8Eq(key_value->value, DN_INIStr8Lit("baz")));
}
// NOTE: Multi line value
{
char const EXAMPLE[] =
"[metadata]\n"
"foo=bar \\\n"
"baz\n"
"abc=def \\\n"
"ghi\\\n"
"j"
;
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 1);
DN_INI_Assert(parse.total_key_values_count == 2);
char parse_memory[256];
DN_INI_Assert(parse.memory_required <= sizeof(parse_memory));
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->key, DN_INIStr8Lit("foo")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->value, DN_INIStr8Lit("bar baz")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->last_key_value->key, DN_INIStr8Lit("abc")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->last_key_value->value, DN_INIStr8Lit("def ghij")));
}
// NOTE: Multi line immediately after key-value separator
{
char const EXAMPLE[] =
"[metadata]\n"
"foo=\\\n"
"baz\\\n"
"j"
;
DN_INIParse parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, 0, 0);
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(parse.total_sections_count == 1);
DN_INI_Assert(parse.total_key_values_count == 1);
char parse_memory[256];
DN_INI_Assert(parse.memory_required <= sizeof(parse_memory));
parse = DN_INI_ParseFromPtr(EXAMPLE, sizeof(EXAMPLE) - 1, parse_memory, sizeof(parse_memory));
DN_INI_Assert(parse.error_token.type == DN_INITokenType_Nil);
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->key, DN_INIStr8Lit("foo")));
DN_INI_Assert(DN_INI_Str8Eq(parse.first_section.child_first->first_key_value->value, DN_INIStr8Lit("bazj")));
}
}
#endif

View File

@ -1,17 +1,34 @@
#if !defined(DN_INI_H)
#define DN_INI_H
#include <stddef.h> // size_t
#include <stdint.h> // uint32_t
#if !defined(DN_INI_Assert)
#include <assert.h>
#define DN_INI_Assert(expr) assert(expr)
#endif
#if !defined(DN_INI_Memset) || !defined(DN_INI_Memcmp) || !defined(DN_INI_Memcpy)
#include <string.h>
#if !defined(DN_INI_Memset)
#define DN_INI_Memset(ptr, val, size) memset(ptr, val, size)
#endif
#if !defined(DN_INI_Memcmp)
#define DN_INI_Memcmp(dest, src, size) memcmp(dest, src, size)
#endif
#if !defined(DN_INI_Memcpy)
#define DN_INI_Memcpy(dest, src, size) memcpy(dest, src, size)
#endif
#endif
typedef enum DN_INITokenType {
DN_INITokenType_Nil,
DN_INITokenType_Section,
DN_INITokenType_Key,
DN_INITokenType_KeyValueSeparator,
DN_INITokenType_MultilineValue,
DN_INITokenType_Value,
DN_INITokenType_Comment,
DN_INITokenType_EndOfStream,
@ -19,57 +36,74 @@ typedef enum DN_INITokenType {
} DN_INITokenType;
typedef struct DN_INIStr8 {
char *data;
size_t size;
char *data;
uint32_t size;
} DN_INIStr8;
#define DN_INIStr8Lit(str) DN_INIStr8{(char *)str, sizeof(str)/sizeof(str[0]) - 1}
#if defined(__cplusplus)
#define DN_INIStr8Lit(str) DN_INIStr8{(char *)str, sizeof(str)/sizeof(str[0]) - 1}
#else
#define DN_INIStr8Lit(str) (DN_INIStr8){(char *)str, sizeof(str)/sizeof(str[0]) - 1}
#endif
typedef struct DN_INIToken {
DN_INITokenType type;
char *data;
size_t count;
size_t next_p;
DN_INITokenType type;
uint32_t count;
uint32_t next_p;
bool new_line;
// NOTE: Line metadata
DN_INIStr8 error;
size_t line;
size_t column;
uint32_t line;
uint32_t column;
char *line_start;
} DN_INIToken;
typedef struct DN_INITokeniser {
char *data;
char *line_start;
size_t count;
size_t pos;
DN_INITokenType prev_token;
size_t line;
size_t column;
uint32_t count;
uint32_t pos;
DN_INIToken prev_token;
uint32_t line;
uint32_t column;
} DN_INITokeniser;
typedef struct DN_INIKeyValue {
DN_INIStr8 key;
DN_INIStr8 value;
} DN_INIKeyValue;
typedef struct DN_INIKeyValue DN_INIKeyValue;
struct DN_INIKeyValue {
DN_INIStr8 key;
DN_INIStr8 value;
DN_INIKeyValue *next;
};
typedef struct DN_INISection {
typedef struct DN_INISection DN_INISection;
struct DN_INISection {
DN_INIStr8 name;
DN_INIKeyValue *key_values;
size_t key_values_count;
DN_INIKeyValue *first_key_value;
DN_INIKeyValue *last_key_value;
uint32_t key_values_count;
DN_INIToken token;
} DN_INISection;
DN_INISection *next, *parent;
DN_INISection *child_first, *child_last;
};
typedef struct DN_INIParse {
DN_INISection *sections;
size_t sections_count;
size_t key_values_count;
DN_INIToken error_token;
DN_INISection first_section;
uint32_t total_sections_count;
uint32_t total_key_values_count;
DN_INIToken error_token;
uint32_t memory_required;
} DN_INIParse;
DN_INITokeniser DN_INI_TokeniserFromPtr(char const *buf, size_t count);
DN_INIToken DN_INI_NextToken (DN_INITokeniser const *tokeniser);
void DN_INI_EatToken (DN_INITokeniser *tokeniser, DN_INIToken token);
DN_INIParse DN_INI_ParseFromBuffer (char const *buf, size_t count, DN_INISection *sections, size_t sections_count, DN_INIKeyValue *key_values, size_t key_values_count);
DN_INITokeniser DN_INI_TokeniserFromPtr (char const *buf, uint32_t count);
DN_INIToken DN_INI_NextToken (DN_INITokeniser const *tokeniser);
void DN_INI_EatToken (DN_INITokeniser *tokeniser, DN_INIToken token);
DN_INISection * DN_INI_FindSectionStr8 (DN_INISection *section, DN_INIStr8 str8);
DN_INISection * DN_INI_FindSection (DN_INISection *section, char const *name, uint32_t name_size);
DN_INIKeyValue *DN_INI_KeyFromSectionStr8(DN_INISection *section, DN_INIStr8 str8);
DN_INIKeyValue *DN_INI_KeyFromSection (DN_INISection *section, char const *key, uint32_t key_size);
DN_INIParse DN_INI_ParseFromPtr (char const *buf, uint32_t count, char *base, uint32_t base_count);
#if defined(DN_INI_WITH_UNIT_TESTS)
void DN_INI_UnitTests ();