From 437e7e434d7bcc230107b413f78c52443759bd69 Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Wed, 13 May 2020 15:00:23 -0500 Subject: [PATCH] Added hashtable implementation and working on strings --- bh.h | 194 ++++++++++++++++++++++++++++++++++++++++++++- docs/parse_grammar | 12 +-- onyx.c | 17 ++-- onyxlex.c | 12 +-- onyxlex.h | 18 ++--- onyxparser.c | 2 + onyxparser.h | 18 +++++ 7 files changed, 241 insertions(+), 32 deletions(-) create mode 100644 onyxparser.c create mode 100644 onyxparser.h diff --git a/bh.h b/bh.h index f6574665..22c808a9 100644 --- a/bh.h +++ b/bh.h @@ -8,6 +8,7 @@ #include #include // TODO: Replace with needed functions +#include //------------------------------------------------------------------------------------- // Better types @@ -24,6 +25,7 @@ typedef signed long i64; typedef signed long long i128; typedef unsigned long isize; typedef i32 b32; +typedef void* ptr; //------------------------------------------------------------------------------------- // Better character functions @@ -47,6 +49,8 @@ i64 chars_match(char* ptr1, char* ptr2); //------------------------------------------------------------------------------------- // Better strings //------------------------------------------------------------------------------------- +#ifndef BH_NO_STRING + typedef struct bh__string { u64 length; u64 capacity; @@ -95,10 +99,14 @@ void bh_string_trim_end_space(bh_string* str); // TEMP void bh_string_print(bh_string* str); +#endif + //------------------------------------------------------------------------------------- // Better files //------------------------------------------------------------------------------------- +#ifndef BH_NO_FILE + typedef enum bh_file_error { BH_FILE_ERROR_NONE, BH_FILE_ERROR_INVALID @@ -164,9 +172,13 @@ bh_file_contents bh_file_read_contents_bh_file(bh_file* file); bh_file_contents bh_file_read_contents_direct(const char* filename); i32 bh_file_contents_delete(bh_file_contents* contents); +#endif + //------------------------------------------------------------------------------------- // Better dynamically-sized arrays //------------------------------------------------------------------------------------- +#ifndef BH_NO_ARRAY + typedef struct bh__arr { i32 length, capacity; } bh__arr; @@ -220,6 +232,60 @@ void* bh__arr_copy(void *arr, i32 elemsize); void bh__arr_insertn(void **arr, i32 elemsize, i32 index, i32 numelems); void bh__arr_deleten(void **arr, i32 elemsize, i32 index, i32 numelems); +#endif + +//------------------------------------------------------------------------------------- +// HASH TABLE FUNCTIONS +//------------------------------------------------------------------------------------- +#ifndef BH_NO_HASHTABLE + +#define BH__HASH_STORED_KEY_SIZE 64 +typedef struct bh__hash_entry { + char key[BH__HASH_STORED_KEY_SIZE]; + // Value follows +} bh__hash_entry; + +#define BH__HASH_MODULUS 1021 +#define BH__HASH_KEYSIZE 16 +u64 bh__hash_function(const char* str, i32 len) { + u64 hash = 5381; + i32 c, l = 0; + if (len == 0) len = BH__HASH_KEYSIZE; + + while ((c = *str++) && l++ < len) { + hash = (hash << 5) + hash + c; + } + + return hash % BH__HASH_MODULUS; +} + +#define bh_hash(T) T* + +#ifdef BH_HASH_SIZE_SAFE + #define bh_hash_init(tab) bh__hash_init((ptr **) &(tab)) + #define bh_hash_free(tab) bh__hash_free((ptr **) &(tab)) + #define bh_hash_put(T, tab, key, value) (assert(sizeof(T) == sizeof(*(tab))), (*((T *) bh__hash_put((ptr *) tab, sizeof(T), key)) = (T) value)) + #define bh_hash_has(T, tab, key) (assert(sizeof(T) == sizeof(*(tab))), (bh__hash_has((ptr *) tab, sizeof(T), key))) + #define bh_hash_get(T, tab, key) (assert(sizeof(T) == sizeof(*(tab))), (*((T *) bh__hash_get((ptr *) tab, sizeof(T), key)))) + #define bh_hash_delete(T, tab, key) (assert(sizeof(T) == sizeof(*(tab))), bh__hash_delete((ptr *) tab, sizeof(T), key)) +#else + #define bh_hash_init(tab) bh__hash_init((ptr **) &(tab)) + #define bh_hash_free(tab) bh__hash_free((ptr **) &(tab)) + #define bh_hash_put(T, tab, key, value) (*((T *) bh__hash_put((ptr *) tab, sizeof(T), key)) = value) + #define bh_hash_has(T, tab, key) (bh__hash_has((ptr *) tab, sizeof(T), key)) + #define bh_hash_get(T, tab, key) (*((T *) bh__hash_get((ptr *) tab, sizeof(T), key))) + #define bh_hash_delete(T, tab, key) (bh__hash_delete((ptr *) tab, sizeof(T), key)) +#endif + +b32 bh__hash_init(ptr **table); +b32 bh__hash_free(ptr **table); +ptr bh__hash_put(ptr *table, i32 elemsize, char *key); +b32 bh__hash_has(ptr *table, i32 elemsize, char *key); +ptr bh__hash_get(ptr *table, i32 elemsize, char *key); +void bh__hash_delete(ptr *table, i32 elemsize, char *key); + +#endif + #ifdef BH_DEFINE #undef BH_DEFINE @@ -269,6 +335,8 @@ i64 chars_match(char* ptr1, char* ptr2) { //------------------------------------------------------------------------------------- // STRING IMPLEMENTATION //------------------------------------------------------------------------------------- +#ifndef BH_NO_STRING + bh_string* bh_string_new_cap(unsigned long cap) { bh__string* str; str = (bh__string*) malloc(sizeof(*str) + sizeof(char) * cap + 1); @@ -287,7 +355,7 @@ bh_string* bh_string_new_str(const char* cstr) { data[i] = cstr[i]; } - data[i] = 0; // Always null terminate the string + data[len] = 0; // Always null terminate the string str->length = len; str->capacity = len; @@ -396,11 +464,13 @@ void bh_string_print(bh_string* str) { write(STDOUT_FILENO, str->data, str->length); } - +#endif // ifndef BH_NO_STRING //------------------------------------------------------------------------------------- // FILE IMPLEMENTATION //------------------------------------------------------------------------------------- +#ifndef BH_NO_FILE + bh_file_error bh_file_get_standard(bh_file* file, bh_file_standard stand) { i32 sd_fd = -1; const char* filename = NULL; @@ -578,9 +648,12 @@ b32 bh_file_contents_delete(bh_file_contents* contents) { return 1; } +#endif // ifndef BH_NO_FILE + //------------------------------------------------------------------------------------- // ARRAY IMPLEMENTATION //------------------------------------------------------------------------------------- +#ifndef BH_NO_ARRAY b32 bh__arr_grow(void** arr, i32 elemsize, i32 cap) { bh__arr* arrptr; @@ -594,7 +667,6 @@ b32 bh__arr_grow(void** arr, i32 elemsize, i32 cap) { } else { arrptr = bh__arrhead(*arr); - if (arrptr->length > cap) return 1; if (arrptr->capacity < cap) { void* p; @@ -688,6 +760,122 @@ void bh__arr_insertn(void **arr, i32 elemsize, i32 index, i32 numelems) { } } +#endif // ifndef BH_NO_ARRAY + +//------------------------------------------------------------------------------------- +// HASHTABLE IMPLEMENTATION +//------------------------------------------------------------------------------------- +#ifndef BH_NO_HASHTABLE + +b32 bh__hash_init(ptr **table) { + *table = malloc(sizeof(ptr) * BH__HASH_MODULUS); + if (*table == NULL) return 0; + + for (i32 i = 0; i < BH__HASH_MODULUS; i++) { + (*table)[i] = NULL; + } + + return 1; +} + +b32 bh__hash_free(ptr **table) { + for (i32 i = 0; i < BH__HASH_MODULUS; i++) { + if ((*table)[i] != NULL) { + bh_arr_free(*((*table) + i)); + } + } + + free(*table); + *table = NULL; +} + +// Assumes NULL terminated string for key +ptr bh__hash_put(ptr *table, i32 elemsize, char *key) { + u64 index = bh__hash_function(key, 0); + + elemsize += sizeof(bh__hash_entry); + + ptr arrptr = table[index]; + i32 len = bh_arr_length(arrptr); + + while (len--) { + if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) goto found_matching; + arrptr = (ptr)((char *) arrptr + elemsize); + } + + // Didn't find it in the array, make a new one + arrptr = table[index]; + len = bh_arr_length(arrptr); + bh__arr_grow(&arrptr, elemsize, len + 1); + bh__arrhead(arrptr)->length++; + table[index] = arrptr; + + arrptr = (ptr)(((char *) arrptr) + elemsize * len); + strncpy(arrptr, key, BH__HASH_STORED_KEY_SIZE); + +found_matching: + return (ptr)(((char *) arrptr) + BH__HASH_STORED_KEY_SIZE); +} + +b32 bh__hash_has(ptr *table, i32 elemsize, char *key) { + u64 index = bh__hash_function(key, 0); + + ptr arrptr = table[index]; + i32 len = bh_arr_length(arrptr); + if (arrptr == NULL) return 0; + + i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE; + + while (len--) { + if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) return 1; + arrptr = (ptr)((char *) arrptr + stride); + } + + return 0; +} + +ptr bh__hash_get(ptr *table, i32 elemsize, char *key) { + u64 index = bh__hash_function(key, 0); + + ptr arrptr = table[index]; + i32 len = bh_arr_length(arrptr); + assert(arrptr != NULL); + + i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE; + + while (len--) { + if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) { + return (ptr)((char *) arrptr + BH__HASH_STORED_KEY_SIZE); + } + + arrptr = (ptr)((char *) arrptr + stride); + } + + return 0; +} + +void bh__hash_delete(ptr *table, i32 elemsize, char *key) { + u64 index = bh__hash_function(key, 0); + + ptr arrptr = table[index]; + i32 len = bh_arr_length(arrptr); + if (arrptr == NULL) return; // Didn't exist + + i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE; + i32 i = 0; + + while (len && strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) != 0) { + arrptr = (ptr)((char *) arrptr + stride); + i++, len--; + } + + if (len == 0) return; // Didn't exist + + bh__arr_deleten((void **) &arrptr, elemsize, i, 1); +} + +#endif // ifndef BH_NO_HASHTABLE + #endif // ifdef BH_DEFINE #endif // ifndef BH_H diff --git a/docs/parse_grammar b/docs/parse_grammar index 0a07b3f7..a71cd866 100644 --- a/docs/parse_grammar +++ b/docs/parse_grammar @@ -2,7 +2,7 @@ Note: ~ is empty Goal: Design the language to have no ambiguity -SOURCE_FILE = TOP_LEVEL_STATEMENT TOKEN_TYPE_SYM_SEMICOLON SOURCE_FILE | ~ +SOURCE_FILE = TOP_LEVEL_STATEMENT ; SOURCE_FILE | ~ TOP_LEVEL_STATEMENT = USE_DECLARATION @@ -26,9 +26,9 @@ FUNCTION_DECLARATION = proc TOKEN_TYPE_SYMBOL FUNCTION_TYPE BLOCK FUNCTION_TYPE = :: ( FUNCTION_PARAMS ) -> TOKEN_TYPE_SYMBOL -BLOCK = { STATEMENTS } - -STATEMENTS = STATEMENT ; STATEMENTS | ~ +-- This may be too weird... +BLOCK = { STATEMENTS +STATEMENTS = STATEMENT ; STATEMENTS | } STATEMENT = ASSIGNMENT_STATEMENT @@ -42,9 +42,9 @@ ASSIGNMENT_STATEMENT = TOKEN_TYPE_SYMBOL = EXPRESSION IF_STATEMENT = if EXPRESSION BLOCK ELSE_IF_STATEMENT ELSE_STATEMENT -ELSEIF_STATEMENT = TOKEN_TYPE_KEYWORD_ELSEIF EXPRESSION BLOCK ELSEIF_STATEMENT | ~ +ELSEIF_STATEMENT = elseif EXPRESSION BLOCK ELSEIF_STATEMENT | ~ -ELSE_STATEMENT = TOKEN_TYPE_KEYWORD_ELSE BLOCK | ~ +ELSE_STATEMENT = else BLOCK | ~ -- This needs to be better FOR_STATEMENT = for STATEMENT ; EXPRESSION ; STATEMENT BLOCK diff --git a/onyx.c b/onyx.c index 32e38f5b..0378e4ad 100644 --- a/onyx.c +++ b/onyx.c @@ -1,3 +1,4 @@ +#define BH_NO_STRING #define BH_DEFINE #include "bh.h" @@ -5,8 +6,8 @@ #include "onyxlex.h" -bh_arr(Token) parse_tokens(bh_file_contents *fc) { - Tokenizer tknizer = { +bh_arr(OnyxToken) parse_tokens(bh_file_contents *fc) { + OnyxTokenizer tknizer = { .start = fc->data, .curr = fc->data, .end = fc->data + fc->length - 1, @@ -14,12 +15,12 @@ bh_arr(Token) parse_tokens(bh_file_contents *fc) { .line_start = fc->data, }; - bh_arr(Token) token_arr = NULL; + bh_arr(OnyxToken) token_arr = NULL; bh_arr_grow(token_arr, 512); - Token tk; + OnyxToken tk; do { - tk = get_token(&tknizer); + tk = onyx_get_token(&tknizer); bh_arr_push(token_arr, tk); } while (tk.type != TOKEN_TYPE_END_STREAM); @@ -37,12 +38,12 @@ int main(int argc, char *argv[]) { bh_file_contents fc = bh_file_read_contents(&source_file); bh_file_close(&source_file); - bh_arr(Token) token_arr = parse_tokens(&fc); + bh_arr(OnyxToken) token_arr = parse_tokens(&fc); printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr)); - for (Token* it = token_arr; !bh_arr_end(token_arr, it); it++) { - printf("%s\n", get_token_type_name(*it)); + for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) { + printf("%s\n", onyx_get_token_type_name(*it)); } bh_file_contents_delete(&fc); diff --git a/onyxlex.c b/onyxlex.c index af930ded..4e69467d 100644 --- a/onyxlex.c +++ b/onyxlex.c @@ -1,7 +1,7 @@ #include "bh.h" #include "onyxlex.h" -static const char* TokenTypeNames[] = { +static const char* onyx_token_type_names[] = { "TOKEN_TYPE_UNKNOWN", "TOKEN_TYPE_END_STREAM", @@ -70,7 +70,7 @@ static const char* TokenTypeNames[] = { } #endif -static b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) { +static b32 token_lit(OnyxTokenizer* tokenizer, OnyxToken* tk, char* lit, OnyxTokenType type) { i64 len = chars_match(tokenizer->curr, lit); if (len > 0) { tk->type = type; @@ -86,12 +86,12 @@ static b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) return 0; } -const char* get_token_type_name(Token tkn) { - return TokenTypeNames[tkn.type]; +const char* onyx_get_token_type_name(OnyxToken tkn) { + return onyx_token_type_names[tkn.type]; } -Token get_token(Tokenizer* tokenizer) { - Token tk; +OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) { + OnyxToken tk; // Skip whitespace while (char_is_whitespace(*tokenizer->curr) && tokenizer->curr != tokenizer->end) diff --git a/onyxlex.h b/onyxlex.h index 608d133b..b77320a7 100644 --- a/onyxlex.h +++ b/onyxlex.h @@ -3,15 +3,15 @@ #include "bh.h" -typedef struct Tokenizer { +typedef struct OnyxTokenizer { char *start, *curr, *end; // TODO: Fix the line number and column count char* line_start; u64 line_number; -} Tokenizer; +} OnyxTokenizer; -typedef enum TokenType { +typedef enum OnyxTokenType { TOKEN_TYPE_UNKNOWN, TOKEN_TYPE_END_STREAM, @@ -62,16 +62,16 @@ typedef enum TokenType { TOKEN_TYPE_LITERAL_NUMERIC, TOKEN_TYPE_COUNT -} TokenType; +} OnyxTokenType; -typedef struct Token { - TokenType type; +typedef struct OnyxToken { + OnyxTokenType type; char* token; isize length; u64 line_number, line_column; -} Token; +} OnyxToken; -const char* get_token_type_name(Token tkn); -Token get_token(Tokenizer* tokenizer); +const char* onyx_get_token_type_name(OnyxToken tkn); +OnyxToken onyx_get_token(OnyxTokenizer* tokenizer); #endif \ No newline at end of file diff --git a/onyxparser.c b/onyxparser.c new file mode 100644 index 00000000..873380b6 --- /dev/null +++ b/onyxparser.c @@ -0,0 +1,2 @@ + +#include "onyxparser.h" diff --git a/onyxparser.h b/onyxparser.h new file mode 100644 index 00000000..eccf9610 --- /dev/null +++ b/onyxparser.h @@ -0,0 +1,18 @@ +#include "bh.h" + +#include "onyxlex.h" + +typedef struct OnyxParser { + OnyxTokenizer tokenizer; + OnyxToken *prev; + OnyxToken *curr; + + bh_hash(OnyxIdentifier) idens; + + bh_arr(OnyxToken) tokens; /* Maybe don't store the whole array? Ask for one as you need it? */ +} OnyxParser; + +typedef struct OnyxParseNode { + OnyxToken *token; + +}; \ No newline at end of file -- 2.25.1