From: Brendan Hansen
Date: Thu, 14 May 2020 15:40:30 +0000 (-0500)
Subject: Added hash table iteration
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=4812d47285f6d0ce94273f7a5f418ee5cd075639;p=onyx.git

Added hash table iteration
---

diff --git a/bh.h b/bh.h
index 22c808a9..da236743 100644
--- a/bh.h
+++ b/bh.h
@@ -242,11 +242,12 @@ void bh__arr_deleten(void **arr, i32 elemsize, i32 index, i32 numelems);
 #define BH__HASH_STORED_KEY_SIZE 64
 typedef struct bh__hash_entry {
     char key[BH__HASH_STORED_KEY_SIZE];
-    // Value follows
+    i32 value; // NOTE: Not actually an i32, just used as a placeholder for offset
 } bh__hash_entry;
 
 #define BH__HASH_MODULUS 1021
-#define BH__HASH_KEYSIZE 16
+#define BH__HASH_KEYSIZE 64
+#ifdef BH_DEFINE
 u64 bh__hash_function(const char* str, i32 len) {
     u64 hash = 5381;
     i32 c, l = 0;
@@ -258,6 +259,13 @@ u64 bh__hash_function(const char* str, i32 len) {
     return hash % BH__HASH_MODULUS;
 }
+#endif
+
+typedef struct bh_hash_iterator {
+    ptr *tab, *endtab, arr;
+    i32 elemsize, arridx;
+    bh__hash_entry* entry;
+} bh_hash_iterator;
 
 #define bh_hash(T) T*
 
@@ -277,12 +285,18 @@
 #define bh_hash_delete(T, tab, key) (bh__hash_delete((ptr *) tab, sizeof(T), key))
 #endif
 
+#define bh_hash_iter_setup(T, tab) (assert(sizeof(T) == sizeof(*(tab))), bh__hash_iter_setup((ptr *) tab, sizeof(T)))
+#define bh_hash_iter_key(it) (it.entry->key)
+#define bh_hash_iter_value(T, it) (assert(sizeof(T) == it.elemsize), *(T *)&(it.entry->value))
+
 b32 bh__hash_init(ptr **table);
 b32 bh__hash_free(ptr **table);
 ptr bh__hash_put(ptr *table, i32 elemsize, char *key);
 b32 bh__hash_has(ptr *table, i32 elemsize, char *key);
 ptr bh__hash_get(ptr *table, i32 elemsize, char *key);
 void bh__hash_delete(ptr *table, i32 elemsize, char *key);
+bh_hash_iterator bh__hash_iter_setup(ptr *table, i32 elemsize);
+b32 bh_hash_iter_next(bh_hash_iterator* it);
 
 #endif
 
@@ -662,6 +676,8 @@ b32 bh__arr_grow(void** arr, i32 elemsize, i32 cap) {
         if (cap == 0 && elemsize == 0) return 1;
 
         arrptr = (bh__arr *) malloc(sizeof(*arrptr) + elemsize * cap);
+        if (arrptr == NULL) return 0;
+
         arrptr->capacity = cap;
         arrptr->length = 0;
 
@@ -793,7 +809,7 @@ b32 bh__hash_free(ptr **table) {
 ptr bh__hash_put(ptr *table, i32 elemsize, char *key) {
     u64 index = bh__hash_function(key, 0);
 
-    elemsize += sizeof(bh__hash_entry);
+    elemsize += BH__HASH_STORED_KEY_SIZE;
 
     ptr arrptr = table[index];
     i32 len = bh_arr_length(arrptr);
@@ -821,9 +837,9 @@ b32 bh__hash_has(ptr *table, i32 elemsize, char *key) {
     u64 index = bh__hash_function(key, 0);
 
     ptr arrptr = table[index];
-    i32 len = bh_arr_length(arrptr);
 
     if (arrptr == NULL) return 0;
+    i32 len = bh_arr_length(arrptr);
 
     i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
     while (len--) {
@@ -874,6 +890,45 @@ void bh__hash_delete(ptr *table, i32 elemsize, char *key) {
     bh__arr_deleten((void **) &arrptr, elemsize, i, 1);
 }
 
+bh_hash_iterator bh__hash_iter_setup(ptr *table, i32 elemsize) {
+    bh_hash_iterator it = {
+        .tab = table,
+        .endtab = table + BH__HASH_MODULUS,
+        .arr = NULL,
+        .elemsize = elemsize,
+        .entry = NULL
+    };
+    return it;
+}
+
+b32 bh_hash_iter_next(bh_hash_iterator* it) {
+    if (it->tab == NULL) return 0;
+
+    if (it->entry != NULL) {
+        it->arridx++;
+        if (it->arridx >= bh_arr_length(it->arr)) {
+            it->tab++;
+            goto step_to_next;
+        }
+
+        it->entry = (bh__hash_entry *)((char *)(it->entry) + BH__HASH_STORED_KEY_SIZE + it->elemsize);
+        return 1;
+    }
+
+step_to_next:
+    // Set forward to find next valid
+    while (*it->tab == NULL && it->tab != it->endtab) {
+        it->tab++;
+    }
+
+    if (it->tab == it->endtab) return 0;
+
+    it->arr = *it->tab;
+    it->entry = it->arr;
+    it->arridx = 0;
+    return 1;
+}
+
 #endif // ifndef BH_NO_HASHTABLE
 
 #endif // ifdef BH_DEFINE
diff --git a/onyx b/onyx
index e6cb94a2..8f837e0e 100755
Binary files a/onyx and b/onyx differ
diff --git a/onyx.c b/onyx.c
index 0378e4ad..58a3ca49 100644
--- a/onyx.c
+++ b/onyx.c
@@ -6,27 +6,6 @@
 
 #include "onyxlex.h"
 
-bh_arr(OnyxToken) parse_tokens(bh_file_contents *fc) {
-    OnyxTokenizer tknizer = {
-        .start = fc->data,
-        .curr = fc->data,
-        .end = fc->data + fc->length - 1,
-        .line_number = 1,
-        .line_start = fc->data,
-    };
-
-    bh_arr(OnyxToken) token_arr = NULL;
-    bh_arr_grow(token_arr, 512);
-
-    OnyxToken tk;
-    do {
-        tk = onyx_get_token(&tknizer);
-        bh_arr_push(token_arr, tk);
-    } while (tk.type != TOKEN_TYPE_END_STREAM);
-
-    return token_arr;
-}
-
 int main(int argc, char *argv[]) {
     bh_file source_file;
     bh_file_error err = bh_file_open(&source_file, argv[1]);
@@ -38,7 +17,9 @@ int main(int argc, char *argv[]) {
     bh_file_contents fc = bh_file_read_contents(&source_file);
     bh_file_close(&source_file);
 
-    bh_arr(OnyxToken) token_arr = parse_tokens(&fc);
+    bh_hash(u16) symbol_count;
+    bh_hash_init(symbol_count);
+    bh_arr(OnyxToken) token_arr = onyx_parse_tokens(&fc, symbol_count);
 
     printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr));
 
@@ -46,6 +27,14 @@
         printf("%s\n", onyx_get_token_type_name(*it));
     }
 
+    bh_hash_iterator it = bh_hash_iter_setup(u16, symbol_count);
+    while (bh_hash_iter_next(&it)) {
+        const char* sym = bh_hash_iter_key(it);
+        u16 count = bh_hash_iter_value(u16, it);
+
+        printf("%s was seen %d times.\n", sym, count);
+    }
+
     bh_file_contents_delete(&fc);
     bh_arr_free(token_arr);
 
diff --git a/onyxlex.c b/onyxlex.c
index 4e69467d..fb552d13 100644
--- a/onyxlex.c
+++ b/onyxlex.c
@@ -236,3 +236,41 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
 token_parsed:
     return tk;
 }
+
+bh_arr(OnyxToken) onyx_parse_tokens(bh_file_contents *fc, bh_hash(u16) symcount) {
+    OnyxTokenizer tknizer = {
+        .start = fc->data,
+        .curr = fc->data,
+        .end = fc->data + fc->length - 1,
+        .line_number = 1,
+        .line_start = fc->data,
+        .symbol_count = symcount,
+    };
+
+    bh_arr(OnyxToken) token_arr = NULL;
+    bh_arr_grow(token_arr, 512);
+
+    OnyxToken tk;
+    do {
+        tk = onyx_get_token(&tknizer);
+
+        if (tk.type == TOKEN_TYPE_SYMBOL) {
+            u16 val = 0;
+
+            char tmp = tk.token[tk.length];
+            tk.token[tk.length] = '\0';
+
+            if (bh_hash_has(u16, tknizer.symbol_count, tk.token)) {
+                val = bh_hash_get(u16, tknizer.symbol_count, tk.token);
+            }
+
+            bh_hash_put(u16, tknizer.symbol_count, tk.token, val + 1);
+
+            tk.token[tk.length] = tmp;
+        }
+
+        bh_arr_push(token_arr, tk);
+    } while (tk.type != TOKEN_TYPE_END_STREAM);
+
+    return token_arr;
+}
\ No newline at end of file
diff --git a/onyxlex.h b/onyxlex.h
index b77320a7..fe6b729d 100644
--- a/onyxlex.h
+++ b/onyxlex.h
@@ -9,6 +9,8 @@ typedef struct OnyxTokenizer {
     // TODO: Fix the line number and column count
     char* line_start;
    u64 line_number;
+
+    bh_hash(u16) symbol_count;
 } OnyxTokenizer;
 
 typedef enum OnyxTokenType {
@@ -73,5 +75,6 @@ typedef struct OnyxToken {
 
 const char* onyx_get_token_type_name(OnyxToken tkn);
 OnyxToken onyx_get_token(OnyxTokenizer* tokenizer);
+bh_arr(OnyxToken) onyx_parse_tokens(bh_file_contents *fc, bh_hash(u16) symcount);
 
 #endif
\ No newline at end of file
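
For reference, a minimal standalone sketch of how the hash table iteration added by this commit is meant to be driven, in the same style as the loop added to onyx.c above. The `word_count` table, the sample strings, and the standalone `main` are illustrative assumptions; only the `bh_hash_*` macros already exercised in this commit (`bh_hash_init`, `bh_hash_has`, `bh_hash_get`, `bh_hash_put`) and the new iterator API (`bh_hash_iter_setup`, `bh_hash_iter_next`, `bh_hash_iter_key`, `bh_hash_iter_value`) are taken from bh.h.

// Sketch only: assumes bh.h from this commit is on the include path.
#define BH_DEFINE
#include "bh.h"

#include <assert.h>
#include <stdio.h>

int main(void) {
    bh_hash(u16) word_count;   // hypothetical table name, u16 counts as in onyx.c
    bh_hash_init(word_count);

    // Count a few sample keys, mirroring how onyxlex.c counts symbols.
    char *words[] = { "foo", "bar", "foo" };
    for (i32 i = 0; i < 3; i++) {
        u16 seen = 0;
        if (bh_hash_has(u16, word_count, words[i])) {
            seen = bh_hash_get(u16, word_count, words[i]);
        }
        bh_hash_put(u16, word_count, words[i], seen + 1);
    }

    // Walk every (key, value) pair; the order follows the hash buckets,
    // not insertion order.
    bh_hash_iterator it = bh_hash_iter_setup(u16, word_count);
    while (bh_hash_iter_next(&it)) {
        printf("%s -> %d\n", bh_hash_iter_key(it), bh_hash_iter_value(u16, it));
    }

    return 0;
}

The iterator keeps all of its state in the bh_hash_iterator value, so no other bookkeeping is needed across calls to bh_hash_iter_next; once it returns 0 the table has been exhausted.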