Added hash table iteration
author Brendan Hansen <brendan.f.hansen@gmail.com>
Thu, 14 May 2020 15:40:30 +0000 (10:40 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Thu, 14 May 2020 15:40:30 +0000 (10:40 -0500)
bh.h
onyx
onyx.c
onyxlex.c
onyxlex.h

diff --git a/bh.h b/bh.h
index 22c808a93021ef418fa91e5c9d6cb411d47c8708..da236743cc6bca493c6a4f59723f76e0536e601d 100644 (file)
--- a/bh.h
+++ b/bh.h
@@ -242,11 +242,12 @@ void bh__arr_deleten(void **arr, i32 elemsize, i32 index, i32 numelems);
 #define BH__HASH_STORED_KEY_SIZE 64
 typedef struct bh__hash_entry {
        char key[BH__HASH_STORED_KEY_SIZE];
-       // Value follows
+       i32 value; // NOTE: Not actually an i32; a placeholder marking the offset (right after key) where the stored value begins
 } bh__hash_entry;
 
 #define BH__HASH_MODULUS 1021
-#define BH__HASH_KEYSIZE 16
+#define BH__HASH_KEYSIZE 64
+#ifdef BH_DEFINE
 u64 bh__hash_function(const char* str, i32 len) {
        u64 hash = 5381;
        i32 c, l = 0;
@@ -258,6 +259,13 @@ u64 bh__hash_function(const char* str, i32 len) {
 
        return hash % BH__HASH_MODULUS;
 }
+#endif
+
+typedef struct bh_hash_iterator { // Cursor over a bh_hash table; create with bh_hash_iter_setup, advance with bh_hash_iter_next
+       ptr *tab, *endtab, arr; // tab: current bucket slot; endtab: one past the last of the BH__HASH_MODULUS slots; arr: bucket array being walked
+       i32 elemsize, arridx;   // elemsize: size in bytes of the stored value type; arridx: index of `entry` within `arr`
+       bh__hash_entry* entry;  // current entry; NULL until bh_hash_iter_next first succeeds
+} bh_hash_iterator;
 
 #define bh_hash(T)             T*
 
@@ -277,12 +285,18 @@ u64 bh__hash_function(const char* str, i32 len) {
        #define bh_hash_delete(T, tab, key)             (bh__hash_delete((ptr *) tab, sizeof(T), key))
 #endif
 
+#define bh_hash_iter_setup(T, tab)                     (assert(sizeof(T) == sizeof(*(tab))), bh__hash_iter_setup((ptr *) (tab), sizeof(T))) // iterator over tab, whose values are of type T
+#define bh_hash_iter_key(it)                           ((it).entry->key) // key string of the current entry
+#define bh_hash_iter_value(T, it)                      (assert(sizeof(T) == (it).elemsize), *(T *)&((it).entry->value)) // current value, read as a T
+
 b32 bh__hash_init(ptr **table);
 b32 bh__hash_free(ptr **table);
 ptr bh__hash_put(ptr *table, i32 elemsize, char *key);
 b32 bh__hash_has(ptr *table, i32 elemsize, char *key);
 ptr bh__hash_get(ptr *table, i32 elemsize, char *key);
 void bh__hash_delete(ptr *table, i32 elemsize, char *key);
+bh_hash_iterator bh__hash_iter_setup(ptr *table, i32 elemsize); // normally reached through the bh_hash_iter_setup(T, tab) macro
+b32 bh_hash_iter_next(bh_hash_iterator* it); // returns 1 after moving `it` to the next entry, 0 when there is none
 
 #endif
 
@@ -662,6 +676,8 @@ b32 bh__arr_grow(void** arr, i32 elemsize, i32 cap) {
                if (cap == 0 && elemsize == 0) return 1;
 
                arrptr = (bh__arr *) malloc(sizeof(*arrptr) + elemsize * cap);
+               if (arrptr == NULL) return 0;
+
                arrptr->capacity = cap;
                arrptr->length = 0;
 
@@ -793,7 +809,7 @@ b32 bh__hash_free(ptr **table) {
 ptr bh__hash_put(ptr *table, i32 elemsize, char *key) {
        u64 index = bh__hash_function(key, 0);
 
-       elemsize += sizeof(bh__hash_entry);
+       elemsize += BH__HASH_STORED_KEY_SIZE;
 
        ptr arrptr = table[index];
        i32 len = bh_arr_length(arrptr);
@@ -821,9 +837,9 @@ b32 bh__hash_has(ptr *table, i32 elemsize, char *key) {
        u64 index = bh__hash_function(key, 0);  
 
        ptr arrptr = table[index];
-       i32 len = bh_arr_length(arrptr);
        if (arrptr == NULL) return 0;
 
+       i32 len = bh_arr_length(arrptr);
        i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;       
 
        while (len--) {
@@ -874,6 +890,45 @@ void bh__hash_delete(ptr *table, i32 elemsize, char *key) {
        bh__arr_deleten((void **) &arrptr, elemsize, i, 1);
 }
 
+// Creates an iterator over `table` (values of `elemsize` bytes); call bh_hash_iter_next before reading from it.
+bh_hash_iterator bh__hash_iter_setup(ptr *table, i32 elemsize) {
+       bh_hash_iterator it = {
+               .tab = table,
+               .endtab = (table != NULL) ? table + BH__HASH_MODULUS : NULL, // arithmetic on a NULL pointer is undefined
+               .elemsize = elemsize,
+               .arr = NULL, .entry = NULL, // entry == NULL marks "not started"; arridx is zero-initialized
+       };
+       return it;
+}
+
+// Advances `it` to the next stored entry. Returns 1 when it->entry points at a valid entry and 0 when the
+// table is NULL or has no further entries; once 0 has been returned, further calls keep returning 0.
+b32 bh_hash_iter_next(bh_hash_iterator* it) {
+       if (it->tab == NULL || it->tab == it->endtab) return 0;
+
+       if (it->entry != NULL) {
+               it->arridx++;
+               if (it->arridx >= bh_arr_length(it->arr)) {
+                       it->tab++;
+                       goto step_to_next;
+               }
+               // Entries in a bucket are packed back to back: BH__HASH_STORED_KEY_SIZE key bytes, then elemsize value bytes.
+               it->entry = (bh__hash_entry *)((char *)(it->entry) + BH__HASH_STORED_KEY_SIZE + it->elemsize);
+               return 1;
+       }
+
+step_to_next:
+       // Skip empty slots. The bound is tested first so the one-past-the-end slot is never dereferenced.
+       while (it->tab != it->endtab && *it->tab == NULL) { it->tab++; }
+
+       if (it->tab == it->endtab) return 0;
+
+       it->arr = *it->tab;
+       it->entry = it->arr;
+       it->arridx = 0;
+       return 1;
+}
+
 #endif // ifndef BH_NO_HASHTABLE
 
 #endif // ifdef BH_DEFINE
diff --git a/onyx b/onyx
index e6cb94a24b220508298f7e54c473440ad335c933..8f837e0e1b922cc10e8a170268a9541a09eda703 100755 (executable)
Binary files a/onyx and b/onyx differ
diff --git a/onyx.c b/onyx.c
index 0378e4adf4c299fbaccd0c66f1b9186dde4ab997..58a3ca49a5c04445515d41831c64ae8792901c27 100644 (file)
--- a/onyx.c
+++ b/onyx.c
@@ -6,27 +6,6 @@
 
 #include "onyxlex.h"
 
-bh_arr(OnyxToken) parse_tokens(bh_file_contents *fc) {
-       OnyxTokenizer tknizer = {
-               .start                  = fc->data,
-               .curr                   = fc->data,
-               .end                    = fc->data + fc->length - 1,
-               .line_number    = 1,
-               .line_start     = fc->data,
-       };
-
-       bh_arr(OnyxToken) token_arr = NULL;
-       bh_arr_grow(token_arr, 512);
-
-       OnyxToken tk;
-       do {
-               tk = onyx_get_token(&tknizer);
-               bh_arr_push(token_arr, tk);
-       } while (tk.type != TOKEN_TYPE_END_STREAM);
-
-       return token_arr;
-}
-
 int main(int argc, char *argv[]) {
        bh_file source_file;
        bh_file_error err = bh_file_open(&source_file, argv[1]);
@@ -38,7 +17,9 @@ int main(int argc, char *argv[]) {
        bh_file_contents fc = bh_file_read_contents(&source_file);
        bh_file_close(&source_file);
 
-       bh_arr(OnyxToken) token_arr = parse_tokens(&fc);
+       bh_hash(u16) symbol_count;
+       bh_hash_init(symbol_count);
+       bh_arr(OnyxToken) token_arr = onyx_parse_tokens(&fc, symbol_count);
 
        printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr));
 
@@ -46,6 +27,14 @@ int main(int argc, char *argv[]) {
                printf("%s\n", onyx_get_token_type_name(*it));
        }
 
+       bh_hash_iterator it = bh_hash_iter_setup(u16, symbol_count);
+       while (bh_hash_iter_next(&it)) {
+               const char* sym = bh_hash_iter_key(it);
+               u16 count = bh_hash_iter_value(u16, it);
+
+               printf("%s was seen %d times.\n", sym, count);
+       }
+
        bh_file_contents_delete(&fc);
        bh_arr_free(token_arr);
 
diff --git a/onyxlex.c b/onyxlex.c
index 4e69467d46a081d90940fa29ea2bdf386a06926a..fb552d136ac90aeb81e9297f96ecb82f2c20212b 100644 (file)
--- a/onyxlex.c
+++ b/onyxlex.c
@@ -236,3 +236,41 @@ OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
 token_parsed:
        return tk;
 }
+// Tokenizes all of `fc` into a newly grown token array and tallies, per symbol token, how often it occurs in `symcount`.
+bh_arr(OnyxToken) onyx_parse_tokens(bh_file_contents *fc, bh_hash(u16) symcount) {
+       OnyxTokenizer tknizer = {
+               .start                  = fc->data,
+               .curr                   = fc->data,
+               .end                    = fc->data + fc->length - 1,
+               .line_number    = 1,
+               .line_start     = fc->data,
+               .symbol_count   = symcount,
+       };
+
+       bh_arr(OnyxToken) token_arr = NULL;
+       bh_arr_grow(token_arr, 512);
+
+       OnyxToken tk;
+       do {
+               tk = onyx_get_token(&tknizer);
+
+               if (tk.type == TOKEN_TYPE_SYMBOL) {
+                       u16 val = 0;
+                       // Terminate the symbol in place so it can serve as a C-string key; the saved byte is restored below.
+                       char tmp = tk.token[tk.length];
+                       tk.token[tk.length] = '\0';
+
+                       if (bh_hash_has(u16, tknizer.symbol_count, tk.token)) {
+                               val = bh_hash_get(u16, tknizer.symbol_count, tk.token);
+                       }
+                       // NOTE(review): the count is a u16, so it presumably wraps after 65535 occurrences -- confirm.
+                       bh_hash_put(u16, tknizer.symbol_count, tk.token, val + 1);
+
+                       tk.token[tk.length] = tmp;
+               }
+               // Every token is appended, including the final TOKEN_TYPE_END_STREAM one that ends the loop.
+               bh_arr_push(token_arr, tk);
+       } while (tk.type != TOKEN_TYPE_END_STREAM);
+
+       return token_arr;
+}
\ No newline at end of file
diff --git a/onyxlex.h b/onyxlex.h
index b77320a774b01b434da973e97749bd225c7f9722..fe6b729d8047d3e69d63344556290aa8e3e6026e 100644 (file)
--- a/onyxlex.h
+++ b/onyxlex.h
@@ -9,6 +9,8 @@ typedef struct OnyxTokenizer {
        // TODO: Fix the line number and column count
        char* line_start;
        u64 line_number;
+
+       bh_hash(u16) symbol_count;
 } OnyxTokenizer;
 
 typedef enum OnyxTokenType {
@@ -73,5 +75,6 @@ typedef struct OnyxToken {
 
 const char* onyx_get_token_type_name(OnyxToken tkn);
 OnyxToken onyx_get_token(OnyxTokenizer* tokenizer);
+bh_arr(OnyxToken) onyx_parse_tokens(bh_file_contents *fc, bh_hash(u16) symcount); // tokenizes fc; tallies symbol occurrences in symcount
 
 #endif
\ No newline at end of file