#define BH__HASH_STORED_KEY_SIZE 64
typedef struct bh__hash_entry {
char key[BH__HASH_STORED_KEY_SIZE];
- // Value follows
+ i32 value; // NOTE: Not actually an i32, just used as a placeholder for offset
} bh__hash_entry;
#define BH__HASH_MODULUS 1021
-#define BH__HASH_KEYSIZE 16
+#define BH__HASH_KEYSIZE 64
+#ifdef BH_DEFINE
// djb2-style string hash (seed 5381) reduced to a bucket index in
// [0, BH__HASH_MODULUS).
// NOTE(review): the body shown never reads `str`, `len`, `c`, or `l` —
// the hashing loop appears to be elided from this diff view. As written
// here every key would land in the same bucket; confirm against the
// full source before assuming this is the real implementation.
u64 bh__hash_function(const char* str, i32 len) {
u64 hash = 5381;
i32 c, l = 0;
return hash % BH__HASH_MODULUS;
}
+#endif
+
+typedef struct bh_hash_iterator {
+ ptr *tab, *endtab, arr;
+ i32 elemsize, arridx;
+ bh__hash_entry* entry;
+} bh_hash_iterator;
#define bh_hash(T) T*
#define bh_hash_delete(T, tab, key) (bh__hash_delete((ptr *) tab, sizeof(T), key))
#endif
+#define bh_hash_iter_setup(T, tab) (assert(sizeof(T) == sizeof(*(tab))), bh__hash_iter_setup((ptr *) tab, sizeof(T)))
+#define bh_hash_iter_key(it) (it.entry->key)
+#define bh_hash_iter_value(T, it) (assert(sizeof(T) == it.elemsize), *(T *)&(it.entry->value))
+
b32 bh__hash_init(ptr **table);
b32 bh__hash_free(ptr **table);
ptr bh__hash_put(ptr *table, i32 elemsize, char *key);
b32 bh__hash_has(ptr *table, i32 elemsize, char *key);
ptr bh__hash_get(ptr *table, i32 elemsize, char *key);
void bh__hash_delete(ptr *table, i32 elemsize, char *key);
+bh_hash_iterator bh__hash_iter_setup(ptr *table, i32 elemsize);
+b32 bh_hash_iter_next(bh_hash_iterator* it);
#endif
if (cap == 0 && elemsize == 0) return 1;
arrptr = (bh__arr *) malloc(sizeof(*arrptr) + elemsize * cap);
+ if (arrptr == NULL) return 0;
+
arrptr->capacity = cap;
arrptr->length = 0;
ptr bh__hash_put(ptr *table, i32 elemsize, char *key) {
u64 index = bh__hash_function(key, 0);
- elemsize += sizeof(bh__hash_entry);
+ elemsize += BH__HASH_STORED_KEY_SIZE;
ptr arrptr = table[index];
i32 len = bh_arr_length(arrptr);
u64 index = bh__hash_function(key, 0);
ptr arrptr = table[index];
- i32 len = bh_arr_length(arrptr);
if (arrptr == NULL) return 0;
+ i32 len = bh_arr_length(arrptr);
i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
while (len--) {
bh__arr_deleten((void **) &arrptr, elemsize, i, 1);
}
+bh_hash_iterator bh__hash_iter_setup(ptr *table, i32 elemsize) {
+ bh_hash_iterator it = {
+ .tab = table,
+ .endtab = table + BH__HASH_MODULUS,
+ .arr = NULL,
+ .elemsize = elemsize,
+ .entry = NULL
+ };
+ return it;
+}
+
+b32 bh_hash_iter_next(bh_hash_iterator* it) {
+ if (it->tab == NULL) return 0;
+
+ if (it->entry != NULL) {
+ it->arridx++;
+ if (it->arridx >= bh_arr_length(it->arr)) {
+ it->tab++;
+ goto step_to_next;
+ }
+
+ it->entry = (bh__hash_entry *)((char *)(it->entry) + BH__HASH_STORED_KEY_SIZE + it->elemsize);
+ return 1;
+ }
+
+step_to_next:
+ // Set forward to find next valid
+ while (*it->tab == NULL && it->tab != it->endtab) {
+ it->tab++;
+ }
+
+ if (it->tab == it->endtab) return 0;
+
+ it->arr = *it->tab;
+ it->entry = it->arr;
+ it->arridx = 0;
+ return 1;
+}
+
#endif // ifndef BH_NO_HASHTABLE
#endif // ifdef BH_DEFINE
#include "onyxlex.h"
-bh_arr(OnyxToken) parse_tokens(bh_file_contents *fc) {
- OnyxTokenizer tknizer = {
- .start = fc->data,
- .curr = fc->data,
- .end = fc->data + fc->length - 1,
- .line_number = 1,
- .line_start = fc->data,
- };
-
- bh_arr(OnyxToken) token_arr = NULL;
- bh_arr_grow(token_arr, 512);
-
- OnyxToken tk;
- do {
- tk = onyx_get_token(&tknizer);
- bh_arr_push(token_arr, tk);
- } while (tk.type != TOKEN_TYPE_END_STREAM);
-
- return token_arr;
-}
-
int main(int argc, char *argv[]) {
bh_file source_file;
bh_file_error err = bh_file_open(&source_file, argv[1]);
bh_file_contents fc = bh_file_read_contents(&source_file);
bh_file_close(&source_file);
- bh_arr(OnyxToken) token_arr = parse_tokens(&fc);
+ bh_hash(u16) symbol_count;
+ bh_hash_init(symbol_count);
+ bh_arr(OnyxToken) token_arr = onyx_parse_tokens(&fc, symbol_count);
printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr));
printf("%s\n", onyx_get_token_type_name(*it));
}
+ bh_hash_iterator it = bh_hash_iter_setup(u16, symbol_count);
+ while (bh_hash_iter_next(&it)) {
+ const char* sym = bh_hash_iter_key(it);
+ u16 count = bh_hash_iter_value(u16, it);
+
+ printf("%s was seen %d times.\n", sym, count);
+ }
+
bh_file_contents_delete(&fc);
bh_arr_free(token_arr);
token_parsed:
return tk;
}
+
+bh_arr(OnyxToken) onyx_parse_tokens(bh_file_contents *fc, bh_hash(u16) symcount) {
+ OnyxTokenizer tknizer = {
+ .start = fc->data,
+ .curr = fc->data,
+ .end = fc->data + fc->length - 1,
+ .line_number = 1,
+ .line_start = fc->data,
+ .symbol_count = symcount,
+ };
+
+ bh_arr(OnyxToken) token_arr = NULL;
+ bh_arr_grow(token_arr, 512);
+
+ OnyxToken tk;
+ do {
+ tk = onyx_get_token(&tknizer);
+
+ if (tk.type == TOKEN_TYPE_SYMBOL) {
+ u16 val = 0;
+
+ char tmp = tk.token[tk.length];
+ tk.token[tk.length] = '\0';
+
+ if (bh_hash_has(u16, tknizer.symbol_count, tk.token)) {
+ val = bh_hash_get(u16, tknizer.symbol_count, tk.token);
+ }
+
+ bh_hash_put(u16, tknizer.symbol_count, tk.token, val + 1);
+
+ tk.token[tk.length] = tmp;
+ }
+
+ bh_arr_push(token_arr, tk);
+ } while (tk.type != TOKEN_TYPE_END_STREAM);
+
+ return token_arr;
+}
\ No newline at end of file