From 437e7e434d7bcc230107b413f78c52443759bd69 Mon Sep 17 00:00:00 2001
From: Brendan Hansen <brendan.f.hansen@gmail.com>
Date: Wed, 13 May 2020 15:00:23 -0500
Subject: [PATCH] Added hashtable implementation and working on strings

---
 bh.h               | 194 ++++++++++++++++++++++++++++++++++++++++++++-
 docs/parse_grammar |  12 +--
 onyx.c             |  17 ++--
 onyxlex.c          |  12 +--
 onyxlex.h          |  18 ++---
 onyxparser.c       |   2 +
 onyxparser.h       |  18 +++++
 7 files changed, 241 insertions(+), 32 deletions(-)
 create mode 100644 onyxparser.c
 create mode 100644 onyxparser.h

diff --git a/bh.h b/bh.h
index f6574665..22c808a9 100644
--- a/bh.h
+++ b/bh.h
@@ -8,6 +8,7 @@
 
 #include <stdlib.h>
 #include <string.h> // TODO: Replace with needed functions
+#include <assert.h>
 
 //-------------------------------------------------------------------------------------
 // Better types
@@ -24,6 +25,7 @@ typedef signed long i64;
 typedef signed long long i128;
 typedef unsigned long isize;
 typedef i32 b32;
+typedef void* ptr;
 
 //-------------------------------------------------------------------------------------
 // Better character functions
@@ -47,6 +49,8 @@ i64 chars_match(char* ptr1, char* ptr2);
 //-------------------------------------------------------------------------------------
 // Better strings
 //-------------------------------------------------------------------------------------
+#ifndef BH_NO_STRING
+
 typedef struct bh__string {
 	u64 length;
 	u64 capacity;
@@ -95,10 +99,14 @@ void bh_string_trim_end_space(bh_string* str);
 // TEMP
 void bh_string_print(bh_string* str);
 
+#endif
+
 
 //-------------------------------------------------------------------------------------
 // Better files
 //-------------------------------------------------------------------------------------
+#ifndef BH_NO_FILE
+
 typedef enum bh_file_error {
 	BH_FILE_ERROR_NONE,
 	BH_FILE_ERROR_INVALID
@@ -164,9 +172,13 @@ bh_file_contents bh_file_read_contents_bh_file(bh_file* file);
 bh_file_contents bh_file_read_contents_direct(const char* filename);
 i32 bh_file_contents_delete(bh_file_contents* contents);
 
+#endif
+
 //-------------------------------------------------------------------------------------
 // Better dynamically-sized arrays
 //-------------------------------------------------------------------------------------
+#ifndef BH_NO_ARRAY
+
 typedef struct bh__arr {
 	i32 length, capacity;
 } bh__arr;
@@ -220,6 +232,60 @@ void* bh__arr_copy(void *arr, i32 elemsize);
 void bh__arr_insertn(void **arr, i32 elemsize, i32 index, i32 numelems);
 void bh__arr_deleten(void **arr, i32 elemsize, i32 index, i32 numelems);
 
+#endif
+
+//-------------------------------------------------------------------------------------
+// HASH TABLE FUNCTIONS
+//-------------------------------------------------------------------------------------
+#ifndef BH_NO_HASHTABLE
+
+#define BH__HASH_STORED_KEY_SIZE 64
+typedef struct bh__hash_entry {
+	char key[BH__HASH_STORED_KEY_SIZE];
+	// Value follows
+} bh__hash_entry;
+
+#define BH__HASH_MODULUS 1021
+#define BH__HASH_KEYSIZE 16
+u64 bh__hash_function(const char* str, i32 len) {
+	u64 hash = 5381;
+	i32 c, l = 0;
+	if (len == 0) len = BH__HASH_KEYSIZE;
+
+	while ((c = *str++) && l++ < len) {
+		hash = (hash << 5) + hash + c;
+	}
+
+	return hash % BH__HASH_MODULUS;
+}
+
+#define bh_hash(T)		T*
+
+#ifdef BH_HASH_SIZE_SAFE
+	#define bh_hash_init(tab)				bh__hash_init((ptr **) &(tab))
+	#define bh_hash_free(tab)				bh__hash_free((ptr **) &(tab))
+	#define bh_hash_put(T, tab, key, value) (assert(sizeof(T) == sizeof(*(tab))), (*((T *) bh__hash_put((ptr *) tab, sizeof(T), key)) = (T) value))
+	#define bh_hash_has(T, tab, key)		(assert(sizeof(T) == sizeof(*(tab))), (bh__hash_has((ptr *) tab, sizeof(T), key)))
+	#define bh_hash_get(T, tab, key)		(assert(sizeof(T) == sizeof(*(tab))), (*((T *) bh__hash_get((ptr *) tab, sizeof(T), key))))
+	#define bh_hash_delete(T, tab, key)		(assert(sizeof(T) == sizeof(*(tab))), bh__hash_delete((ptr *) tab, sizeof(T), key))
+#else
+	#define bh_hash_init(tab)				bh__hash_init((ptr **) &(tab))
+	#define bh_hash_free(tab)				bh__hash_free((ptr **) &(tab))
+	#define bh_hash_put(T, tab, key, value) (*((T *) bh__hash_put((ptr *) tab, sizeof(T), key)) = value)
+	#define bh_hash_has(T, tab, key)		(bh__hash_has((ptr *) tab, sizeof(T), key))
+	#define bh_hash_get(T, tab, key)		(*((T *) bh__hash_get((ptr *) tab, sizeof(T), key)))
+	#define bh_hash_delete(T, tab, key)		(bh__hash_delete((ptr *) tab, sizeof(T), key))
+#endif
+
+b32 bh__hash_init(ptr **table);
+b32 bh__hash_free(ptr **table);
+ptr bh__hash_put(ptr *table, i32 elemsize, char *key);
+b32 bh__hash_has(ptr *table, i32 elemsize, char *key);
+ptr bh__hash_get(ptr *table, i32 elemsize, char *key);
+void bh__hash_delete(ptr *table, i32 elemsize, char *key);
+
+#endif
+
 #ifdef BH_DEFINE
 #undef BH_DEFINE
 
@@ -269,6 +335,8 @@ i64 chars_match(char* ptr1, char* ptr2) {
 //-------------------------------------------------------------------------------------
 // STRING IMPLEMENTATION
 //-------------------------------------------------------------------------------------
+#ifndef BH_NO_STRING
+
 bh_string* bh_string_new_cap(unsigned long cap) {
 	bh__string* str;
 	str = (bh__string*) malloc(sizeof(*str) + sizeof(char) * cap + 1);
@@ -287,7 +355,7 @@ bh_string* bh_string_new_str(const char* cstr) {
 		data[i] = cstr[i];
 	}
 
-	data[i] = 0; // Always null terminate the string
+	data[len] = 0; // Always null terminate the string
 
 	str->length = len;
 	str->capacity = len;
@@ -396,11 +464,13 @@ void bh_string_print(bh_string* str) {
 	write(STDOUT_FILENO, str->data, str->length);
 }
 
-
+#endif // ifndef BH_NO_STRING
 
 //-------------------------------------------------------------------------------------
 // FILE IMPLEMENTATION
 //-------------------------------------------------------------------------------------
+#ifndef BH_NO_FILE
+
 bh_file_error bh_file_get_standard(bh_file* file, bh_file_standard stand) {
 	i32 sd_fd = -1;
 	const char* filename = NULL;
@@ -578,9 +648,12 @@ b32 bh_file_contents_delete(bh_file_contents* contents) {
 	return 1;
 }
 
+#endif // ifndef BH_NO_FILE
+
 //-------------------------------------------------------------------------------------
 // ARRAY IMPLEMENTATION
 //-------------------------------------------------------------------------------------
+#ifndef BH_NO_ARRAY
 
 b32 bh__arr_grow(void** arr, i32 elemsize, i32 cap) {
 	bh__arr* arrptr;
@@ -594,7 +667,6 @@ b32 bh__arr_grow(void** arr, i32 elemsize, i32 cap) {
 
 	} else {
 		arrptr = bh__arrhead(*arr);
-		if (arrptr->length > cap) return 1;
 
 		if (arrptr->capacity < cap) {
 			void* p;
@@ -688,6 +760,122 @@ void bh__arr_insertn(void **arr, i32 elemsize, i32 index, i32 numelems) {
 	}
 }
 
+#endif // ifndef BH_NO_ARRAY
+
+//-------------------------------------------------------------------------------------
+// HASHTABLE IMPLEMENTATION
+//-------------------------------------------------------------------------------------
+#ifndef BH_NO_HASHTABLE
+
+b32 bh__hash_init(ptr **table) {
+	*table = malloc(sizeof(ptr) * BH__HASH_MODULUS);
+	if (*table == NULL) return 0;
+
+	for (i32 i = 0; i < BH__HASH_MODULUS; i++) {
+		(*table)[i] = NULL;
+	}
+
+	return 1;
+}
+
+b32 bh__hash_free(ptr **table) {
+	for (i32 i = 0; i < BH__HASH_MODULUS; i++) {
+		if ((*table)[i] != NULL) {
+			bh_arr_free(*((*table) + i));
+		}
+	}
+
+	free(*table);
+	*table = NULL;
+}
+
+// Assumes NULL terminated string for key
+ptr bh__hash_put(ptr *table, i32 elemsize, char *key) {
+	u64 index = bh__hash_function(key, 0);
+
+	elemsize += sizeof(bh__hash_entry);
+
+	ptr arrptr = table[index];
+	i32 len = bh_arr_length(arrptr);
+
+	while (len--) {
+		if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) goto found_matching;
+		arrptr = (ptr)((char *) arrptr + elemsize);
+	}
+
+	// Didn't find it in the array, make a new one
+	arrptr = table[index];
+	len = bh_arr_length(arrptr);
+	bh__arr_grow(&arrptr, elemsize, len + 1);
+	bh__arrhead(arrptr)->length++;
+	table[index] = arrptr;
+
+	arrptr = (ptr)(((char *) arrptr) + elemsize * len);
+	strncpy(arrptr, key, BH__HASH_STORED_KEY_SIZE);
+
+found_matching:
+	return (ptr)(((char *) arrptr) + BH__HASH_STORED_KEY_SIZE);
+}
+
+b32 bh__hash_has(ptr *table, i32 elemsize, char *key) {
+	u64 index = bh__hash_function(key, 0);	
+
+	ptr arrptr = table[index];
+	i32 len = bh_arr_length(arrptr);
+	if (arrptr == NULL) return 0;
+
+	i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;	
+
+	while (len--) {
+		if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) return 1;
+		arrptr = (ptr)((char *) arrptr + stride);
+	}
+
+	return 0;
+}
+
+ptr bh__hash_get(ptr *table, i32 elemsize, char *key) {
+	u64 index = bh__hash_function(key, 0);
+
+	ptr arrptr = table[index];
+	i32 len = bh_arr_length(arrptr);
+	assert(arrptr != NULL);
+
+	i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
+
+	while (len--) {
+		if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) {
+			return (ptr)((char *) arrptr + BH__HASH_STORED_KEY_SIZE);
+		}
+
+		arrptr = (ptr)((char *) arrptr + stride);
+	}
+
+	return 0;
+}
+
+void bh__hash_delete(ptr *table, i32 elemsize, char *key) {
+	u64 index = bh__hash_function(key, 0);
+
+	ptr arrptr = table[index];
+	i32 len = bh_arr_length(arrptr);
+	if (arrptr == NULL) return; // Didn't exist
+
+	i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
+	i32 i = 0;
+
+	while (len && strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) != 0) {
+		arrptr = (ptr)((char *) arrptr + stride);
+		i++, len--;
+	}
+
+	if (len == 0) return; // Didn't exist
+
+	bh__arr_deleten((void **) &arrptr, elemsize, i, 1);
+}
+
+#endif // ifndef BH_NO_HASHTABLE
+
 #endif // ifdef BH_DEFINE
 
 #endif // ifndef BH_H
diff --git a/docs/parse_grammar b/docs/parse_grammar
index 0a07b3f7..a71cd866 100644
--- a/docs/parse_grammar
+++ b/docs/parse_grammar
@@ -2,7 +2,7 @@ Note: ~ is empty
 Goal: Design the language to have no ambiguity
 
 
-SOURCE_FILE = TOP_LEVEL_STATEMENT TOKEN_TYPE_SYM_SEMICOLON SOURCE_FILE | ~
+SOURCE_FILE = TOP_LEVEL_STATEMENT ; SOURCE_FILE | ~
 
 TOP_LEVEL_STATEMENT
 	= USE_DECLARATION
@@ -26,9 +26,9 @@ FUNCTION_DECLARATION = proc TOKEN_TYPE_SYMBOL FUNCTION_TYPE BLOCK
 
 FUNCTION_TYPE = :: ( FUNCTION_PARAMS ) -> TOKEN_TYPE_SYMBOL
 
-BLOCK = { STATEMENTS }
-
-STATEMENTS = STATEMENT ; STATEMENTS | ~
+-- This may be too weird...
+BLOCK = { STATEMENTS
+STATEMENTS = STATEMENT ; STATEMENTS | }
 
 STATEMENT
 	= ASSIGNMENT_STATEMENT
@@ -42,9 +42,9 @@ ASSIGNMENT_STATEMENT = TOKEN_TYPE_SYMBOL = EXPRESSION
 IF_STATEMENT
 	= if EXPRESSION BLOCK ELSE_IF_STATEMENT ELSE_STATEMENT
 
-ELSEIF_STATEMENT = TOKEN_TYPE_KEYWORD_ELSEIF EXPRESSION BLOCK ELSEIF_STATEMENT | ~
+ELSEIF_STATEMENT = elseif EXPRESSION BLOCK ELSEIF_STATEMENT | ~
 
-ELSE_STATEMENT = TOKEN_TYPE_KEYWORD_ELSE BLOCK | ~
+ELSE_STATEMENT = else BLOCK | ~
 
 -- This needs to be better
 FOR_STATEMENT = for STATEMENT ; EXPRESSION ; STATEMENT BLOCK
diff --git a/onyx.c b/onyx.c
index 32e38f5b..0378e4ad 100644
--- a/onyx.c
+++ b/onyx.c
@@ -1,3 +1,4 @@
+#define BH_NO_STRING
 #define BH_DEFINE
 #include "bh.h"
 
@@ -5,8 +6,8 @@
 
 #include "onyxlex.h"
 
-bh_arr(Token) parse_tokens(bh_file_contents *fc) {
-	Tokenizer tknizer = {
+bh_arr(OnyxToken) parse_tokens(bh_file_contents *fc) {
+	OnyxTokenizer tknizer = {
 		.start 			= fc->data,
 		.curr 			= fc->data,
 		.end 			= fc->data + fc->length - 1,
@@ -14,12 +15,12 @@ bh_arr(Token) parse_tokens(bh_file_contents *fc) {
 		.line_start 	= fc->data,
 	};
 
-	bh_arr(Token) token_arr = NULL;
+	bh_arr(OnyxToken) token_arr = NULL;
 	bh_arr_grow(token_arr, 512);
 
-	Token tk;
+	OnyxToken tk;
 	do {
-		tk = get_token(&tknizer);
+		tk = onyx_get_token(&tknizer);
 		bh_arr_push(token_arr, tk);
 	} while (tk.type != TOKEN_TYPE_END_STREAM);
 
@@ -37,12 +38,12 @@ int main(int argc, char *argv[]) {
 	bh_file_contents fc = bh_file_read_contents(&source_file);
 	bh_file_close(&source_file);
 
-	bh_arr(Token) token_arr = parse_tokens(&fc);
+	bh_arr(OnyxToken) token_arr = parse_tokens(&fc);
 
 	printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr));
 
-	for (Token* it = token_arr; !bh_arr_end(token_arr, it); it++) {
-		printf("%s\n", get_token_type_name(*it));
+	for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) {
+		printf("%s\n", onyx_get_token_type_name(*it));
 	}
 
 	bh_file_contents_delete(&fc);
diff --git a/onyxlex.c b/onyxlex.c
index af930ded..4e69467d 100644
--- a/onyxlex.c
+++ b/onyxlex.c
@@ -1,7 +1,7 @@
 #include "bh.h"
 #include "onyxlex.h"
 
-static const char* TokenTypeNames[] = {
+static const char* onyx_token_type_names[] = {
 	"TOKEN_TYPE_UNKNOWN",
 	"TOKEN_TYPE_END_STREAM",
 
@@ -70,7 +70,7 @@ static const char* TokenTypeNames[] = {
 }
 #endif
 
-static b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) {
+static b32 token_lit(OnyxTokenizer* tokenizer, OnyxToken* tk, char* lit, OnyxTokenType type) {
 	i64 len = chars_match(tokenizer->curr, lit);
 	if (len > 0) {
 		tk->type = type;
@@ -86,12 +86,12 @@ static b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type)
 	return 0;
 }
 
-const char* get_token_type_name(Token tkn) {
-	return TokenTypeNames[tkn.type];
+const char* onyx_get_token_type_name(OnyxToken tkn) {
+	return onyx_token_type_names[tkn.type];
 }
 
-Token get_token(Tokenizer* tokenizer) {
-	Token tk;
+OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
+	OnyxToken tk;
 
 	// Skip whitespace
 	while (char_is_whitespace(*tokenizer->curr) && tokenizer->curr != tokenizer->end)
diff --git a/onyxlex.h b/onyxlex.h
index 608d133b..b77320a7 100644
--- a/onyxlex.h
+++ b/onyxlex.h
@@ -3,15 +3,15 @@
 
 #include "bh.h"
 
-typedef struct Tokenizer {
+typedef struct OnyxTokenizer {
 	char *start, *curr, *end;
 
 	// TODO: Fix the line number and column count
 	char* line_start;
 	u64 line_number;
-} Tokenizer;
+} OnyxTokenizer;
 
-typedef enum TokenType {
+typedef enum OnyxTokenType {
 	TOKEN_TYPE_UNKNOWN,
 	TOKEN_TYPE_END_STREAM,
 
@@ -62,16 +62,16 @@ typedef enum TokenType {
 	TOKEN_TYPE_LITERAL_NUMERIC,
 
 	TOKEN_TYPE_COUNT
-} TokenType;
+} OnyxTokenType;
 
-typedef struct Token {
-	TokenType type;
+typedef struct OnyxToken {
+	OnyxTokenType type;
 	char* token;
 	isize length;
 	u64 line_number, line_column;
-} Token;
+} OnyxToken;
 
-const char* get_token_type_name(Token tkn);
-Token get_token(Tokenizer* tokenizer);
+const char* onyx_get_token_type_name(OnyxToken tkn);
+OnyxToken onyx_get_token(OnyxTokenizer* tokenizer);
 
 #endif
\ No newline at end of file
diff --git a/onyxparser.c b/onyxparser.c
new file mode 100644
index 00000000..873380b6
--- /dev/null
+++ b/onyxparser.c
@@ -0,0 +1,2 @@
+
+#include "onyxparser.h"
diff --git a/onyxparser.h b/onyxparser.h
new file mode 100644
index 00000000..eccf9610
--- /dev/null
+++ b/onyxparser.h
@@ -0,0 +1,18 @@
+#include "bh.h"
+
+#include "onyxlex.h"
+
+typedef struct OnyxParser {
+	OnyxTokenizer tokenizer;
+	OnyxToken *prev;
+	OnyxToken *curr;
+
+	bh_hash(OnyxIdentifier) idens;
+
+	bh_arr(OnyxToken) tokens; /* Maybe don't store the whole array? Ask for one as you need it? */
+} OnyxParser;
+
+typedef struct OnyxParseNode {
+	OnyxToken *token;
+
+};
\ No newline at end of file
-- 
2.25.1