Added hashtable implementation and working on strings

author Brendan Hansen <brendan.f.hansen@gmail.com>

Wed, 13 May 2020 20:00:23 +0000 (15:00 -0500)

committer Brendan Hansen <brendan.f.hansen@gmail.com>

Wed, 13 May 2020 20:00:23 +0000 (15:00 -0500)
author Brendan Hansen <brendan.f.hansen@gmail.com>
Wed, 13 May 2020 20:00:23 +0000 (15:00 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Wed, 13 May 2020 20:00:23 +0000 (15:00 -0500)
diff --git a/bh.h b/bh.h

index f6574665d720557392d1aa83c15aa600e6ad676b..22c808a93021ef418fa91e5c9d6cb411d47c8708 100644 (file)
--- a/bh.h
+++ b/bh.h
@@ -8,6 +8,7 @@
  
  #include <stdlib.h>
  #include <string.h> // TODO: Replace with needed functions
+#include <assert.h>
  
  //-------------------------------------------------------------------------------------
  // Better types
@@ -24,6 +25,7 @@ typedef signed long i64;
  typedef signed long long i128;
  typedef unsigned long isize;
  typedef i32 b32;
+typedef void* ptr;
  
  //-------------------------------------------------------------------------------------
  // Better character functions
@@ -47,6 +49,8 @@ i64 chars_match(char* ptr1, char* ptr2);
  //-------------------------------------------------------------------------------------
  // Better strings
  //-------------------------------------------------------------------------------------
+#ifndef BH_NO_STRING
+
  typedef struct bh__string {
         u64 length;
         u64 capacity;
@@ -95,10 +99,14 @@ void bh_string_trim_end_space(bh_string* str);
  // TEMP
  void bh_string_print(bh_string* str);
  
+#endif
+
  
  //-------------------------------------------------------------------------------------
  // Better files
  //-------------------------------------------------------------------------------------
+#ifndef BH_NO_FILE
+
  typedef enum bh_file_error {
         BH_FILE_ERROR_NONE,
         BH_FILE_ERROR_INVALID
@@ -164,9 +172,13 @@ bh_file_contents bh_file_read_contents_bh_file(bh_file* file);
  bh_file_contents bh_file_read_contents_direct(const char* filename);
  i32 bh_file_contents_delete(bh_file_contents* contents);
  
+#endif
+
  //-------------------------------------------------------------------------------------
  // Better dynamically-sized arrays
  //-------------------------------------------------------------------------------------
+#ifndef BH_NO_ARRAY
+
  typedef struct bh__arr {
         i32 length, capacity;
  } bh__arr;
@@ -220,6 +232,60 @@ void* bh__arr_copy(void *arr, i32 elemsize);
  void bh__arr_insertn(void **arr, i32 elemsize, i32 index, i32 numelems);
  void bh__arr_deleten(void **arr, i32 elemsize, i32 index, i32 numelems);
  
+#endif
+
+//-------------------------------------------------------------------------------------
+// HASH TABLE FUNCTIONS
+//-------------------------------------------------------------------------------------
+#ifndef BH_NO_HASHTABLE
+
+#define BH__HASH_STORED_KEY_SIZE 64
+typedef struct bh__hash_entry {
+       char key[BH__HASH_STORED_KEY_SIZE];
+       // Value follows
+} bh__hash_entry;
+
+#define BH__HASH_MODULUS 1021
+#define BH__HASH_KEYSIZE 16
+// djb2-style string hash (seed 5381, hash * 33 + byte), reduced to a bucket
+// index in the range [0, BH__HASH_MODULUS).
+//   str - bytes to hash; hashing stops early at a NUL byte
+//   len - maximum number of bytes to hash; 0 means BH__HASH_KEYSIZE
+// Declared `static inline` because this definition lives in the header part of
+// the file: with external linkage every translation unit that includes the
+// header would emit its own copy and the program would fail to link.
+static inline u64 bh__hash_function(const char* str, i32 len) {
+       u64 hash = 5381;
+       if (len == 0) len = BH__HASH_KEYSIZE;
+
+       // The bound is tested before the byte is read, so str[len] is never touched.
+       for (i32 l = 0; l < len && str[l] != '\0'; l++) {
+               // Hash the byte as unsigned so the result does not depend on whether
+               // plain `char` is signed on the target platform.
+               hash = (hash << 5) + hash + (unsigned char) str[l];
+       }
+
+       return hash % BH__HASH_MODULUS;
+}
+
+#define bh_hash(T)             T*
+
+// Type-aware front end for the bh__hash_* functions.
+//   T     - value type stored in the table (the table is declared as bh_hash(T))
+//   tab   - the table variable; bh_hash_init/bh_hash_free take its address
+//   key   - NUL-terminated string key
+//   value - value assigned into the slot returned by bh__hash_put
+// With BH_HASH_SIZE_SAFE defined, every accessor first asserts that sizeof(T)
+// equals the element size of the table's declared type.
+// Macro arguments are parenthesized so that arbitrary expressions can be passed.
+#ifdef BH_HASH_SIZE_SAFE
+       #define bh_hash_init(tab)                               bh__hash_init((ptr **) &(tab))
+       #define bh_hash_free(tab)                               bh__hash_free((ptr **) &(tab))
+       #define bh_hash_put(T, tab, key, value) (assert(sizeof(T) == sizeof(*(tab))), (*((T *) bh__hash_put((ptr *) (tab), sizeof(T), (key))) = (T) (value)))
+       #define bh_hash_has(T, tab, key)                (assert(sizeof(T) == sizeof(*(tab))), (bh__hash_has((ptr *) (tab), sizeof(T), (key))))
+       #define bh_hash_get(T, tab, key)                (assert(sizeof(T) == sizeof(*(tab))), (*((T *) bh__hash_get((ptr *) (tab), sizeof(T), (key)))))
+       #define bh_hash_delete(T, tab, key)             (assert(sizeof(T) == sizeof(*(tab))), bh__hash_delete((ptr *) (tab), sizeof(T), (key)))
+#else
+       #define bh_hash_init(tab)                               bh__hash_init((ptr **) &(tab))
+       #define bh_hash_free(tab)                               bh__hash_free((ptr **) &(tab))
+       #define bh_hash_put(T, tab, key, value) (*((T *) bh__hash_put((ptr *) (tab), sizeof(T), (key))) = (value))
+       #define bh_hash_has(T, tab, key)                (bh__hash_has((ptr *) (tab), sizeof(T), (key)))
+       #define bh_hash_get(T, tab, key)                (*((T *) bh__hash_get((ptr *) (tab), sizeof(T), (key))))
+       #define bh_hash_delete(T, tab, key)             (bh__hash_delete((ptr *) (tab), sizeof(T), (key)))
+#endif
+
+b32 bh__hash_init(ptr **table);
+b32 bh__hash_free(ptr **table);
+ptr bh__hash_put(ptr *table, i32 elemsize, char *key);
+b32 bh__hash_has(ptr *table, i32 elemsize, char *key);
+ptr bh__hash_get(ptr *table, i32 elemsize, char *key);
+void bh__hash_delete(ptr *table, i32 elemsize, char *key);
+
+#endif
+
  #ifdef BH_DEFINE
  #undef BH_DEFINE
  
@@ -269,6 +335,8 @@ i64 chars_match(char* ptr1, char* ptr2) {
  //-------------------------------------------------------------------------------------
  // STRING IMPLEMENTATION
  //-------------------------------------------------------------------------------------
+#ifndef BH_NO_STRING
+
  bh_string* bh_string_new_cap(unsigned long cap) {
         bh__string* str;
         str = (bh__string*) malloc(sizeof(*str) + sizeof(char) * cap + 1);
@@ -287,7 +355,7 @@ bh_string* bh_string_new_str(const char* cstr) {
                 data[i] = cstr[i];
         }
  
-       data[i] = 0; // Always null terminate the string
+       data[len] = 0; // Always null terminate the string
  
         str->length = len;
         str->capacity = len;
@@ -396,11 +464,13 @@ void bh_string_print(bh_string* str) {
         write(STDOUT_FILENO, str->data, str->length);
  }
  
-
+#endif // ifndef BH_NO_STRING
  
  //-------------------------------------------------------------------------------------
  // FILE IMPLEMENTATION
  //-------------------------------------------------------------------------------------
+#ifndef BH_NO_FILE
+
  bh_file_error bh_file_get_standard(bh_file* file, bh_file_standard stand) {
         i32 sd_fd = -1;
         const char* filename = NULL;
@@ -578,9 +648,12 @@ b32 bh_file_contents_delete(bh_file_contents* contents) {
         return 1;
  }
  
+#endif // ifndef BH_NO_FILE
+
  //-------------------------------------------------------------------------------------
  // ARRAY IMPLEMENTATION
  //-------------------------------------------------------------------------------------
+#ifndef BH_NO_ARRAY
  
  b32 bh__arr_grow(void** arr, i32 elemsize, i32 cap) {
         bh__arr* arrptr;
@@ -594,7 +667,6 @@ b32 bh__arr_grow(void** arr, i32 elemsize, i32 cap) {
  
         } else {
                 arrptr = bh__arrhead(*arr);
-               if (arrptr->length > cap) return 1;
  
                 if (arrptr->capacity < cap) {
                         void* p;
@@ -688,6 +760,122 @@ void bh__arr_insertn(void **arr, i32 elemsize, i32 index, i32 numelems) {
         }
  }
  
+#endif // ifndef BH_NO_ARRAY
+
+//-------------------------------------------------------------------------------------
+// HASHTABLE IMPLEMENTATION
+//-------------------------------------------------------------------------------------
+#ifndef BH_NO_HASHTABLE
+
+// Allocates the bucket array for a hash table and marks every bucket as empty.
+// On success *table points at BH__HASH_MODULUS NULL buckets and 1 is returned;
+// if malloc fails, *table is set to NULL and 0 is returned.
+b32 bh__hash_init(ptr **table) {
+       ptr *buckets = malloc(sizeof(ptr) * BH__HASH_MODULUS);
+       *table = buckets;
+       if (buckets == NULL) return 0;
+
+       i32 i = 0;
+       while (i < BH__HASH_MODULUS) {
+               buckets[i++] = NULL;
+       }
+
+       return 1;
+}
+
+// Releases every non-empty bucket array, then the bucket table itself, and
+// leaves *table set to NULL.
+// Returns 1 once the table is released; a table that is already NULL is
+// treated as released and also yields 1.
+b32 bh__hash_free(ptr **table) {
+       if (*table == NULL) return 1; // Nothing to release
+
+       for (i32 i = 0; i < BH__HASH_MODULUS; i++) {
+               if ((*table)[i] != NULL) {
+                       bh_arr_free(*((*table) + i));
+               }
+       }
+
+       free(*table);
+       *table = NULL;
+
+       // The function is declared b32, so it must return a value on every path.
+       return 1;
+}
+
+// Assumes NULL terminated string for key
+ptr bh__hash_put(ptr *table, i32 elemsize, char *key) {
+       u64 index = bh__hash_function(key, 0);
+
+       elemsize += sizeof(bh__hash_entry);
+
+       ptr arrptr = table[index];
+       i32 len = bh_arr_length(arrptr);
+
+       while (len--) {
+               if (strncmp(key, (char *) arrptr, BH__HASH_STORED_KEY_SIZE) == 0) goto found_matching;
+               arrptr = (ptr)((char *) arrptr + elemsize);
+       }
+
+       // Didn't find it in the array, make a new one
+       arrptr = table[index];
+       len = bh_arr_length(arrptr);
+       bh__arr_grow(&arrptr, elemsize, len + 1);
+       bh__arrhead(arrptr)->length++;
+       table[index] = arrptr;
+
+       arrptr = (ptr)(((char *) arrptr) + elemsize * len);
+       strncpy(arrptr, key, BH__HASH_STORED_KEY_SIZE);
+
+found_matching:
+       return (ptr)(((char *) arrptr) + BH__HASH_STORED_KEY_SIZE);
+}
+
+// Reports whether `key` is present in the table.
+//   table    - bucket array created by bh__hash_init
+//   elemsize - size in bytes of the value type
+//   key      - NUL-terminated key; at most BH__HASH_STORED_KEY_SIZE bytes are compared
+// Returns 1 when a slot with a matching key exists, 0 otherwise.
+b32 bh__hash_has(ptr *table, i32 elemsize, char *key) {
+       u64 index = bh__hash_function(key, 0);
+
+       ptr bucket = table[index];
+       i32 remaining = bh_arr_length(bucket);
+       if (bucket == NULL) return 0;
+
+       // Slots are laid out as stored key followed by value
+       i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
+
+       for (char *entry = (char *) bucket; remaining != 0; remaining--, entry += stride) {
+               if (strncmp(key, entry, BH__HASH_STORED_KEY_SIZE) == 0) return 1;
+       }
+
+       return 0;
+}
+
+// Looks up `key` and returns a pointer to its value slot.
+//   table    - bucket array created by bh__hash_init
+//   elemsize - size in bytes of the value type
+//   key      - NUL-terminated key; at most BH__HASH_STORED_KEY_SIZE bytes are compared
+// Asserts that the key's bucket is non-NULL. Returns NULL when the bucket
+// holds no slot with a matching key.
+ptr bh__hash_get(ptr *table, i32 elemsize, char *key) {
+       u64 index = bh__hash_function(key, 0);
+
+       ptr bucket = table[index];
+       i32 remaining = bh_arr_length(bucket);
+       assert(bucket != NULL);
+
+       // Slots are laid out as stored key followed by value
+       i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
+
+       for (char *entry = (char *) bucket; remaining != 0; remaining--, entry += stride) {
+               if (strncmp(key, entry, BH__HASH_STORED_KEY_SIZE) != 0) continue;
+
+               // The value sits directly behind the stored key
+               return (ptr)(entry + BH__HASH_STORED_KEY_SIZE);
+       }
+
+       return NULL;
+}
+
+// Removes the slot stored under `key`, if there is one.
+//   table    - bucket array created by bh__hash_init
+//   elemsize - size in bytes of the value type (excluding the stored key)
+//   key      - NUL-terminated key; at most BH__HASH_STORED_KEY_SIZE bytes are compared
+// Does nothing when the key is not in the table.
+void bh__hash_delete(ptr *table, i32 elemsize, char *key) {
+       u64 index = bh__hash_function(key, 0);
+
+       ptr arrptr = table[index];
+       if (arrptr == NULL) return; // Didn't exist
+
+       i32 len = bh_arr_length(arrptr);
+
+       // Slots are laid out as stored key followed by value
+       i32 stride = elemsize + BH__HASH_STORED_KEY_SIZE;
+
+       // Walk a separate cursor so the bucket's base pointer stays intact
+       char *entry = (char *) arrptr;
+       i32 i = 0;
+       while (i < len && strncmp(key, entry, BH__HASH_STORED_KEY_SIZE) != 0) {
+               entry += stride;
+               i++;
+       }
+
+       if (i == len) return; // Didn't exist
+
+       // Delete slot i from the bucket itself: bh__arr_deleten takes a void** to
+       // the array, so it is handed the table entry (the array base), and the
+       // element size is the whole slot so the key is removed along with the value.
+       bh__arr_deleten(&table[index], stride, i, 1);
+}
+
+#endif // ifndef BH_NO_HASHTABLE
+
  #endif // ifdef BH_DEFINE
  
  #endif // ifndef BH_H
diff --git a/docs/parse_grammar b/docs/parse_grammar

index 0a07b3f7158d0e5ec7c858f80959cd13a1b7aba3..a71cd8663b6c9b6f65c1609cfd0e6a54c7e058fd 100644 (file)
--- a/docs/parse_grammar
+++ b/docs/parse_grammar
@@ -2,7 +2,7 @@ Note: ~ is empty
  Goal: Design the language to have no ambiguity
  
  
-SOURCE_FILE = TOP_LEVEL_STATEMENT TOKEN_TYPE_SYM_SEMICOLON SOURCE_FILE | ~
+SOURCE_FILE = TOP_LEVEL_STATEMENT ; SOURCE_FILE | ~
  
  TOP_LEVEL_STATEMENT
         = USE_DECLARATION
@@ -26,9 +26,9 @@ FUNCTION_DECLARATION = proc TOKEN_TYPE_SYMBOL FUNCTION_TYPE BLOCK
  
  FUNCTION_TYPE = :: ( FUNCTION_PARAMS ) -> TOKEN_TYPE_SYMBOL
  
-BLOCK = { STATEMENTS }
-
-STATEMENTS = STATEMENT ; STATEMENTS | ~
+-- This may be too weird...
+BLOCK = { STATEMENTS
+STATEMENTS = STATEMENT ; STATEMENTS | }
  
  STATEMENT
         = ASSIGNMENT_STATEMENT
@@ -42,9 +42,9 @@ ASSIGNMENT_STATEMENT = TOKEN_TYPE_SYMBOL = EXPRESSION
  IF_STATEMENT
         = if EXPRESSION BLOCK ELSEIF_STATEMENT ELSE_STATEMENT
  
-ELSEIF_STATEMENT = TOKEN_TYPE_KEYWORD_ELSEIF EXPRESSION BLOCK ELSEIF_STATEMENT | ~
+ELSEIF_STATEMENT = elseif EXPRESSION BLOCK ELSEIF_STATEMENT | ~
  
-ELSE_STATEMENT = TOKEN_TYPE_KEYWORD_ELSE BLOCK | ~
+ELSE_STATEMENT = else BLOCK | ~
  
  -- This needs to be better
  FOR_STATEMENT = for STATEMENT ; EXPRESSION ; STATEMENT BLOCK
diff --git a/onyx.c b/onyx.c

index 32e38f5baed81a9b4454c37b9fc6a931b77103eb..0378e4adf4c299fbaccd0c66f1b9186dde4ab997 100644 (file)
--- a/onyx.c
+++ b/onyx.c
@@ -1,3 +1,4 @@
+#define BH_NO_STRING
  #define BH_DEFINE
  #include "bh.h"
  
@@ -5,8 +6,8 @@
  
  #include "onyxlex.h"
  
-bh_arr(Token) parse_tokens(bh_file_contents *fc) {
-       Tokenizer tknizer = {
+bh_arr(OnyxToken) parse_tokens(bh_file_contents *fc) {
+       OnyxTokenizer tknizer = {
                 .start                  = fc->data,
                 .curr                   = fc->data,
                 .end                    = fc->data + fc->length - 1,
@@ -14,12 +15,12 @@ bh_arr(Token) parse_tokens(bh_file_contents *fc) {
                 .line_start     = fc->data,
         };
  
-       bh_arr(Token) token_arr = NULL;
+       bh_arr(OnyxToken) token_arr = NULL;
         bh_arr_grow(token_arr, 512);
  
-       Token tk;
+       OnyxToken tk;
         do {
-               tk = get_token(&tknizer);
+               tk = onyx_get_token(&tknizer);
                 bh_arr_push(token_arr, tk);
         } while (tk.type != TOKEN_TYPE_END_STREAM);
  
@@ -37,12 +38,12 @@ int main(int argc, char *argv[]) {
         bh_file_contents fc = bh_file_read_contents(&source_file);
         bh_file_close(&source_file);
  
-       bh_arr(Token) token_arr = parse_tokens(&fc);
+       bh_arr(OnyxToken) token_arr = parse_tokens(&fc);
  
         printf("There are %d tokens (Allocated space for %d tokens)\n", bh_arr_length(token_arr), bh_arr_capacity(token_arr));
  
-       for (Token* it = token_arr; !bh_arr_end(token_arr, it); it++) {
-               printf("%s\n", get_token_type_name(*it));
+       for (OnyxToken* it = token_arr; !bh_arr_end(token_arr, it); it++) {
+               printf("%s\n", onyx_get_token_type_name(*it));
         }
  
         bh_file_contents_delete(&fc);
diff --git a/onyxlex.c b/onyxlex.c

index af930ded9ad0496f5cc48d3185856a076f65ac7a..4e69467d46a081d90940fa29ea2bdf386a06926a 100644 (file)
--- a/onyxlex.c
+++ b/onyxlex.c
@@ -1,7 +1,7 @@
  #include "bh.h"
  #include "onyxlex.h"
  
-static const char* TokenTypeNames[] = {
+static const char* onyx_token_type_names[] = {
         "TOKEN_TYPE_UNKNOWN",
         "TOKEN_TYPE_END_STREAM",
  
@@ -70,7 +70,7 @@ static const char* TokenTypeNames[] = {
  }
  #endif
  
-static b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type) {
+static b32 token_lit(OnyxTokenizer* tokenizer, OnyxToken* tk, char* lit, OnyxTokenType type) {
         i64 len = chars_match(tokenizer->curr, lit);
         if (len > 0) {
                 tk->type = type;
@@ -86,12 +86,12 @@ static b32 token_lit(Tokenizer* tokenizer, Token* tk, char* lit, TokenType type)
         return 0;
  }
  
-const char* get_token_type_name(Token tkn) {
-       return TokenTypeNames[tkn.type];
+const char* onyx_get_token_type_name(OnyxToken tkn) {
+       return onyx_token_type_names[tkn.type];
  }
  
-Token get_token(Tokenizer* tokenizer) {
-       Token tk;
+OnyxToken onyx_get_token(OnyxTokenizer* tokenizer) {
+       OnyxToken tk;
  
         // Skip whitespace
         while (char_is_whitespace(*tokenizer->curr) && tokenizer->curr != tokenizer->end)
diff --git a/onyxlex.h b/onyxlex.h

index 608d133bbb9ca427f073114158dfc20f5f58bf7d..b77320a774b01b434da973e97749bd225c7f9722 100644 (file)
--- a/onyxlex.h
+++ b/onyxlex.h
@@ -3,15 +3,15 @@
  
  #include "bh.h"
  
-typedef struct Tokenizer {
+typedef struct OnyxTokenizer {
         char *start, *curr, *end;
  
         // TODO: Fix the line number and column count
         char* line_start;
         u64 line_number;
-} Tokenizer;
+} OnyxTokenizer;
  
-typedef enum TokenType {
+typedef enum OnyxTokenType {
         TOKEN_TYPE_UNKNOWN,
         TOKEN_TYPE_END_STREAM,
  
@@ -62,16 +62,16 @@ typedef enum TokenType {
         TOKEN_TYPE_LITERAL_NUMERIC,
  
         TOKEN_TYPE_COUNT
-} TokenType;
+} OnyxTokenType;
  
-typedef struct Token {
-       TokenType type;
+typedef struct OnyxToken {
+       OnyxTokenType type;
         char* token;
         isize length;
         u64 line_number, line_column;
-} Token;
+} OnyxToken;
  
-const char* get_token_type_name(Token tkn);
-Token get_token(Tokenizer* tokenizer);
+const char* onyx_get_token_type_name(OnyxToken tkn);
+OnyxToken onyx_get_token(OnyxTokenizer* tokenizer);
  
  #endif
 \ No newline at end of file
diff --git a/onyxparser.c b/onyxparser.c

new file mode 100644 (file)

index 0000000..873380b
--- /dev/null
+++ b/onyxparser.c
@@ -0,0 +1,2 @@
+
+#include "onyxparser.h"
diff --git a/onyxparser.h b/onyxparser.h

new file mode 100644 (file)

index 0000000..eccf961
--- /dev/null
+++ b/onyxparser.h
@@ -0,0 +1,18 @@
+#include "bh.h"
+
+#include "onyxlex.h"
+
+typedef struct OnyxParser { // Parser state: a tokenizer, two token cursors, an identifier table and a token array
+       OnyxTokenizer tokenizer;
+       OnyxToken *prev; // presumably the token consumed before `curr` - TODO confirm once the parser is implemented
+       OnyxToken *curr; // presumably the token currently being examined - TODO confirm
+
+       bh_hash(OnyxIdentifier) idens; // bh_hash(T) expands to T*; NOTE(review): OnyxIdentifier is not declared in bh.h or onyxlex.h as shown here
+
+       bh_arr(OnyxToken) tokens; /* Maybe don't store the whole array? Ask for one as you need it? */
+} OnyxParser;
+
+// Parse node; for now it holds only a pointer to an OnyxToken.
+// The typedef needs a name after the closing brace, otherwise only
+// `struct OnyxParseNode` is declared and the `OnyxParseNode` alias never exists.
+typedef struct OnyxParseNode {
+       OnyxToken *token;
+
+} OnyxParseNode;
+\ No newline at end of file
author	Brendan Hansen <brendan.f.hansen@gmail.com>
	Wed, 13 May 2020 20:00:23 +0000 (15:00 -0500)
committer	Brendan Hansen <brendan.f.hansen@gmail.com>
	Wed, 13 May 2020 20:00:23 +0000 (15:00 -0500)
bh.h		patch \| blob \| history
docs/parse_grammar		patch \| blob \| history
onyx.c		patch \| blob \| history
onyxlex.c		patch \| blob \| history
onyxlex.h		patch \| blob \| history
onyxparser.c	[new file with mode: 0644]	patch \| blob
onyxparser.h	[new file with mode: 0644]	patch \| blob