From 740b84a253c2daebe65516c14095a538a03d184a Mon Sep 17 00:00:00 2001 From: Brendan Hansen Date: Mon, 7 Dec 2020 10:07:07 -0600 Subject: [PATCH] added StrMap hash table; additions to str library --- core/std/js.onyx | 1 + core/std/wasi.onyx | 1 + core/string.onyx | 65 +++++++++++++++++++++- core/strmap.onyx | 131 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 core/strmap.onyx diff --git a/core/std/js.onyx b/core/std/js.onyx index 9506070c..8dfb8d97 100644 --- a/core/std/js.onyx +++ b/core/std/js.onyx @@ -12,6 +12,7 @@ package core #include_file "core/random" #include_file "core/stdio" #include_file "core/string" +#include_file "core/strmap" #include_file "core/sys/js" diff --git a/core/std/wasi.onyx b/core/std/wasi.onyx index 938de5fa..dabf7e5d 100644 --- a/core/std/wasi.onyx +++ b/core/std/wasi.onyx @@ -13,6 +13,7 @@ package core #include_file "core/random" #include_file "core/stdio" #include_file "core/string" +#include_file "core/strmap" #include_file "core/wasi" #include_file "core/sys/wasi" diff --git a/core/string.onyx b/core/string.onyx index 29afffe0..5872dab3 100644 --- a/core/string.onyx +++ b/core/string.onyx @@ -7,7 +7,7 @@ make :: proc (s: cstring) -> string { } length :: proc { - proc (s: ^u8) -> u32 { + proc (s: cstring) -> u32 { len := 0; c := s; while *c != #char "\0" { @@ -23,6 +23,21 @@ length :: proc { }, } +alloc_copy :: proc (orig: string) -> string { + new_str : string; + new_str.data = calloc(sizeof u8 * orig.count); + new_str.count = orig.count; + copy(orig, new_str); + return new_str; +} + +copy :: proc (orig: string, dest: string) { + len := orig.count; + if dest.count < len do len = dest.count; + + for i: 0 .. len do dest.data[i] = orig.data[i]; +} + concat :: proc (s1: string, s2: string) -> string { len1 :: length(s1); len2 :: length(s2); @@ -345,6 +360,54 @@ read_line :: proc (str: ^string, out: ^string) { } } +read_until :: proc (str: ^string, upto: u8, skip := 0) -> string { + if str.count == 0 do return ""; + + out : string; + out.data = str.data; + out.count = 0; + + s := skip; + for ch: *str { + if ch == upto { + if s <= 0 do break; + else do s -= 1; + } + + out.count += 1; + } + + str.data += out.count; + str.count -= out.count; + + return out; +} + +read_until_either :: proc (str: ^string, skip: u32, uptos: ..u8) -> string { + if str.count == 0 do return ""; + + out : string; + out.data = str.data; + out.count = 0; + + s := skip; + for ch: *str { + for upto: uptos { + if ch == upto { + if s <= 0 do break break; + else do s -= 1; + } + } + + out.count += 1; + } + + str.data += out.count; + str.count -= out.count; + + return out; +} + advance_line :: proc (str: ^string) { if str.count == 0 do return; diff --git a/core/strmap.onyx b/core/strmap.onyx new file mode 100644 index 00000000..f5a3be55 --- /dev/null +++ b/core/strmap.onyx @@ -0,0 +1,131 @@ +package core.strmap + +use package core.array as array +use package core.str as str +use package core { print } + +StrMap :: struct ($T) { + hashes : [..] i32; + entries : [..] StrMapEntry(T); + + key_store : [..] u8; +} + +StrMapEntry :: struct ($T) { + next : i32; + key : string; + value : T; +} + +init :: proc (use smap: ^StrMap($T), hash_count: i32 = 16) { + array.init(^hashes, hash_count); + array.init(^entries, 4); + array.init(^key_store, 16); + + for i: 0 .. hash_count do array.push(^hashes, -1); +} + +free :: proc (use smap: ^StrMap($T)) { + array.free(^hashes); + array.free(^entries); + array.free(^key_store); +} + +put :: proc (use smap: ^StrMap($T), key: string, value: T, copy_key := false) { + lr := lookup(smap, key); + + if lr.entry_index >= 0 { + entries[lr.entry_index].value = value; + return; + } + + // FIX: This is broken at the moment, because the array could relocated + // and that would break all entries key member. + // + // new_key := key; + // if copy_key { + // new_key.data = ^key_store[key_store.count]; + // new_key.count = key.count; + // array.ensure_capacity(^key_store, key_store.count + key.count); + // key_store.count += key.count; + + // str.copy(key, new_key); + // } + + entry : StrMapEntry(T); + entry.key = key; + entry.value = value; + entry.next = hashes[lr.hash_index]; + + array.push(^entries, entry); + + hashes[lr.hash_index] = entries.count - 1; +} + +has :: proc (use smap: ^StrMap($T), key: string) -> bool { + lr := lookup(smap, key); + return lr.entry_index >= 0; +} + +get :: proc (use smap: ^StrMap($T), key: string, default := cast(T) 0) -> T { + lr := lookup(smap, key); + if lr.entry_index >= 0 do return entries[lr.entry_index].value; + + return default; +} + +delete :: proc (use smap: ^StrMap($T), key: string) { + lr := lookup(smap, key); + if lr.entry_index < 0 do return; + + if lr.entry_prev < 0 do hashes[lr.hash_index] = entries[lr.entry_index].next; + else do entries[lr.entry_prev].next = entries[lr.entry_index].next; + + if lr.entry_index == entries.count - 1 { + array.pop(^entries); + return; + } + + array.fast_delete(^entries, lr.entry_index); + last := lookup(smap, entries[lr.entry_index].key); + if last.entry_prev >= 0 do entries[last.entry_prev].next = lr.entry_index; + else do hashes[last.hash_index] = lr.entry_index; +} + +clear :: proc (use smap: ^StrMap($T)) { + for i: 0 .. hashes.count do hashes.data[i] = -1; + entries.count = 0; +} + + + +// +// Private symbols +// + +#private_file +StrMapLookupResult :: struct { + hash_index : i32 = -1; + entry_index : i32 = -1; + entry_prev : i32 = -1; +} + +#private_file +lookup :: proc (use smap: ^StrMap($T), key: string) -> StrMapLookupResult { + lr := StrMapLookupResult.{}; + + hash: u32 = 5381; + for ch: key do hash += (hash << 5) + ~~ch; + + lr.hash_index = hash % hashes.count; + lr.entry_index = hashes[lr.hash_index]; + + while lr.entry_index >= 0 { + if str.equal(entries[lr.entry_index].key, key) do return lr; + + lr.entry_prev = lr.entry_index; + lr.entry_index = entries[lr.entry_index].next; + } + + return lr; +} -- 2.25.1