added: `core.hash.md5`; bugfix: `io.read_bytes` and rotation instructions

author Brendan Hansen <brendan.f.hansen@gmail.com>

Tue, 28 Mar 2023 20:07:28 +0000 (15:07 -0500)

committer Brendan Hansen <brendan.f.hansen@gmail.com>

Tue, 28 Mar 2023 20:07:28 +0000 (15:07 -0500)
author Brendan Hansen <brendan.f.hansen@gmail.com>
Tue, 28 Mar 2023 20:07:28 +0000 (15:07 -0500)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Tue, 28 Mar 2023 20:07:28 +0000 (15:07 -0500)
diff --git a/core/hash/md5.onyx b/core/hash/md5.onyx

new file mode 100644 (file)

index 0000000..635a57d
--- /dev/null
+++ b/core/hash/md5.onyx
@@ -0,0 +1,182 @@
+package core.hash.md5
+
+use core {io, memory, conv}
+use core.intrinsics.wasm {rotl_i32}
+
+
+// Overloaded entry point for computing an MD5 digest. The `#overload`
+// definitions that follow add implementations taking a `str` and an
+// `&io.Reader`; each returns an `MD5_Digest`.
+digest :: #match #local -> MD5_Digest {}
+
+// Computes the MD5 digest of the bytes of `x` by wrapping the string in a
+// reader and delegating to the reader overload of `digest`.
+#overload
+digest :: (x: str) -> MD5_Digest {
+    reader, stream := io.reader_from_string(x);
+
+    // Release both halves of the temporary reader once hashing is done.
+    defer cfree(stream);
+    defer delete(&reader);
+
+    return digest(&reader);
+}
+
+// Computes the MD5 digest of everything readable from `r`. Input is consumed
+// in 64-byte blocks; the first read that yields fewer than 64 bytes is
+// treated as the final, partial block and handed to `finish`.
+#overload
+digest :: (r: &io.Reader) -> MD5_Digest {
+    state := MD5_Digest.make();
+
+    block: [64] u8;
+    tail_length := 0;
+
+    while !r->is_empty() {
+        bytes_read, err := r->read_bytes(block);
+
+        if bytes_read >= 64 {
+            do_cycle(&state, block);
+            continue;
+        }
+
+        // A short read is taken to be the tail of the input. This is
+        // subject to read_pending errors: a reader that returns early
+        // because it does not yet have enough bytes ends the digest here.
+        tail_length = bytes_read;
+        break;
+    }
+
+    state->finish(block[0 .. tail_length]);
+    return state;
+}
+
+
+// Running state of an MD5 computation.
+MD5_Digest :: struct {
+    // The four 32-bit words of the hash state.
+    a: u32;
+    b: u32;
+    c: u32;
+    d: u32;
+
+    // Number of message bytes counted so far; `finish` multiplies this
+    // by 8 to produce the length field written into the last block.
+    bytes_digested: u64;
+}
+
+#inject MD5_Digest {
+    #doc "Creates a digest holding the initial MD5 state with no bytes digested."
+    make :: () => MD5_Digest.{
+        a = 0x67452301,
+        b = 0xefcdab89,
+        c = 0x98badcfe,
+        d = 0x10325476,
+        bytes_digested = 0,
+    }
+
+    #doc "Restores the digest to its initial state so it can be reused."
+    reset :: (self: &#Self) {
+        *self = MD5_Digest.make();
+    }
+
+    #doc "Pads and digests the final partial block (fewer than 64 bytes), completing the hash."
+    finish :: (self: &#Self, tail: [] u8) {
+        assert(tail.count < 64, "Tail too long");
+
+        bytes_to_digest: [64] u8;
+        memory.copy(~~bytes_to_digest, tail.data, tail.count);
+
+        // Padding always starts with a single 1 bit right after the message.
+        bytes_to_digest[tail.count] = 0x80;
+
+        // The tail is part of the message regardless of how many padding
+        // blocks are needed, so it must always count toward the encoded
+        // length. (Previously it was skipped when tail.count >= 56.)
+        self.bytes_digested += ~~tail.count;
+
+        if tail.count >= 56 {
+            // No room is left for the 8-byte length field: digest this
+            // block and place the length in an additional zeroed block.
+            do_cycle(self, bytes_to_digest, accumulate=false);
+            memory.set(~~bytes_to_digest, 0, 64);
+        }
+
+        // Message length in bits, stored in the last 8 bytes of the block.
+        *cast(&u64, &bytes_to_digest[56]) = self.bytes_digested * 8;
+
+        do_cycle(self, bytes_to_digest, accumulate=false);
+    }
+
+    #doc "Returns a temporary byte array of the hash."
+    as_str :: (self: #Self) -> [] u8 {
+        // Bytes are emitted least-significant first from a, b, c, then d.
+        result := make_temp([] u8, 16);
+        for i: 0  .. 4  do result[i] = ~~((self.a & (0xff << shift(i))) >> shift(i));
+        for i: 4  .. 8  do result[i] = ~~((self.b & (0xff << shift(i))) >> shift(i));
+        for i: 8  .. 12 do result[i] = ~~((self.c & (0xff << shift(i))) >> shift(i));
+        for i: 12 .. 16 do result[i] = ~~((self.d & (0xff << shift(i))) >> shift(i));
+        return result;
+    }
+
+    #doc "Returns a temporary string of the hash."
+    as_hex_str :: (self: #Self) -> str {
+        // Same byte order as `as_str`. The mask-then-shift grouping is
+        // parenthesized explicitly so it does not depend on the relative
+        // precedence of `&` and `>>`.
+        result := make_temp([..] u8, 32);
+        for i: 0  .. 4  do conv.format(&result, "{w2b16}", ((self.a & (0xff << shift(i))) >> shift(i)));
+        for i: 4  .. 8  do conv.format(&result, "{w2b16}", ((self.b & (0xff << shift(i))) >> shift(i)));
+        for i: 8  .. 12 do conv.format(&result, "{w2b16}", ((self.c & (0xff << shift(i))) >> shift(i)));
+        for i: 12 .. 16 do conv.format(&result, "{w2b16}", ((self.d & (0xff << shift(i))) >> shift(i)));
+        return result;
+    }
+}
+
+// Bit offset (0, 8, 16 or 24) of byte number `x mod 4` within a 32-bit word.
+#local
+shift :: macro (x: u32) => ((x % 4) * 8);
+
+// Runs the 64 MD5 steps over one 64-byte block and adds the result into the
+// state words of `digest`. When `accumulate` is true the block's 64 bytes are
+// also added to `digest.bytes_digested`.
+#local
+do_cycle :: (digest: &MD5_Digest, bytes: [64] u8, accumulate := true) {
+    // The block is read as 32-bit words in place. This assumes a
+    // little-endian implementation, but that is assumed by WebAssembly.
+    words: [&] u32 = ~~bytes;
+
+    a := digest.a;
+    b := digest.b;
+    c := digest.c;
+    d := digest.d;
+
+    for step: 0 .. 64 {
+        mixed, word_index: u32;
+
+        // Each group of 16 steps uses its own mixing function and its own
+        // order of visiting the message words.
+        if step < 16 {
+            mixed = (b & c) | (~b & d);
+            word_index = step;
+        }
+        elseif step < 32 {
+            mixed = (d & b) | (~d & c);
+            word_index = (5 * step + 1) % 16;
+        }
+        elseif step < 48 {
+            mixed = b ^ (c ^ d);
+            word_index = (3 * step + 5) % 16;
+        }
+        else {
+            mixed = c ^ (b | ~d);
+            word_index = (7 * step) % 16;
+        }
+
+        mixed += a + K[step] + words[word_index];
+
+        // Rotate the working registers, folding the mixed value into b.
+        a = d;
+        d = c;
+        c = b;
+        b += rotl_i32(mixed, shifts[step]);
+    }
+
+    digest.a += a;
+    digest.b += b;
+    digest.c += c;
+    digest.d += d;
+
+    if accumulate do digest.bytes_digested += 64;
+}
+
+
+#local {
+    // Left-rotation amount for each of the 64 steps; do_cycle indexes this
+    // by step number. Each row holds the 16 steps of one round.
+    shifts := u32.[
+        7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,
+        5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,
+        4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,
+        6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21
+    ];
+
+    // Additive constant for each of the 64 steps; do_cycle indexes this by
+    // step number.
+    // NOTE(review): these appear to be the sine-derived constants from
+    // RFC 1321 -- confirm against the specification before editing.
+    K := u32.[
+        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
+        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+    ]
+}
+
+
diff --git a/core/io/reader.onyx b/core/io/reader.onyx

index d7d506d79184e74af043d0035d9f907b4fa6c10f..66130e3c01b54de33c0ae614e11f04bdd9f5c6cb 100644 (file)
--- a/core/io/reader.onyx
+++ b/core/io/reader.onyx
@@ -182,8 +182,8 @@ read_bytes :: (use reader: &Reader, bytes: [] u8) -> (i32, Error) {
          start += to_write;
      }
  
-    last_byte = cast(i32) bytes[bytes.count - 1];
-    return bytes.count, .None;
+    last_byte = cast(i32) bytes[write_index - 1];
+    return write_index, .None;
  }
  
  read_string :: (use reader: &Reader, bytes := 1, allocator := context.allocator) -> str {
diff --git a/core/std.onyx b/core/std.onyx

index fcd009b05d6fd63de3de8c43d01b4ef44b438032..58aff9950a92f8d497b1233767fa5f9e016adca6 100644 (file)
--- a/core/std.onyx
+++ b/core/std.onyx
@@ -24,7 +24,9 @@ use runtime
  
  #load "./math/math"
  #load "./random/random"
+
  #load "./hash/hash"
+#load "./hash/md5"
  
  #load "./string/string"
  #load "./string/buffer"
diff --git a/interpreter/src/vm/vm_instrs.h b/interpreter/src/vm/vm_instrs.h

index c1916ceceaa8a4a15195311c50a02142c258d94d..131f3ee72d3a0d7a0f95181e872f907bc74e8e57 100644 (file)
--- a/interpreter/src/vm/vm_instrs.h
+++ b/interpreter/src/vm/vm_instrs.h
@@ -100,10 +100,39 @@ OVM_OP_INTEGER_EXEC(sar, >>)
      VAL(instr->r).ctype = func( VAL(instr->a).ctype, VAL(instr->b).ctype ); \
      VAL(instr->r).type = t;
  
-OVMI_INSTR_EXEC(rotl_i32) { OVM_OP(OVM_TYPE_I32, __rold, u32); NEXT_OP; }
-OVMI_INSTR_EXEC(rotl_i64) { OVM_OP(OVM_TYPE_I64, __rolq, u64); NEXT_OP; }
-OVMI_INSTR_EXEC(rotr_i32) { OVM_OP(OVM_TYPE_I32, __rord, u32); NEXT_OP; }
-OVMI_INSTR_EXEC(rotr_i64) { OVM_OP(OVM_TYPE_I64, __rorq, u64); NEXT_OP; }
+#ifndef ROTATION_FUNCTIONS
+#define ROTATION_FUNCTIONS
+
+// Rotates `value` left by `count` bits. The count is reduced modulo 32 (the
+// behaviour WebAssembly specifies for i32.rotl), which also keeps both shifts
+// below strictly under the operand width; shifting a 32-bit value by 32 or
+// more is undefined behaviour in C.
+static inline u32 rotl32(u32 value, u32 count) {
+    const u32 mask = 31;
+    count &= mask;
+    // (-count & mask) is (32 - count) mod 32, so count == 0 shifts by 0.
+    return (value << count) | (value >> (-count & mask));
+}
+
+// Rotates `value` left by `count` bits. The count is reduced modulo 64 (the
+// behaviour WebAssembly specifies for i64.rotl), which also keeps both shifts
+// below strictly under the operand width; shifting a 64-bit value by 64 or
+// more is undefined behaviour in C.
+static inline u64 rotl64(u64 value, u32 count) {
+    const u32 mask = 63;
+    count &= mask;
+    // (-count & mask) is (64 - count) mod 64, so count == 0 shifts by 0.
+    return (value << count) | (value >> (-count & mask));
+}
+
+// Rotates `value` right by `count` bits. The count is reduced modulo 32 (the
+// behaviour WebAssembly specifies for i32.rotr), which also keeps both shifts
+// below strictly under the operand width; shifting a 32-bit value by 32 or
+// more is undefined behaviour in C.
+static inline u32 rotr32(u32 value, u32 count) {
+    const u32 mask = 31;
+    count &= mask;
+    // (-count & mask) is (32 - count) mod 32, so count == 0 shifts by 0.
+    return (value >> count) | (value << (-count & mask));
+}
+
+// Rotates `value` right by `count` bits. The count is reduced modulo 64 (the
+// behaviour WebAssembly specifies for i64.rotr), which also keeps both shifts
+// below strictly under the operand width; shifting a 64-bit value by 64 or
+// more is undefined behaviour in C.
+static inline u64 rotr64(u64 value, u32 count) {
+    const u32 mask = 63;
+    count &= mask;
+    // (-count & mask) is (64 - count) mod 64, so count == 0 shifts by 0.
+    return (value >> count) | (value << (-count & mask));
+}
+
+#endif
+
+// Rotate instruction handlers. Each applies the matching rotl32/rotl64/
+// rotr32/rotr64 helper defined above to operands a and b via OVM_OP, storing
+// the result in r, then continues with the next instruction.
+OVMI_INSTR_EXEC(rotl_i32) { OVM_OP(OVM_TYPE_I32, rotl32, u32); NEXT_OP; }
+OVMI_INSTR_EXEC(rotl_i64) { OVM_OP(OVM_TYPE_I64, rotl64, u64); NEXT_OP; }
+OVMI_INSTR_EXEC(rotr_i32) { OVM_OP(OVM_TYPE_I32, rotr32, u32); NEXT_OP; }
+OVMI_INSTR_EXEC(rotr_i64) { OVM_OP(OVM_TYPE_I64, rotr64, u64); NEXT_OP; }
  
  OVM_OP_FLOAT_EXEC(min, bh_min)
  OVM_OP_FLOAT_EXEC(max, bh_max)
author	Brendan Hansen <brendan.f.hansen@gmail.com>
	Tue, 28 Mar 2023 20:07:28 +0000 (15:07 -0500)
committer	Brendan Hansen <brendan.f.hansen@gmail.com>
	Tue, 28 Mar 2023 20:07:28 +0000 (15:07 -0500)
core/hash/md5.onyx	[new file with mode: 0644]	patch \| blob
core/io/reader.onyx		patch \| blob \| history
core/std.onyx		patch \| blob \| history
interpreter/src/vm/vm_instrs.h		patch \| blob \| history