From: Brendan Hansen Date: Thu, 10 Sep 2020 18:50:46 +0000 (-0500) Subject: added simd example; simd bugfixes X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=0a696419010cc5aa4089569d46ac1fc98329969a;p=onyx.git added simd example; simd bugfixes --- diff --git a/onyx b/onyx index efe0a3be..699262fd 100755 Binary files a/onyx and b/onyx differ diff --git a/progs/particle_sym.onyx b/progs/particle_sym.onyx new file mode 100644 index 00000000..c694db38 --- /dev/null +++ b/progs/particle_sym.onyx @@ -0,0 +1,75 @@ +package main + +#include_file "core/std/wasi" +#include_file "core/simd_intrinsics" + +use package core +use package simd + +main :: proc (args: [] cstring) { + init_positions(); + init_velocities(); + + print("Beginning simulation.\n"); + + for i: 0 .. 20 { + update(); + print(cast(i64) (avg_motion() * 100000000000000000.0f)); + print("\n"); + } +} + +OBJECT_COUNT :: 10000 + +positions : [OBJECT_COUNT] f32x4 +velocities : [OBJECT_COUNT] f32x4 + +init_positions :: proc () { + for ^p: positions { + *p = f32x4_replace_lane(*p, 0, random_float(-127.0f, 127.0f)); + *p = f32x4_replace_lane(*p, 1, random_float(-127.0f, 127.0f)); + *p = f32x4_replace_lane(*p, 2, random_float(-127.0f, 127.0f)); + *p = f32x4_replace_lane(*p, 3, random_float(-127.0f, 127.0f)); + } +} + +init_velocities :: proc () { + for ^v: velocities do *v = f32x4_const(0.0f, 0.0f, 0.0f, 0.0f); +} + +horizontal_add :: proc (f: f32x4) -> f32 { + return f32x4_extract_lane(f, 0) + + f32x4_extract_lane(f, 1) + + f32x4_extract_lane(f, 2) + + f32x4_extract_lane(f, 3); +} + +distance :: proc (x: f32x4, y: f32x4) -> f32 { + d := f32x4_sub(x, y); + return horizontal_add(f32x4_mul(d, d)); +} + +update :: proc () { + for i: 0 .. OBJECT_COUNT { + for j: 0 .. OBJECT_COUNT { + dist := distance(positions[i], positions[j]); + if dist == 0.0f do continue; + + velocities[i] = f32x4_add(velocities[i], f32x4_div(f32x4_sub(positions[j], positions[i]), f32x4_splat(dist))); + } + } + + for i: 0 .. OBJECT_COUNT { + positions[i] = f32x4_add(positions[i], velocities[i]); + } +} + +avg_motion :: proc () -> f32 { + avg_motion := f32x4_splat(0.0f); + obj_vec := f32x4_splat(cast(f32) OBJECT_COUNT); + for v: velocities { + avg_motion = f32x4_add(avg_motion, f32x4_div(v, obj_vec)); + } + + return distance(f32x4_splat(0.0f), avg_motion); +} diff --git a/src/onyxwasm.c b/src/onyxwasm.c index b9312e9e..f5286ec6 100644 --- a/src/onyxwasm.c +++ b/src/onyxwasm.c @@ -1387,7 +1387,7 @@ EMIT_FUNC(call, AstCall* call) { WIP(WI_V128_CONST, byte_buffer); \ } -#define SIMD_LANE_INSTR(instr, arg) \ +#define SIMD_EXTRACT_LANE_INSTR(instr, arg) \ emit_expression(mod, &code, arg->value);\ arg = (AstArgument *) arg->next; \ if (arg->value->kind != Ast_Kind_NumLit) { \ @@ -1397,6 +1397,20 @@ EMIT_FUNC(call, AstCall* call) { } \ WID(instr, (u8) ((AstNumLit *) arg->value)->value.i); +#define SIMD_REPLACE_LANE_INSTR(instr, arg) { \ + emit_expression(mod, &code, arg->value);\ + arg = (AstArgument *) arg->next; \ + if (arg->value->kind != Ast_Kind_NumLit) { \ + onyx_report_error(arg->token->pos, "SIMD lane instructions expect a compile time lane number."); \ + *pcode = code; \ + return; \ + } \ + u8 lane = (u8) ((AstNumLit *) arg->value)->value.i; \ + arg = (AstArgument *) arg->next; \ + emit_expression(mod, &code, arg->value); \ + WID(instr, lane); \ + } + EMIT_FUNC(intrinsic_call, AstIntrinsicCall* call) { bh_arr(WasmInstruction) code = *pcode; @@ -1544,20 +1558,20 @@ EMIT_FUNC(intrinsic_call, AstIntrinsicCall* call) { break; } - case ONYX_INTRINSIC_I8X16_EXTRACT_LANE_S: SIMD_LANE_INSTR(WI_I8X16_EXTRACT_LANE_S, call->arguments); break; - case ONYX_INTRINSIC_I8X16_EXTRACT_LANE_U: SIMD_LANE_INSTR(WI_I8X16_EXTRACT_LANE_U, call->arguments); break; - case ONYX_INTRINSIC_I8X16_REPLACE_LANE: SIMD_LANE_INSTR(WI_I8X16_REPLACE_LANE, call->arguments); break; - case ONYX_INTRINSIC_I16X8_EXTRACT_LANE_S: SIMD_LANE_INSTR(WI_I16X8_EXTRACT_LANE_S, call->arguments); break; - case ONYX_INTRINSIC_I16X8_EXTRACT_LANE_U: SIMD_LANE_INSTR(WI_I16X8_EXTRACT_LANE_U, call->arguments); break; - case ONYX_INTRINSIC_I16X8_REPLACE_LANE: SIMD_LANE_INSTR(WI_I16X8_REPLACE_LANE, call->arguments); break; - case ONYX_INTRINSIC_I32X4_EXTRACT_LANE: SIMD_LANE_INSTR(WI_I32X4_EXTRACT_LANE, call->arguments); break; - case ONYX_INTRINSIC_I32X4_REPLACE_LANE: SIMD_LANE_INSTR(WI_I32X4_REPLACE_LANE, call->arguments); break; - case ONYX_INTRINSIC_I64X2_EXTRACT_LANE: SIMD_LANE_INSTR(WI_I64X2_EXTRACT_LANE, call->arguments); break; - case ONYX_INTRINSIC_I64X2_REPLACE_LANE: SIMD_LANE_INSTR(WI_I64X2_REPLACE_LANE, call->arguments); break; - case ONYX_INTRINSIC_F32X4_EXTRACT_LANE: SIMD_LANE_INSTR(WI_F32X4_EXTRACT_LANE, call->arguments); break; - case ONYX_INTRINSIC_F32X4_REPLACE_LANE: SIMD_LANE_INSTR(WI_F32X4_REPLACE_LANE, call->arguments); break; - case ONYX_INTRINSIC_F64X2_EXTRACT_LANE: SIMD_LANE_INSTR(WI_F64X2_EXTRACT_LANE, call->arguments); break; - case ONYX_INTRINSIC_F64X2_REPLACE_LANE: SIMD_LANE_INSTR(WI_F64X2_REPLACE_LANE, call->arguments); break; + case ONYX_INTRINSIC_I8X16_EXTRACT_LANE_S: SIMD_EXTRACT_LANE_INSTR(WI_I8X16_EXTRACT_LANE_S, call->arguments); break; + case ONYX_INTRINSIC_I8X16_EXTRACT_LANE_U: SIMD_EXTRACT_LANE_INSTR(WI_I8X16_EXTRACT_LANE_U, call->arguments); break; + case ONYX_INTRINSIC_I8X16_REPLACE_LANE: SIMD_REPLACE_LANE_INSTR(WI_I8X16_REPLACE_LANE, call->arguments); break; + case ONYX_INTRINSIC_I16X8_EXTRACT_LANE_S: SIMD_EXTRACT_LANE_INSTR(WI_I16X8_EXTRACT_LANE_S, call->arguments); break; + case ONYX_INTRINSIC_I16X8_EXTRACT_LANE_U: SIMD_EXTRACT_LANE_INSTR(WI_I16X8_EXTRACT_LANE_U, call->arguments); break; + case ONYX_INTRINSIC_I16X8_REPLACE_LANE: SIMD_REPLACE_LANE_INSTR(WI_I16X8_REPLACE_LANE, call->arguments); break; + case ONYX_INTRINSIC_I32X4_EXTRACT_LANE: SIMD_EXTRACT_LANE_INSTR(WI_I32X4_EXTRACT_LANE, call->arguments); break; + case ONYX_INTRINSIC_I32X4_REPLACE_LANE: SIMD_REPLACE_LANE_INSTR(WI_I32X4_REPLACE_LANE, call->arguments); break; + case ONYX_INTRINSIC_I64X2_EXTRACT_LANE: SIMD_EXTRACT_LANE_INSTR(WI_I64X2_EXTRACT_LANE, call->arguments); break; + case ONYX_INTRINSIC_I64X2_REPLACE_LANE: SIMD_REPLACE_LANE_INSTR(WI_I64X2_REPLACE_LANE, call->arguments); break; + case ONYX_INTRINSIC_F32X4_EXTRACT_LANE: SIMD_EXTRACT_LANE_INSTR(WI_F32X4_EXTRACT_LANE, call->arguments); break; + case ONYX_INTRINSIC_F32X4_REPLACE_LANE: SIMD_REPLACE_LANE_INSTR(WI_F32X4_REPLACE_LANE, call->arguments); break; + case ONYX_INTRINSIC_F64X2_EXTRACT_LANE: SIMD_EXTRACT_LANE_INSTR(WI_F64X2_EXTRACT_LANE, call->arguments); break; + case ONYX_INTRINSIC_F64X2_REPLACE_LANE: SIMD_REPLACE_LANE_INSTR(WI_F64X2_REPLACE_LANE, call->arguments); break; case ONYX_INTRINSIC_I8X16_SWIZZLE: WI(WI_I8X16_SWIZZLE); break; case ONYX_INTRINSIC_I8X16_SPLAT: WI(WI_I8X16_SPLAT); break;