diff --git a/NOTICE b/NOTICE
index 14d821345c..cd28e9fd96 100644
--- a/NOTICE
+++ b/NOTICE
@@ -62,6 +62,7 @@ under the licensing terms detailed in LICENSE:
 * Kam Chehresa
 * Mopsgamer <79159094+Mopsgamer@users.noreply.github.com>
 * EDM115
+* Geraint Luff
 
 Portions of this software are derived from third-party works licensed under
 the following terms:
diff --git a/src/builtins.ts b/src/builtins.ts
index f876049603..4f90cb8ee6 100644
--- a/src/builtins.ts
+++ b/src/builtins.ts
@@ -749,6 +749,7 @@ export namespace BuiltinNames {
   export const memory_copy = "~lib/memory/memory.copy";
   export const memory_fill = "~lib/memory/memory.fill";
   export const memory_data = "~lib/memory/memory.data";
+  export const memory_dataUTF8 = "~lib/memory/memory.dataUTF8";
 
   // std/typedarray.ts
   export const Int8Array = "~lib/typedarray/Int8Array";
@@ -3491,6 +3492,79 @@ function builtin_memory_data(ctx: BuiltinFunctionContext): ExpressionRef {
 }
 builtinFunctions.set(BuiltinNames.memory_data, builtin_memory_data);
 
+/**
+ * Encodes `str` as UTF-8 followed by a single NUL terminator byte.
+ * Unpaired surrogates are encoded as U+FFFD, like `TextEncoder` does.
+ * Hand-rolled so no host-provided `TextEncoder` global is required.
+ */
+function encodeNullTerminatedUTF8(str: string): Uint8Array {
+  let length = str.length;
+  // Worst case is three bytes per UTF-16 code unit, plus the terminator.
+  let buffer = new Uint8Array(length * 3 + 1);
+  let pos = 0;
+  for (let i = 0; i < length; ++i) {
+    let cp = str.charCodeAt(i);
+    if (cp >= 0xD800 && cp <= 0xDFFF) {
+      let next = cp <= 0xDBFF && i + 1 < length ? str.charCodeAt(i + 1) : 0;
+      if (next >= 0xDC00 && next <= 0xDFFF) {
+        cp = 0x10000 + ((cp - 0xD800) << 10) + (next - 0xDC00);
+        ++i;
+      } else {
+        cp = 0xFFFD;
+      }
+    }
+    if (cp < 0x80) {
+      buffer[pos++] = cp;
+    } else if (cp < 0x800) {
+      buffer[pos++] = 0xC0 | (cp >> 6);
+      buffer[pos++] = 0x80 | (cp & 0x3F);
+    } else if (cp < 0x10000) {
+      buffer[pos++] = 0xE0 | (cp >> 12);
+      buffer[pos++] = 0x80 | ((cp >> 6) & 0x3F);
+      buffer[pos++] = 0x80 | (cp & 0x3F);
+    } else {
+      buffer[pos++] = 0xF0 | (cp >> 18);
+      buffer[pos++] = 0x80 | ((cp >> 12) & 0x3F);
+      buffer[pos++] = 0x80 | ((cp >> 6) & 0x3F);
+      buffer[pos++] = 0x80 | (cp & 0x3F);
+    }
+  }
+  // The buffer is zero-initialized, so the byte at `pos` is the terminator.
+  return buffer.slice(0, pos + 1);
+}
+
+// memory.dataUTF8(value) -> usize
+function builtin_memory_dataUTF8(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  let usizeType = compiler.options.usizeType;
+  // Set the result type up front so the error paths below report it as well.
+  compiler.currentType = usizeType;
+  if (
+    checkTypeAbsent(ctx) |
+    checkArgsRequired(ctx, 1)
+  ) return module.unreachable();
+  let arg0 = ctx.operands[0];
+  // Only a string literal is accepted, since the bytes are emitted statically.
+  if (!arg0.isLiteralKind(LiteralKind.String)) {
+    compiler.error(
+      DiagnosticCode.String_literal_expected,
+      arg0.range
+    );
+    return module.unreachable();
+  }
+  let bytes = encodeNullTerminatedUTF8((<StringLiteralExpression>arg0).value);
+  // FIXME: what if recompiles happen? recompiles are bad.
+  let offset = compiler.addAlignedMemorySegment(bytes, 1).offset;
+  if (usizeType == Type.usize32) {
+    assert(!i64_high(offset));
+    return module.i32(i64_low(offset));
+  } else {
+    return module.i64(i64_low(offset), i64_high(offset));
+  }
+}
+builtinFunctions.set(BuiltinNames.memory_dataUTF8, builtin_memory_dataUTF8);
+
 // === GC =====================================================================================
 
 function builtin_i31_new(ctx: BuiltinFunctionContext): ExpressionRef {
diff --git a/std/assembly/index.d.ts b/std/assembly/index.d.ts
index a604397320..fdd20ffdc5 100644
--- a/std/assembly/index.d.ts
+++ b/std/assembly/index.d.ts
@@ -1798,6 +1798,8 @@ declare namespace memory {
   export function data(size: i32, align?: i32): usize;
   /** Gets a pointer to a pre-initialized static chunk of memory. Alignment defaults to the size of `T`. Arguments must be compile-time constants. */
   export function data<T>(values: T[], align?: i32): usize;
+  /** Gets a pointer to a pre-initialized static chunk of memory containing the null-terminated UTF-8 encoding of `value`. The argument must be a string literal. */
+  export function dataUTF8(value: string): usize;
 
   export namespace atomic {
     /** Performs a wait operation on a 32-bit integer value in memory suspending this agent if the condition is met. */
diff --git a/std/assembly/memory.ts b/std/assembly/memory.ts
index 5bff3516f7..e83b0c25ca 100644
--- a/std/assembly/memory.ts
+++ b/std/assembly/memory.ts
@@ -76,6 +76,11 @@ export namespace memory {
   // @ts-ignore: decorator
   @builtin
   export declare function data<T>(size: T, align?: i32): usize;
+
+  /** Gets a pointer to the null-terminated UTF-8 encoding of a string literal in static memory. */
+  // @ts-ignore: decorator
+  @builtin
+  export declare function dataUTF8(value: string): usize;
 }
 
 // @ts-ignore: decorator
diff --git a/tests/compiler/memory.debug.wat b/tests/compiler/memory.debug.wat
index f9d1689708..994ec328c9 100644
--- a/tests/compiler/memory.debug.wat
+++ b/tests/compiler/memory.debug.wat
@@ -4,9 +4,9 @@
  (type $2 (func (param i32 i32 i32 i32)))
  (import "env" "abort" (func $~lib/builtins/abort (param i32 i32 i32 i32)))
  (global $memory/ptr (mut i32) (i32.const 80))
- (global $~lib/memory/__data_end i32 (i32.const 212))
- (global $~lib/memory/__stack_pointer (mut i32) (i32.const 32980))
- (global $~lib/memory/__heap_base i32 (i32.const 32980))
+ (global $~lib/memory/__data_end i32 (i32.const 220))
+ (global $~lib/memory/__stack_pointer (mut i32) (i32.const 32988))
+ (global $~lib/memory/__heap_base i32 (i32.const 32988))
  (memory $0 1)
  (data $0 (i32.const 16) "\00\00\00\00")
  (data $1 (i32.const 28) ",\00\00\00\00\00\00\00\00\00\00\00\02\00\00\00\12\00\00\00m\00e\00m\00o\00r\00y\00.\00t\00s\00\00\00\00\00\00\00\00\00\00\00")
@@ -33,6 +33,8 @@
  (data $22 (i32.const 206) "\01")
  (data $23 (i32.const 207) "\01")
  (data $24 (i32.const 208) "\01")
+ (data $25 (i32.const 209) ":)\00")
+ (data $26 (i32.const 212) "\f0\9f\90\8c\00")
  (table $0 1 1 funcref)
  (elem $0 (i32.const 1))
  (export "memory" (memory $0))
@@ -475,6 +477,126 @@
    call $~lib/builtins/abort
    unreachable
   end
+  i32.const 209
+  global.set $memory/ptr
+  global.get $memory/ptr
+  i32.load8_u
+  i32.const 58
+  i32.eq
+  i32.eqz
+  if
+   i32.const 0
+   i32.const 48
+   i32.const 66
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.const 1
+  i32.add
+  i32.load8_u
+  i32.const 41
+  i32.eq
+  i32.eqz
+  if
+   i32.const 0
+   i32.const 48
+   i32.const 67
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.const 2
+  i32.add
+  i32.load8_u
+  i32.const 0
+  i32.eq
+  i32.eqz
+  if
+   i32.const 0
+   i32.const 48
+   i32.const 68
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  i32.const 212
+  global.set $memory/ptr
+  global.get $memory/ptr
+  i32.load8_u
+  i32.const 240
+  i32.eq
+  i32.eqz
+  if
+   i32.const 0
+   i32.const 48
+   i32.const 71
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.const 1
+  i32.add
+  i32.load8_u
+  i32.const 159
+  i32.eq
+  i32.eqz
+  if
+   i32.const 0
+   i32.const 48
+   i32.const 72
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.const 2
+  i32.add
+  i32.load8_u
+  i32.const 144
+  i32.eq
+  i32.eqz
+  if
+   i32.const 0
+   i32.const 48
+   i32.const 73
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.const 3
+  i32.add
+  i32.load8_u
+  i32.const 140
+  i32.eq
+  i32.eqz
+  if
+   i32.const 0
+   i32.const 48
+   i32.const 74
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.const 4
+  i32.add
+  i32.load8_u
+  i32.const 0
+  i32.eq
+  i32.eqz
+  if
+   i32.const 0
+   i32.const 48
+   i32.const 75
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
  )
  (func $~start
   call $start:memory
diff --git a/tests/compiler/memory.release.wat b/tests/compiler/memory.release.wat
index b1afa82e8f..dc4aa28e5a 100644
--- a/tests/compiler/memory.release.wat
+++ b/tests/compiler/memory.release.wat
@@ -14,6 +14,8 @@
  (data $22 (i32.const 1214) "\01")
  (data $23 (i32.const 1215) "\01")
  (data $24 (i32.const 1216) "\01")
+ (data $25 (i32.const 1217) ":)")
+ (data $26 (i32.const 1220) "\f0\9f\90\8c")
  (export "memory" (memory $0))
  (start $~start)
  (func $start:memory
@@ -256,6 +258,102 @@
   global.set $memory/ptr
   i32.const 1215
   global.set $memory/ptr
+  i32.const 1217
+  global.set $memory/ptr
+  i32.const 1217
+  i32.load8_u
+  i32.const 58
+  i32.ne
+  if
+   i32.const 0
+   i32.const 1056
+   i32.const 66
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.load8_u offset=1
+  i32.const 41
+  i32.ne
+  if
+   i32.const 0
+   i32.const 1056
+   i32.const 67
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.load8_u offset=2
+  if
+   i32.const 0
+   i32.const 1056
+   i32.const 68
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  i32.const 1220
+  global.set $memory/ptr
+  i32.const 1220
+  i32.load8_u
+  i32.const 240
+  i32.ne
+  if
+   i32.const 0
+   i32.const 1056
+   i32.const 71
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.load8_u offset=1
+  i32.const 159
+  i32.ne
+  if
+   i32.const 0
+   i32.const 1056
+   i32.const 72
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.load8_u offset=2
+  i32.const 144
+  i32.ne
+  if
+   i32.const 0
+   i32.const 1056
+   i32.const 73
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.load8_u offset=3
+  i32.const 140
+  i32.ne
+  if
+   i32.const 0
+   i32.const 1056
+   i32.const 74
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
+  global.get $memory/ptr
+  i32.load8_u offset=4
+  if
+   i32.const 0
+   i32.const 1056
+   i32.const 75
+   i32.const 1
+   call $~lib/builtins/abort
+   unreachable
+  end
  )
  (func $~start
   call $start:memory
diff --git a/tests/compiler/memory.ts b/tests/compiler/memory.ts
index 3d19bb838b..2bc0265f9d 100644
--- a/tests/compiler/memory.ts
+++ b/tests/compiler/memory.ts
@@ -59,3 +59,17 @@ assert(ptr + 4 == (ptr = memory.data<u8>([1], 4)));
 assert(ptr + 2 == (ptr = memory.data<u8>([1], 2)));
 assert(ptr + 1 == (ptr = memory.data<u8>([1], 1)));
 assert(ptr + 1 == memory.data<u8>([1], 16));
+
+// Should correctly encode strings to UTF-8
+
+ptr = memory.dataUTF8(":)");
+assert(load<u8>(ptr) == 0x3A);
+assert(load<u8>(ptr + 1) == 0x29);
+assert(load<u8>(ptr + 2) == 0);
+
+ptr = memory.dataUTF8("🐌");
+assert(load<u8>(ptr) == 0xF0);
+assert(load<u8>(ptr + 1) == 0x9F);
+assert(load<u8>(ptr + 2) == 0x90);
+assert(load<u8>(ptr + 3) == 0x8C);
+assert(load<u8>(ptr + 4) == 0x00);