Skip to content

Commit 4dce1f3

Browse files
alex28sh and Space Team
authored and committed
[Wasm] UTF-8 encoding for strings (KT-79357)
1 parent 2580165 commit 4dce1f3

File tree

3 files changed

+26
-8
lines changed

3 files changed

+26
-8
lines changed

compiler/ir/backend.wasm/src/org/jetbrains/kotlin/backend/wasm/ir2wasm/ConstantData.kt

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ private fun addressToString(address: Int): String =
2727
class ConstantDataCharField(val value: WasmSymbol<Char>) : ConstantDataElement() {
2828
constructor(value: Char) : this(WasmSymbol(value))
2929

30-
/** Serializes the character as two little-endian bytes (always the full-width form). */
override fun toBytes(): ByteArray {
    val encoded = ByteArray(2)
    value.owner.toLittleEndianBytes(encoded, false, 0)
    return encoded
}
3131

3232
override fun dump(indent: String, startAddress: Int): String {
3333
return "${addressToString(startAddress)}: $indent i32 : ${value.owner} ;;\n"
@@ -85,9 +85,12 @@ class ConstantDataIntArray(val value: List<WasmSymbol<Int>>) : ConstantDataEleme
8585
class ConstantDataCharArray(val value: List<WasmSymbol<Char>>) : ConstantDataElement() {
8686
constructor(value: CharArray) : this(value.map { WasmSymbol(it) })
8787

88+
// True when no character has a code above 0xFF, i.e. each one fits in a single byte.
// Computed on every access (no backing field).
private val isLatin: Boolean
    get() = value.none { it.owner.code > 0xFF }
90+
8891
/**
 * Serializes the characters into a byte array.
 *
 * When every character fits in one byte ([isLatin]) each character occupies
 * [BYTE_SIZE_BYTES] bytes; otherwise each occupies [CHAR_SIZE_BYTES] bytes in
 * little-endian order. The result has `value.size * stride` bytes, which is the
 * same formula used for `sizeInBytes`.
 */
override fun toBytes(): ByteArray {
    // isLatin scans the whole list on each access, so evaluate it once up front.
    val latin = isLatin
    val stride = if (latin) BYTE_SIZE_BYTES else CHAR_SIZE_BYTES
    return ByteArray(value.size * stride).apply {
        // The write offset must advance by the per-character width; a fixed
        // two-byte step would run past the end of the buffer in one-byte mode.
        value.forEachIndexed { index, symbol ->
            symbol.owner.toLittleEndianBytes(this, latin, index * stride)
        }
    }
}
9396

@@ -96,7 +99,8 @@ class ConstantDataCharArray(val value: List<WasmSymbol<Char>>) : ConstantDataEle
9699
return "${addressToString(startAddress)}: $indent i16[] : ${value.map { it.owner }.toCharArray().contentToString()} ;;\n"
97100
}
98101

99-
// Total encoded size: one byte per character when all characters are Latin, a full char width otherwise.
override val sizeInBytes: Int = run {
    val bytesPerChar = if (isLatin) BYTE_SIZE_BYTES else CHAR_SIZE_BYTES
    value.size * bytesPerChar
}
100104
}
101105

102106
class ConstantDataStruct(val elements: List<ConstantDataElement>) : ConstantDataElement() {
@@ -138,7 +142,9 @@ fun Int.toLittleEndianBytes(to: ByteArray, offset: Int) {
138142
to[offset + 3] = (this ushr 24).toByte()
139143
}
140144

141-
/**
 * Writes this character into [to] starting at [offset].
 *
 * The low byte of the character code is always stored at `to[offset]`.
 * Unless [isLatin] is set, the high byte is also stored at `to[offset + 1]`.
 */
fun Char.toLittleEndianBytes(to: ByteArray, isLatin: Boolean, offset: Int) {
    val codeUnit = this.code
    to[offset] = (codeUnit and 0xFF).toByte()
    if (isLatin) return
    to[offset + 1] = (codeUnit ushr Byte.SIZE_BITS).toByte()
}

compiler/ir/backend.wasm/src/org/jetbrains/kotlin/backend/wasm/ir2wasm/DeclarationGenerator.kt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -623,10 +623,12 @@ fun generateConstExpression(
623623
is IrConstKind.String -> {
624624
val stringValue = expression.value as String
625625
val (literalAddress, literalPoolId) = context.referenceStringLiteralAddressAndId(stringValue)
626+
val isLatin = stringValue.all { it.code in 0..255 }
626627
body.commentGroupStart { "const string: \"$stringValue\"" }
627628
body.buildConstI32Symbol(literalPoolId, location)
628629
body.buildConstI32Symbol(literalAddress, location)
629630
body.buildConstI32(stringValue.length, location)
631+
body.buildConstI32(if (isLatin) 1 else 0, location)
630632
body.buildCall(context.referenceFunction(backendContext.wasmSymbols.stringGetLiteral), location)
631633
body.commentGroupEnd()
632634
}

libraries/stdlib/wasm/builtins/kotlin/String.kt

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -157,13 +157,23 @@ public actual class String internal @WasmPrimitiveConstructor constructor(
157157
internal inline fun WasmCharArray.createString(): String =
158158
String(null, this.len(), this)
159159

160-
internal fun stringLiteral(poolId: Int, startAddress: Int, length: Int): String {
160+
internal fun stringLiteral(poolId: Int, startAddress: Int, length: Int, isLatin: Int = 0): String {
161161
val cached = stringPool[poolId]
162162
if (cached !== null) {
163163
return cached
164164
}
165165

166-
val chars = array_new_data0<WasmCharArray>(startAddress, length)
166+
val chars: WasmCharArray
167+
if (isLatin == 0) {
168+
chars = array_new_data0<WasmCharArray>(startAddress, length)
169+
} else {
170+
val bytes = array_new_data0<WasmByteArray>(startAddress, length)
171+
chars = WasmCharArray(length)
172+
for (i in 0..<length) {
173+
val chr = bytes.get(i).toInt().toChar()
174+
chars.set(i, chr)
175+
}
176+
}
167177
val newString = String(null, length, chars)
168178
stringPool[poolId] = newString
169179
return newString

0 commit comments

Comments
 (0)