Skip to content

Commit 71a1512

Browse files
authored
[Strings] Adopt new instruction binary encoding (#5714)
See WebAssembly/stringref#46. This format is already adopted by V8: https://chromium-review.googlesource.com/c/v8/v8/+/3892695. The text format is left unchanged (see #5607 for a discussion on the subject). I have also added support for string.encode_lossy_utf8 and string.encode_lossy_utf8_array (by allowing the replace policy for Binaryen's string.encode_wtf8 instruction).
1 parent d7b14a6 commit 71a1512

File tree

14 files changed

+254
-124
lines changed

14 files changed

+254
-124
lines changed

src/binaryen-c.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,11 +1038,13 @@ BinaryenOp BinaryenBrOnCast(void) { return BrOnCast; }
10381038
BinaryenOp BinaryenBrOnCastFail(void) { return BrOnCastFail; };
10391039
BinaryenOp BinaryenStringNewUTF8(void) { return StringNewUTF8; }
10401040
BinaryenOp BinaryenStringNewWTF8(void) { return StringNewWTF8; }
1041-
BinaryenOp BinaryenStringNewReplace(void) { return StringNewReplace; }
1041+
BinaryenOp BinaryenStringNewLossyUTF8(void) { return StringNewLossyUTF8; }
10421042
BinaryenOp BinaryenStringNewWTF16(void) { return StringNewWTF16; }
10431043
BinaryenOp BinaryenStringNewUTF8Array(void) { return StringNewUTF8Array; }
10441044
BinaryenOp BinaryenStringNewWTF8Array(void) { return StringNewWTF8Array; }
1045-
BinaryenOp BinaryenStringNewReplaceArray(void) { return StringNewReplaceArray; }
1045+
BinaryenOp BinaryenStringNewLossyUTF8Array(void) {
1046+
return StringNewLossyUTF8Array;
1047+
}
10461048
BinaryenOp BinaryenStringNewWTF16Array(void) { return StringNewWTF16Array; }
10471049
BinaryenOp BinaryenStringNewFromCodePoint(void) {
10481050
return StringNewFromCodePoint;
@@ -1055,9 +1057,13 @@ BinaryenOp BinaryenStringMeasureWTF16View(void) {
10551057
return StringMeasureWTF16View;
10561058
}
10571059
BinaryenOp BinaryenStringEncodeUTF8(void) { return StringEncodeUTF8; }
1060+
BinaryenOp BinaryenStringEncodeLossyUTF8(void) { return StringEncodeLossyUTF8; }
10581061
BinaryenOp BinaryenStringEncodeWTF8(void) { return StringEncodeWTF8; }
10591062
BinaryenOp BinaryenStringEncodeWTF16(void) { return StringEncodeWTF16; }
10601063
BinaryenOp BinaryenStringEncodeUTF8Array(void) { return StringEncodeUTF8Array; }
1064+
BinaryenOp BinaryenStringEncodeLossyUTF8Array(void) {
1065+
return StringEncodeLossyUTF8Array;
1066+
}
10611067
BinaryenOp BinaryenStringEncodeWTF8Array(void) { return StringEncodeWTF8Array; }
10621068
BinaryenOp BinaryenStringEncodeWTF16Array(void) {
10631069
return StringEncodeWTF16Array;

src/binaryen-c.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -690,11 +690,11 @@ BINARYEN_API BinaryenOp BinaryenBrOnCast(void);
690690
BINARYEN_API BinaryenOp BinaryenBrOnCastFail(void);
691691
BINARYEN_API BinaryenOp BinaryenStringNewUTF8(void);
692692
BINARYEN_API BinaryenOp BinaryenStringNewWTF8(void);
693-
BINARYEN_API BinaryenOp BinaryenStringNewReplace(void);
693+
BINARYEN_API BinaryenOp BinaryenStringNewLossyUTF8(void);
694694
BINARYEN_API BinaryenOp BinaryenStringNewWTF16(void);
695695
BINARYEN_API BinaryenOp BinaryenStringNewUTF8Array(void);
696696
BINARYEN_API BinaryenOp BinaryenStringNewWTF8Array(void);
697-
BINARYEN_API BinaryenOp BinaryenStringNewReplaceArray(void);
697+
BINARYEN_API BinaryenOp BinaryenStringNewLossyUTF8Array(void);
698698
BINARYEN_API BinaryenOp BinaryenStringNewWTF16Array(void);
699699
BINARYEN_API BinaryenOp BinaryenStringNewFromCodePoint(void);
700700
BINARYEN_API BinaryenOp BinaryenStringMeasureUTF8(void);
@@ -703,9 +703,11 @@ BINARYEN_API BinaryenOp BinaryenStringMeasureWTF16(void);
703703
BINARYEN_API BinaryenOp BinaryenStringMeasureIsUSV(void);
704704
BINARYEN_API BinaryenOp BinaryenStringMeasureWTF16View(void);
705705
BINARYEN_API BinaryenOp BinaryenStringEncodeUTF8(void);
706+
BINARYEN_API BinaryenOp BinaryenStringEncodeLossyUTF8(void);
706707
BINARYEN_API BinaryenOp BinaryenStringEncodeWTF8(void);
707708
BINARYEN_API BinaryenOp BinaryenStringEncodeWTF16(void);
708709
BINARYEN_API BinaryenOp BinaryenStringEncodeUTF8Array(void);
710+
BINARYEN_API BinaryenOp BinaryenStringEncodeLossyUTF8Array(void);
709711
BINARYEN_API BinaryenOp BinaryenStringEncodeWTF8Array(void);
710712
BINARYEN_API BinaryenOp BinaryenStringEncodeWTF16Array(void);
711713
BINARYEN_API BinaryenOp BinaryenStringAsWTF8(void);

src/ir/effects.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -841,13 +841,13 @@ class EffectAnalyzer {
841841
switch (curr->op) {
842842
case StringNewUTF8:
843843
case StringNewWTF8:
844-
case StringNewReplace:
844+
case StringNewLossyUTF8:
845845
case StringNewWTF16:
846846
parent.readsMemory = true;
847847
break;
848848
case StringNewUTF8Array:
849849
case StringNewWTF8Array:
850-
case StringNewReplaceArray:
850+
case StringNewLossyUTF8Array:
851851
case StringNewWTF16Array:
852852
parent.readsArray = true;
853853
break;
@@ -865,11 +865,13 @@ class EffectAnalyzer {
865865
parent.implicitTrap = true;
866866
switch (curr->op) {
867867
case StringEncodeUTF8:
868+
case StringEncodeLossyUTF8:
868869
case StringEncodeWTF8:
869870
case StringEncodeWTF16:
870871
parent.writesMemory = true;
871872
break;
872873
case StringEncodeUTF8Array:
874+
case StringEncodeLossyUTF8Array:
873875
case StringEncodeWTF8Array:
874876
case StringEncodeWTF16Array:
875877
parent.writesArray = true;

src/js/binaryen.js-post.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -578,11 +578,11 @@ function initializeConstants() {
578578
'BrOnCastFail',
579579
'StringNewUTF8',
580580
'StringNewWTF8',
581-
'StringNewReplace',
581+
'StringNewLossyUTF8',
582582
'StringNewWTF16',
583583
'StringNewUTF8Array',
584584
'StringNewWTF8Array',
585-
'StringNewReplaceArray',
585+
'StringNewLossyUTF8Array',
586586
'StringNewWTF16Array',
587587
'StringNewFromCodePoint',
588588
'StringMeasureUTF8',
@@ -591,9 +591,11 @@ function initializeConstants() {
591591
'StringMeasureIsUSV',
592592
'StringMeasureWTF16View',
593593
'StringEncodeUTF8',
594+
'StringEncodeLossyUTF8',
594595
'StringEncodeWTF8',
595596
'StringEncodeWTF16',
596597
'StringEncodeUTF8Array',
598+
'StringEncodeLossyUTF8Array',
597599
'StringEncodeWTF8Array',
598600
'StringEncodeWTF16Array',
599601
'StringAsWTF8',

src/passes/Print.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2375,7 +2375,7 @@ struct PrintExpressionContents
23752375
case StringNewWTF8:
23762376
printMedium(o, "string.new_wtf8 wtf8");
23772377
break;
2378-
case StringNewReplace:
2378+
case StringNewLossyUTF8:
23792379
printMedium(o, "string.new_wtf8 replace");
23802380
break;
23812381
case StringNewWTF16:
@@ -2391,7 +2391,7 @@ struct PrintExpressionContents
23912391
case StringNewWTF8Array:
23922392
printMedium(o, "string.new_wtf8_array wtf8");
23932393
break;
2394-
case StringNewReplaceArray:
2394+
case StringNewLossyUTF8Array:
23952395
printMedium(o, "string.new_wtf8_array replace");
23962396
break;
23972397
case StringNewWTF16Array:
@@ -2437,6 +2437,9 @@ struct PrintExpressionContents
24372437
case StringEncodeUTF8:
24382438
printMedium(o, "string.encode_wtf8 utf8");
24392439
break;
2440+
case StringEncodeLossyUTF8:
2441+
printMedium(o, "string.encode_wtf8 replace");
2442+
break;
24402443
case StringEncodeWTF8:
24412444
printMedium(o, "string.encode_wtf8 wtf8");
24422445
break;
@@ -2446,6 +2449,9 @@ struct PrintExpressionContents
24462449
case StringEncodeUTF8Array:
24472450
printMedium(o, "string.encode_wtf8_array utf8");
24482451
break;
2452+
case StringEncodeLossyUTF8Array:
2453+
printMedium(o, "string.encode_wtf8_array replace");
2454+
break;
24492455
case StringEncodeWTF8Array:
24502456
printMedium(o, "string.encode_wtf8_array wtf8");
24512457
break;

src/wasm-binary.h

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,16 +1141,21 @@ enum ASTNodes {
11411141
ArrayFill = 0x0f,
11421142
ArrayInitData = 0x54,
11431143
ArrayInitElem = 0x55,
1144-
StringNewWTF8 = 0x80,
1144+
StringNewUTF8 = 0x80,
11451145
StringNewWTF16 = 0x81,
11461146
StringConst = 0x82,
1147+
StringMeasureUTF8 = 0x83,
11471148
StringMeasureWTF8 = 0x84,
11481149
StringMeasureWTF16 = 0x85,
1149-
StringEncodeWTF8 = 0x86,
1150+
StringEncodeUTF8 = 0x86,
11501151
StringEncodeWTF16 = 0x87,
11511152
StringConcat = 0x88,
11521153
StringEq = 0x89,
11531154
StringIsUSV = 0x8a,
1155+
StringNewLossyUTF8 = 0x8b,
1156+
StringNewWTF8 = 0x8c,
1157+
StringEncodeLossyUTF8 = 0x8d,
1158+
StringEncodeWTF8 = 0x8e,
11541159
StringNewUTF8Try = 0x8f,
11551160
StringAsWTF8 = 0x90,
11561161
StringViewWTF8Advance = 0x91,
@@ -1167,10 +1172,14 @@ enum ASTNodes {
11671172
StringCompare = 0xa8,
11681173
StringFromCodePoint = 0xa9,
11691174
StringHash = 0xaa,
1170-
StringNewWTF8Array = 0xb0,
1175+
StringNewUTF8Array = 0xb0,
11711176
StringNewWTF16Array = 0xb1,
1172-
StringEncodeWTF8Array = 0xb2,
1177+
StringEncodeUTF8Array = 0xb2,
11731178
StringEncodeWTF16Array = 0xb3,
1179+
StringNewLossyUTF8Array = 0xb4,
1180+
StringNewWTF8Array = 0xb5,
1181+
StringEncodeLossyUTF8Array = 0xb6,
1182+
StringEncodeWTF8Array = 0xb7,
11741183
StringNewUTF8ArrayTry = 0xb8,
11751184
};
11761185

@@ -1182,12 +1191,6 @@ enum MemoryAccess {
11821191

11831192
enum MemoryFlags { HasMaximum = 1 << 0, IsShared = 1 << 1, Is64 = 1 << 2 };
11841193

1185-
enum StringPolicy {
1186-
UTF8 = 0x00,
1187-
WTF8 = 0x01,
1188-
Replace = 0x02,
1189-
};
1190-
11911194
enum FeaturePrefix {
11921195
FeatureUsed = '+',
11931196
FeatureRequired = '=',

src/wasm.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,12 +574,12 @@ enum StringNewOp {
574574
// Linear memory
575575
StringNewUTF8,
576576
StringNewWTF8,
577-
StringNewReplace,
577+
StringNewLossyUTF8,
578578
StringNewWTF16,
579579
// GC
580580
StringNewUTF8Array,
581581
StringNewWTF8Array,
582-
StringNewReplaceArray,
582+
StringNewLossyUTF8Array,
583583
StringNewWTF16Array,
584584
// Other
585585
StringNewFromCodePoint,
@@ -596,9 +596,11 @@ enum StringMeasureOp {
596596

597597
enum StringEncodeOp {
598598
StringEncodeUTF8,
599+
StringEncodeLossyUTF8,
599600
StringEncodeWTF8,
600601
StringEncodeWTF16,
601602
StringEncodeUTF8Array,
603+
StringEncodeLossyUTF8Array,
602604
StringEncodeWTF8Array,
603605
StringEncodeWTF16Array,
604606
};

0 commit comments

Comments
 (0)