Skip to content

Commit 155523a

Browse files
authored
[emscripten::val] Support literal utf-8/16 strings (#16349)
It already has support for cstrings with Latin1 encoding. std::string can be used for UTF-8 encoded strings, but at the cost of an extra copy for the wire type. I don't want to extend cstring to UTF encodings, so that it stays fast in most cases. Instead, add new static functions that specifically support u8string and u16string.
1 parent 96ca5bf commit 155523a

File tree

4 files changed

+54
-0
lines changed

4 files changed

+54
-0
lines changed

src/embind/emval.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,18 @@ var LibraryEmVal = {
134134
return Emval.toHandle(getStringOrSymbol(v));
135135
},
136136

137+
// Library entry backing the C++ declaration
// `EM_VAL _emval_new_u8string(const char*)`.
// Decodes the string at pointer `v` with UTF8ToString and returns the Emval
// handle for the resulting JS string.
// NOTE(review): no length is passed, so `v` is presumably NUL-terminated —
// confirm against the UTF8ToString documentation.
_emval_new_u8string__sig: 'ii',
138+
_emval_new_u8string__deps: ['$Emval'],
139+
_emval_new_u8string: function(v) {
140+
return Emval.toHandle(UTF8ToString(v));
141+
},
142+
143+
// Library entry backing the C++ declaration
// `EM_VAL _emval_new_u16string(const char16_t*)`.
// Decodes the string at pointer `v` with UTF16ToString and returns the Emval
// handle for the resulting JS string.
// NOTE(review): no length is passed, so `v` is presumably NUL-terminated —
// confirm against the UTF16ToString documentation.
_emval_new_u16string__sig: 'ii',
144+
_emval_new_u16string__deps: ['$Emval'],
145+
_emval_new_u16string: function(v) {
146+
return Emval.toHandle(UTF16ToString(v));
147+
},
148+
137149
_emval_take_value__sig: 'iii',
138150
_emval_take_value__deps: ['$Emval', '$requireRegisteredType'],
139151
_emval_take_value: function(type, argv) {

system/include/emscripten/val.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ void _emval_run_destructors(EM_DESTRUCTORS handle);
5454
EM_VAL _emval_new_array();
5555
EM_VAL _emval_new_object();
5656
EM_VAL _emval_new_cstring(const char*);
57+
EM_VAL _emval_new_u8string(const char*);
58+
EM_VAL _emval_new_u16string(const char16_t*);
5759

5860
EM_VAL _emval_take_value(TYPEID type, EM_VAR_ARGS argv);
5961

@@ -351,6 +353,14 @@ class val {
351353
return val(internal::_emval_new_object());
352354
}
353355

356+
// Creates a val holding a JS string built from the UTF-8 encoded C string `s`.
// The pointer is handed directly to internal::_emval_new_u8string; no length
// is passed, so `s` is presumably expected to be NUL-terminated (confirm
// against the JS-side decoder).
static val u8string(const char* s) {
357+
return val(internal::_emval_new_u8string(s));
358+
}
359+
360+
// Creates a val holding a JS string built from the UTF-16 encoded string `s`.
// The pointer is handed directly to internal::_emval_new_u16string; no length
// is passed, so `s` is presumably expected to be NUL-terminated (confirm
// against the JS-side decoder).
static val u16string(const char16_t* s) {
361+
return val(internal::_emval_new_u16string(s));
362+
}
363+
354364
static val undefined() {
355365
return val(EM_VAL(internal::_EMVAL_UNDEFINED));
356366
}

tests/embind/test_val.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,26 @@ int main()
639639
ensure(aAsNumberVectorUint32_t.at(2) == 0); // 0 is returned if can not be converted for integers
640640
ensure(aAsNumberVectorUint32_t.at(3) == 100000); // Date returns milliseconds since epoch
641641

642+
test("val u8string(const char* s)");
643+
val::global().set("a", val::u8string(u8"abc"));
644+
ensure_js("a == 'abc'");
645+
val::global().set("a", val::u8string(u8"你好"));
646+
ensure_js_not("a == 'abc'");
647+
ensure_js("a == '你好'");
648+
auto u8_str = val::global()["a"].as<std::string>();
649+
ensure(u8_str == u8"你好");
650+
651+
test("val u16string(const char16_t* s)");
652+
val::global().set("a", val::u16string(u"hello"));
653+
ensure_js("a == 'hello'");
654+
val::global().set("a", val::u16string(u"世界"));
655+
ensure_js_not("a == 'hello'");
656+
ensure_js("a == '世界'");
657+
// UTF-16 encoded SMILING FACE WITH OPEN MOUTH (U+1F603)
658+
const char16_t* s = u"😃 = \U0001F603 is :-D";
659+
val::global().set("a", val::u16string(s));
660+
ensure_js("a == '😃 = \U0001F603 is :-D'");
661+
642662
printf("end\n");
643663
return 0;
644664
}

tests/embind/test_val.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,4 +268,16 @@ pass
268268
pass
269269
pass
270270
pass
271+
test:
272+
val u8string(const char* s)
273+
pass
274+
pass
275+
pass
276+
pass
277+
test:
278+
val u16string(const char16_t* s)
279+
pass
280+
pass
281+
pass
282+
pass
271283
end

0 commit comments

Comments
 (0)