Fix handling of uint64_t in Embind (#24285)

RReverser · web-flow · commit 670290895c82 · 2025-05-09T01:52:33.000+01:00
Before this fix, `uint64_t` values were returned from Embind as signed integers (e.g. if you returned `UINT64_MAX`, it came back as `-1`). I fixed that behaviour so that unsigned 64-bit integers are correctly "fixed up" the same way they already are for `uint32_t`, and added tests for 64-bit integer limits to `test_i64_binding`. In the process I also had to delete the invalid test `other.test_embind_long_long`: it had incorrect expectations (see commit message for more details) and is now superseded by the correct test mentioned above. Fixes #20354. Closes #20353. Fixes #13902.
diff --git a/src/lib/libembind.js b/src/lib/libembind.js
@@ -318,59 +318,51 @@ var LibraryEmbind = {
     }
   },
 
+#if ASSERTIONS
+  $assertIntegerRange__deps: ['$embindRepr'],
+  $assertIntegerRange: (typeName, value, minRange, maxRange) => {
+    if (value < minRange || value > maxRange) {
+      throw new TypeError(`Passing a number "${embindRepr(value)}" from JS side to C/C++ side to an argument of type "${typeName}", which is outside the valid range [${minRange}, ${maxRange}]!`);
+    }
+  },
+#endif
+
   _embind_register_integer__docs: '/** @suppress {globalThis} */',
   // When converting a number from JS to C++ side, the valid range of the number is
   // [minRange, maxRange], inclusive.
   _embind_register_integer__deps: [
     '$integerReadValueFromPointer', '$readLatin1String', '$registerType',
 #if ASSERTIONS
     '$embindRepr',
+    '$assertIntegerRange',
 #endif
   ],
   _embind_register_integer: (primitiveType, name, size, minRange, maxRange) => {
     name = readLatin1String(name);
-    // LLVM doesn't have signed and unsigned 32-bit types, so u32 literals come
-    // out as 'i32 -1'. Always treat those as max u32.
-    if (maxRange === -1) {
-      maxRange = 4294967295;
-    }
 
-    var fromWireType = (value) => value;
+    const isUnsignedType = minRange === 0;
 
-    if (minRange === 0) {
+    let fromWireType = (value) => value;
+    if (isUnsignedType) {
       var bitshift = 32 - 8*size;
       fromWireType = (value) => (value << bitshift) >>> bitshift;
+      maxRange = fromWireType(maxRange);
     }
 
-    var isUnsignedType = (name.includes('unsigned'));
-    var checkAssertions = (value, toTypeName) => {
+    registerType(primitiveType, {
+      name,
+      'fromWireType': fromWireType,
+      'toWireType': (destructors, value) => {
 #if ASSERTIONS
-      if (typeof value != "number" && typeof value != "boolean") {
-        throw new TypeError(`Cannot convert "${embindRepr(value)}" to ${toTypeName}`);
-      }
-      if (value < minRange || value > maxRange) {
-        throw new TypeError(`Passing a number "${embindRepr(value)}" from JS side to C/C++ side to an argument of type "${name}", which is outside the valid range [${minRange}, ${maxRange}]!`);
-      }
-#endif
-    }
-    var toWireType;
-    if (isUnsignedType) {
-      toWireType = function(destructors, value) {
-        checkAssertions(value, this.name);
-        return value >>> 0;
-      }
-    } else {
-      toWireType = function(destructors, value) {
-        checkAssertions(value, this.name);
+        if (typeof value != "number" && typeof value != "boolean") {
+          throw new TypeError(`Cannot convert "${embindRepr(value)}" to ${name}`);
+        }
+        assertIntegerRange(name, value, minRange, maxRange);
+  #endif
         // The VM will perform JS to Wasm value conversion, according to the spec:
         // https://www.w3.org/TR/wasm-js-api-1/#towebassemblyvalue
         return value;
-      }
-    }
-    registerType(primitiveType, {
-      name,
-      'fromWireType': fromWireType,
-      'toWireType': toWireType,
+      },
       argPackAdvance: GenericWireTypeSize,
       'readValueFromPointer': integerReadValueFromPointer(name, size, minRange !== 0),
       destructorFunction: null, // This type does not need a destructor
@@ -380,31 +372,46 @@ var LibraryEmbind = {
 #if WASM_BIGINT
   _embind_register_bigint__docs: '/** @suppress {globalThis} */',
   _embind_register_bigint__deps: [
-    '$embindRepr', '$readLatin1String', '$registerType', '$integerReadValueFromPointer'],
+    '$readLatin1String', '$registerType', '$integerReadValueFromPointer',
+#if ASSERTIONS
+    '$embindRepr',
+    '$assertIntegerRange',
+#endif
+  ],
   _embind_register_bigint: (primitiveType, name, size, minRange, maxRange) => {
     name = readLatin1String(name);
 
-    var isUnsignedType = (name.indexOf('u') != -1);
+    const isUnsignedType = minRange === 0n;
 
-    // maxRange comes through as -1 for uint64_t (see issue 13902). Work around that temporarily
+    let fromWireType = (value) => value;
     if (isUnsignedType) {
-      maxRange = (1n << 64n) - 1n;
+      // uint64 get converted to int64 in ABI, fix them up like we do for 32-bit integers.
+      const bitSize = size * 8;
+      fromWireType = (value) => {
+#if MEMORY64
+        // FIXME(https://github.com/emscripten-core/emscripten/issues/16975)
+        // `size_t` ends up here, but it's transferred in the ABI as a plain number instead of a bigint.
+        if (typeof value == 'number') {
+          return value >>> 0;
+        }
+#endif
+        return BigInt.asUintN(bitSize, value);
+      }
+      maxRange = fromWireType(maxRange);
     }
 
     registerType(primitiveType, {
       name,
-      'fromWireType': (value) => value,
-      'toWireType': function(destructors, value) {
-        if (typeof value != "bigint" && typeof value != "number") {
-          throw new TypeError(`Cannot convert "${embindRepr(value)}" to ${this.name}`);
-        }
+      'fromWireType': fromWireType,
+      'toWireType': (destructors, value) => {
         if (typeof value == "number") {
           value = BigInt(value);
         }
 #if ASSERTIONS
-        if (value < minRange || value > maxRange) {
-          throw new TypeError(`Passing a number "${embindRepr(value)}" from JS side to C/C++ side to an argument of type "${name}", which is outside the valid range [${minRange}, ${maxRange}]!`);
+        else if (typeof value != "bigint") {
+          throw new TypeError(`Cannot convert "${embindRepr(value)}" to ${name}`);
         }
+        assertIntegerRange(name, value, minRange, maxRange);
 #endif
         return value;
       },
@@ -1193,7 +1200,7 @@ var LibraryEmbind = {
   $constNoSmartPtrRawPointerToWireType__docs: '/** @suppress {globalThis} */',
   // If we know a pointer type is not going to have SmartPtr logic in it, we can
   // special-case optimize it a bit (compare to genericPointerToWireType)
-  $constNoSmartPtrRawPointerToWireType__deps: ['$throwBindingError', '$upcastPointer'],
+  $constNoSmartPtrRawPointerToWireType__deps: ['$throwBindingError', '$upcastPointer', '$embindRepr'],
   $constNoSmartPtrRawPointerToWireType: function(destructors, handle) {
     if (handle === null) {
       if (this.isReference) {
@@ -1216,7 +1223,7 @@ var LibraryEmbind = {
   $nonConstNoSmartPtrRawPointerToWireType__docs: '/** @suppress {globalThis} */',
   // An optimized version for non-const method accesses - there we must additionally restrict that
   // the pointer is not a const-pointer.
-  $nonConstNoSmartPtrRawPointerToWireType__deps: ['$throwBindingError', '$upcastPointer'],
+  $nonConstNoSmartPtrRawPointerToWireType__deps: ['$throwBindingError', '$upcastPointer', '$embindRepr'],
   $nonConstNoSmartPtrRawPointerToWireType: function(destructors, handle) {
     if (handle === null) {
       if (this.isReference) {
diff --git a/src/lib/libembind_gen.js b/src/lib/libembind_gen.js
@@ -367,8 +367,8 @@ var LibraryEmbind = {
         ['unsigned long', ['bigint']],
 #endif
 #if WASM_BIGINT
-        ['int64_t', ['bigint']],
-        ['uint64_t', ['bigint']],
+        ['long long', ['bigint']],
+        ['unsigned long long', ['bigint']],
 #endif
         ['void', ['void']],
         ['std::string', [jsString, 'string']],
diff --git a/system/lib/embind/bind.cpp b/system/lib/embind/bind.cpp
@@ -142,8 +142,8 @@ EMSCRIPTEN_BINDINGS(builtin) {
   register_integer<unsigned long>("unsigned long");
 #endif
 
-  register_bigint<int64_t>("int64_t");
-  register_bigint<uint64_t>("uint64_t");
+  register_bigint<signed long long>("long long");
+  register_bigint<unsigned long long>("unsigned long long");
 
   register_float<float>("float");
   register_float<double>("double");
diff --git a/test/code_size/embind_hello_wasm.json b/test/code_size/embind_hello_wasm.json
@@ -1,10 +1,10 @@
 {
   "a.html": 552,
   "a.html.gz": 380,
-  "a.js": 9005,
-  "a.js.gz": 3961,
-  "a.wasm": 7332,
-  "a.wasm.gz": 3369,
-  "total": 16889,
-  "total_gz": 7710
+  "a.js": 8831,
+  "a.js.gz": 3897,
+  "a.wasm": 7344,
+  "a.wasm.gz": 3368,
+  "total": 16727,
+  "total_gz": 7645
 }
diff --git a/test/code_size/embind_val_wasm.json b/test/code_size/embind_val_wasm.json
@@ -1,10 +1,10 @@
 {
   "a.html": 552,
   "a.html.gz": 380,
-  "a.js": 6862,
-  "a.js.gz": 2965,
-  "a.wasm": 9133,
-  "a.wasm.gz": 4710,
-  "total": 16547,
-  "total_gz": 8055
+  "a.js": 6688,
+  "a.js.gz": 2893,
+  "a.wasm": 9137,
+  "a.wasm.gz": 4700,
+  "total": 16377,
+  "total_gz": 7973
 }
diff --git a/test/embind/test_embind_long_long.cpp b/test/embind/test_embind_long_long.cpp
diff --git a/test/embind/test_i64_binding.cpp b/test/embind/test_i64_binding.cpp
@@ -13,35 +13,24 @@
 using namespace emscripten;
 using namespace std;
 
-#define assert_js(X) assert(run_js(X))
-
 void test(string message)
 {
-  cout << "test:\n" << message << "\n";
-}
-
-
-void execute_js(string js_code)
-{
-  js_code.append(";");
-  const char* js_code_pointer = js_code.c_str();
-  EM_ASM_INT({
-    var js_code = UTF8ToString($0);
-    return eval(js_code);
-  }, js_code_pointer);
+  printf("test: %s\n", message.c_str());
 }
 
-int run_js(string js_code)
-{
-  js_code.append(";");
-  const char* js_code_pointer = js_code.c_str();
-  return EM_ASM_INT({
-    var js_code = UTF8ToString($0);
-    return eval(js_code);
-  }, js_code_pointer);
+void assert_js_eq(string X, string Y) {
+  string js_code;
+  js_code += "const x = " + X + ";";
+  js_code += "const y = " + Y + ";";
+  js_code += "assert(x === y, `" + X + ": actual = ${typeof x} ${x}, expected = ${typeof y} ${y}`);";
+  emscripten_run_script(js_code.c_str());
 }
 
 EMSCRIPTEN_BINDINGS(tests) {
+  emscripten::function("int64_min", &numeric_limits<int64_t>::min);
+  emscripten::function("int64_max", &numeric_limits<int64_t>::max);
+  emscripten::function("uint64_max", &numeric_limits<uint64_t>::max);
+
   register_vector<int64_t>("Int64Vector");
   register_vector<uint64_t>("UInt64Vector");
 }
@@ -50,42 +39,44 @@ extern "C" void ensure_js_throws_with_assertions_enabled(const char* js_code, co
 
 int main()
 {
-  const int64_t max_int64_t = numeric_limits<int64_t>::max();
-  const int64_t min_int64_t = numeric_limits<int64_t>::min();
-  const uint64_t max_uint64_t = numeric_limits<uint64_t>::max();
+  test("limits");
+
+  assert_js_eq("Module.int64_min()", to_string(numeric_limits<int64_t>::min()) + "n");
+  assert_js_eq("Module.int64_max()", to_string(numeric_limits<int64_t>::max()) + "n");
+  assert_js_eq("Module.uint64_max()", to_string(numeric_limits<uint64_t>::max()) + "n");
 
   printf("start\n");
 
   test("vector<int64_t>");
   val myval(std::vector<int64_t>{1, 2, 3, -4});
   val::global().set("v64", myval);
-  assert_js("v64.get(0) === 1n");
-  assert_js("v64.get(1) === 2n");
-  assert_js("v64.get(2) === 3n");
-  assert_js("v64.get(3) === -4n");
+  assert_js_eq("v64.get(0)", "1n");
+  assert_js_eq("v64.get(1)", "2n");
+  assert_js_eq("v64.get(2)", "3n");
+  assert_js_eq("v64.get(3)", "-4n");
 
-  execute_js("v64.push_back(1234n)");
-  assert_js("v64.size() === 5");
-  assert_js("v64.get(4) === 1234n");
+  emscripten_run_script("v64.push_back(1234n)");
+  assert_js_eq("v64.size()", "5");
+  assert_js_eq("v64.get(4)", "1234n");
 
   test("vector<int64_t> Cannot convert bigint that is too big");
   ensure_js_throws_with_assertions_enabled("v64.push_back(12345678901234567890123456n)", "TypeError");
 
   test("vector<uint64_t>");
   val myval2(vector<uint64_t>{1, 2, 3, 4});
   val::global().set("vU64", myval2);
-  assert_js("vU64.get(0) === 1n");
-  assert_js("vU64.get(1) === 2n");
-  assert_js("vU64.get(2) === 3n");
-  assert_js("vU64.get(3) === 4n");
-
-  execute_js("vU64.push_back(1234n)");
-  assert_js("vU64.size() === 5");
-  assert_js("vU64.get(4) === 1234n");
-
-  execute_js("vU64.push_back(1234)");
-  assert_js("vU64.size() === 6");
-  assert_js("vU64.get(5) === 1234n");
+  assert_js_eq("vU64.get(0)", "1n");
+  assert_js_eq("vU64.get(1)", "2n");
+  assert_js_eq("vU64.get(2)", "3n");
+  assert_js_eq("vU64.get(3)", "4n");
+
+  emscripten_run_script("vU64.push_back(1234n)");
+  assert_js_eq("vU64.size()", "5");
+  assert_js_eq("vU64.get(4)", "1234n");
+
+  emscripten_run_script("vU64.push_back(1234)");
+  assert_js_eq("vU64.size()", "6");
+  assert_js_eq("vU64.get(5)", "1234n");
 
   test("vector<uint64_t> Cannot convert bigint that is too big");
   ensure_js_throws_with_assertions_enabled("vU64.push_back(12345678901234567890123456n)", "TypeError");
diff --git a/test/embind/test_i64_binding.out b/test/embind/test_i64_binding.out
@@ -1,12 +1,8 @@
+test: limits
 start
-test:
-vector<int64_t>
-test:
-vector<int64_t> Cannot convert bigint that is too big
-test:
-vector<uint64_t>
-test:
-vector<uint64_t> Cannot convert bigint that is too big
-test:
-vector<uint64_t> Cannot convert bigint that is negative
+test: vector<int64_t>
+test: vector<int64_t> Cannot convert bigint that is too big
+test: vector<uint64_t>
+test: vector<uint64_t> Cannot convert bigint that is too big
+test: vector<uint64_t> Cannot convert bigint that is negative
 end
diff --git a/test/test_other.py b/test/test_other.py
@@ -3396,14 +3396,6 @@ def test_embind_return_value_policy(self):
 
     self.do_runf('embind/test_return_value_policy.cpp')
 
-  @parameterized({
-    '': [[]],
-    'asyncify': [['-sASYNCIFY=1']],
-  })
-  def test_embind_long_long(self, args):
-    self.do_runf('embind/test_embind_long_long.cpp', '1000000000000n\n-1000000000000n',
-                 emcc_args=['-lembind', '-sWASM_BIGINT'] + args)
-
   @requires_node_canary
   def test_embind_resource_management(self):
     self.node_args.append('--js-explicit-resource-management')
@@ -3582,7 +3574,7 @@ def test_embind_tsgen_bigint(self):
     args = [EMXX, test_file('other/embind_tsgen_bigint.cpp'), '-lembind', '--emit-tsd', 'embind_tsgen_bigint.d.ts']
     # Check that TypeScript generation fails when code contains bigints but their support is not enabled
     stderr = self.expect_fail(args + ['-sWASM_BIGINT=0'])
-    self.assertContained("Missing primitive type to TS type for 'int64_t", stderr)
+    self.assertContained("Missing primitive type to TS type for 'long long", stderr)
     # Check that TypeScript generation works when bigint support is enabled
     self.run_process(args)
     self.assertFileContents(test_file('other/embind_tsgen_bigint.d.ts'), read_file('embind_tsgen_bigint.d.ts'))