
Commit 9187fb0

Update on "make et.export support etrecord generation"
This diff adds ETRecord generation support to et.export. Details can be found in #12925; after this change, all work tracked in #12925 is complete. Differential Revision: [D79741917](https://our.internmc.facebook.com/intern/diff/D79741917/) [ghstack-poisoned]
2 parents 6593cc3 + 8003cb0 commit 9187fb0
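
For context, generating an ETRecord has previously required calling the devtools API by hand on both the edge-dialect and ExecuTorch programs; this change folds that step into the et.export flow described in #12925. Below is a minimal sketch of the pre-existing manual flow, assuming the `executorch.devtools.generate_etrecord` API and a hypothetical `MulModule` toy model; it is illustrative only and not part of this diff:

```python
import copy

import torch
from executorch.devtools import generate_etrecord
from executorch.exir import to_edge


class MulModule(torch.nn.Module):  # hypothetical toy model for illustration
    def forward(self, x, y):
        return x * y


example_inputs = (torch.ones(4), torch.ones(4))
exported = torch.export.export(MulModule(), example_inputs)

edge_program = to_edge(exported)
# ETRecord needs the edge-dialect graph captured *before* to_executorch mutates it.
edge_program_copy = copy.deepcopy(edge_program)
et_program = edge_program.to_executorch()

# Writes the ETRecord artifact consumed by the devtools Inspector.
generate_etrecord("model_etrecord.bin", edge_program_copy, et_program)
```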

File tree

14 files changed: +268 -29 lines


backends/apple/coreml/compiler/torch_ops.py

Lines changed: 41 additions & 1 deletion
@@ -8,6 +8,7 @@
 # coremltools than is used by ExecuTorch. Each op registered here should have a link to a PR in coremltools that adds
 # the op to the coremltools library.
 
+import numpy as np
 import torch as _torch
 from coremltools import _logger
 from coremltools.converters.mil.frontend import _utils
@@ -21,7 +22,6 @@
     transpose,
     unbind,
 )
-
 from coremltools.converters.mil.frontend.torch.torch_op_registry import (
     register_torch_op,
 )
@@ -132,3 +132,43 @@ def dequantize_affine(context, node):
         name=node.name,
     )
     context.add(output, node.name)
+
+
+@register_torch_op(
+    torch_alias=["quant::dequantize_codebook", "quant.dequantize_codebook"],
+    override=False,
+)
+def dequantize_codebook(context, node):
+    inputs = _get_inputs(context, node, expected=[4, 5])
+    codes = inputs[0].val
+    codebook = inputs[1].val
+    nbits = inputs[2].val
+
+    # information in block_size is redundant with codebook.shape
+    block_size = inputs[3].val  # noqa: F841
+
+    assert len(codes.shape) == 2, "Only rank 2 inputs are supported"
+
+    # Assert codebook is as expected. codebook.dim() = codes.dim() + 2
+    assert len(codebook.shape) == 4, "Only rank 4 inputs are supported for codebook"
+    assert codebook.shape[0] == 1, "Only grouped_channel granularity is supported"
+    n_luts = codebook.shape[1]
+    assert (
+        codes.shape[1] % n_luts == 0
+    ), "codes.shape[1] must be divisible by codebook.shape[1]"
+    assert codebook.shape[2] == 2**nbits
+    assert codebook.shape[3] == 1, "Only scalar look up values are supported"
+
+    if len(inputs) > 4:
+        output_dtype = inputs[4].val
+        out_np_dtype = NUM_TO_NUMPY_DTYPE[output_dtype]
+        _logger.warning(
+            f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
+        )
+
+    output = _utils._construct_constexpr_lut_op(
+        codes.astype(np.int8),
+        codebook,
+        name=node.name,
+    )
+    context.add(output, node.name)
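
For intuition, the op registered above lowers `quant.dequantize_codebook` to a Core ML constexpr LUT op whose result is the per-group table lookup sketched below in numpy. The contiguous column-to-LUT grouping and the `decode_codebook` helper are assumptions for illustration, inferred from the shape checks above and not part of this commit; coremltools is the authoritative reference.

```python
import numpy as np


def decode_codebook(codes: np.ndarray, codebook: np.ndarray) -> np.ndarray:
    """Reference decode assumed equivalent to the constexpr LUT op.

    codes:    [rows, cols] integer indices into per-group lookup tables.
    codebook: [1, n_luts, 2**nbits, 1]; each LUT is assumed to cover a
              contiguous block of columns (grouped_channel granularity).
    """
    rows, cols = codes.shape
    n_luts = codebook.shape[1]
    group = cols // n_luts  # columns served by each lookup table (assumed contiguous)
    out = np.empty((rows, cols), dtype=codebook.dtype)
    for j in range(cols):
        lut = codebook[0, j // group, :, 0]  # table for this column's group
        out[:, j] = lut[codes[:, j]]
    return out


# Example: 4 columns, 2 LUTs of 2**2 = 4 entries each (nbits = 2)
codes = np.array([[0, 3, 1, 2], [2, 1, 0, 3]], dtype=np.int8)
codebook = np.arange(8, dtype=np.float32).reshape(1, 2, 4, 1)
print(decode_codebook(codes, codebook))  # [[0. 3. 5. 6.] [2. 1. 4. 7.]]
```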

backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm

Lines changed: 8 additions & 6 deletions
@@ -88,17 +88,17 @@
         ET_LOG(Error, "%s: DataType=%d is not supported", ETCoreMLStrings.delegateIdentifier.UTF8String, (int)tensor.scalar_type());
         return std::nullopt;
     }
-
+
     std::vector<ssize_t> strides(tensor.strides().begin(), tensor.strides().end());
     std::vector<size_t> shape(tensor.sizes().begin(), tensor.sizes().end());
-
+
     // If tensor is rank 0, wrap in rank 1
     // See https://github.com/apple/coremltools/blob/8.2/coremltools/converters/mil/frontend/torch/exir_utils.py#L73
     if (shape.size() == 0) {
         shape.push_back(1);
         strides.push_back(1);
     }
-
+
     MultiArray::MemoryLayout layout(dataType.value(), std::move(shape), std::move(strides));
     switch (argType) {
         case ArgType::Input: {
@@ -281,9 +281,11 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
 }
 
 namespace {
-auto cls = CoreMLBackendDelegate();
-Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, &cls};
-static auto success_with_compiler = register_backend(backend);
+#ifndef LAZY_LOAD_IOS_PYTORCH_INITIALIZER
+auto cls = CoreMLBackendDelegate();
+Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, &cls};
+static auto success_with_compiler = register_backend(backend);
+#endif
 }
 
 } // namespace coreml

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 60 additions & 0 deletions
@@ -14,6 +14,9 @@
 
 from executorch.backends.apple.coreml.compiler import CoreMLBackend
 from executorch.backends.apple.coreml.partition import CoreMLPartitioner
+from executorch.exir.backend.utils import format_delegated_graph
+
+from torchao.prototype.quantization.codebook_coreml import CodebookWeightOnlyConfig
 from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_
 
 
@@ -164,6 +167,61 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)
 
+    def test_dequantize_codebook_linear(self):
+        model, example_inputs = self._get_test_model()
+        quantize_(
+            model,
+            CodebookWeightOnlyConfig(dtype=torch.uint2, block_size=[-1, 16]),
+        )
+        ep = torch.export.export(model, example_inputs)
+        assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[self._coreml_partitioner()],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+
+        assert (
+            "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default"
+            in format_delegated_graph(delegated_program.exported_program().graph_module)
+        )
+
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+
+    def test_dequantize_codebook_embedding(self):
+        model, example_inputs = self._get_test_model()
+        quantize_(
+            model,
+            CodebookWeightOnlyConfig(dtype=torch.uint3, block_size=[-1, 16]),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
+        ep = torch.export.export(model, example_inputs)
+        assert "torch.ops.quant.dequantize_codebook.default" in ep.graph_module.code
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[self._coreml_partitioner()],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+
+        assert (
+            "executorch.exir.dialects.edge._ops.quant.dequantize_codebook.default"
+            in format_delegated_graph(delegated_program.exported_program().graph_module)
+        )
+
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+
 
 if __name__ == "__main__":
     test_runner = TestTorchOps()
@@ -172,3 +230,5 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
     test_runner.test_dequantize_affine_c4w_embedding()
     test_runner.test_dequantize_affine_c4w_linear()
     test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
+    test_runner.test_dequantize_codebook_linear()
+    test_runner.test_dequantize_codebook_embedding()

backends/xnnpack/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ foreach(fbs_file ${_xnnpack_schema__srcs})
   )
 endforeach()
 
-if(WIN32)
+if(WIN32 AND NOT CMAKE_CROSSCOMPILING)
   set(MV_COMMAND
     powershell -Command
     "Move-Item -Path ${_xnnpack_flatbuffer__outputs} -Destination ${_xnnpack_schema__outputs}"

devtools/etrecord/tests/TARGETS

Lines changed: 2 additions & 7 deletions
@@ -7,13 +7,7 @@ python_unittest(
     name = "etrecord_test",
     srcs = ["etrecord_test.py"],
     deps = [
-        "//caffe2:torch",
-        "//executorch/devtools/bundled_program:config",
-        "//executorch/devtools/bundled_program:core",
-        "//executorch/devtools/etrecord:etrecord",
-        "//executorch/exir:lib",
-        "//executorch/exir/tests:models",
-        "//executorch/export:lib",
+        ":etrecord_test_library"
     ],
 )
 
@@ -27,5 +21,6 @@ python_library(
         "//executorch/devtools/etrecord:etrecord",
        "//executorch/exir:lib",
         "//executorch/exir/tests:models",
+        "//executorch/export:lib",
     ],
 )

extension/wasm/CMakeLists.txt

Lines changed: 6 additions & 3 deletions
@@ -37,7 +37,6 @@ list(
   embind
   executorch_core
   extension_data_loader
-  portable_ops_lib
   extension_module_static
   extension_tensor
   extension_runner_util
@@ -49,8 +48,12 @@ target_compile_options(executorch_wasm PUBLIC ${_common_compile_options})
 target_include_directories(
   executorch_wasm PUBLIC ${_common_include_directories}
 )
-target_link_libraries(executorch_wasm PUBLIC ${link_libraries})
+target_link_libraries(
+  executorch_wasm
+  PUBLIC ${link_libraries}
+  INTERFACE executorch_kernels
+)
 
-if(EXECUTORCH_BUILD_WASM_TESTS)
+if(BUILD_TESTING)
   add_subdirectory(test)
 endif()

extension/wasm/README.md

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
# ExecuTorch Wasm Extension

This directory contains the source code for the ExecuTorch Wasm extension, a C++ library that provides a JavaScript API for running ExecuTorch models. The extension is compiled to WebAssembly and can be used in JavaScript applications.

## Installing Emscripten

[Emscripten](https://emscripten.org/index.html) is necessary to compile ExecuTorch for Wasm. You can install Emscripten with these commands:

```bash
# Clone the emsdk repository
git clone https://github.com/emscripten-core/emsdk.git
cd emsdk

# Download and install version 4.0.10 of the SDK
./emsdk install 4.0.10
./emsdk activate 4.0.10

# Add the Emscripten environment variables to your shell
source ./emsdk_env.sh
```

## Building ExecuTorch for Wasm

To build ExecuTorch for Wasm, make sure to use the `emcmake cmake` command and to have `EXECUTORCH_BUILD_WASM` enabled. For example:

```bash
# Configure the build with the Emscripten environment variables
emcmake cmake . -DEXECUTORCH_BUILD_WASM=ON \
    -DCMAKE_BUILD_TYPE=Release \
    -Bcmake-out-wasm

# Build the Wasm extension
cmake --build cmake-out-wasm --target executorch_wasm -j32
```

To reduce the binary size, you may also use the selective build options found in the [Kernel Library Selective Build guide](../../docs/source/kernel-library-selective-build.md). You may also use optimized kernels with the `EXECUTORCH_BUILD_KERNELS_OPTIMIZED` option. Portable kernels are used by default.

### Building for Web

In your CMakeLists.txt, add the following lines:

```cmake
add_executable(executorch_wasm_lib) # Emscripten outputs this as a JS and Wasm file
target_link_libraries(executorch_wasm_lib PRIVATE executorch_wasm)
target_link_options(executorch_wasm_lib PRIVATE ...) # Add any additional link options here
```

You can find the Emscripten link options in the [emcc reference](https://emscripten.org/docs/tools_reference/emcc.html).

Building this should output `executorch_wasm_lib.js` and `executorch_wasm_lib.wasm` in the build directory. You can then use these files in your page.

```html
<script>
  // Emscripten calls Module.onRuntimeInitialized once the runtime is ready.
  var Module = {
    onRuntimeInitialized: function() {
      const et = Module; // Assign Module to et for ease of use
      const model = et.Module.load("mv2.pte");
      // ...
    }
  }
</script>
<script src="executorch_wasm_lib.js"></script>
```

### Building for Node.js

While the standard way to import a module in Node.js is the `require` function, loading the library that way does not give you access to the [Emscripten API](https://emscripten.org/docs/api_reference/index.html), which is stored in the globals. For example, you may want to use the [File System API](https://emscripten.org/docs/api_reference/Filesystem-API.html) in your unit tests, which cannot be done if the library is loaded with `require`. Instead, you can use the `--pre-js` option to prepend your file to the start of the JS output so that it behaves similarly to the example in the [Web build](#building-for-web).

```cmake
add_executable(my_project) # Emscripten outputs this as a JS and Wasm file
target_link_libraries(my_project PRIVATE executorch_wasm)
target_link_options(my_project PRIVATE --pre-js my_code.js) # Add any additional link options here
```

The output `my_project.js` should contain both the emitted JS code and the contents of `my_code.js` prepended.

## JavaScript API

### Module
- `static load(data)`: Load a model from a file or a buffer.
- `getMethods()`: Returns the list of methods in the model.
- `loadMethod(methodName)`: Load a method from the model.
- `getMethodMetadata(methodName)`: Get the metadata of a method.
- `execute(methodName, inputs)`: Execute a method with the given inputs.
- `forward(inputs)`: Execute the forward method with the given inputs.
- `delete()`: Delete the model from memory.

### Tensor
- `static zeroes(shape, dtype=ScalarType.Float)`: Create a tensor of zeros with the given shape and dtype.
- `static ones(shape, dtype=ScalarType.Float)`: Create a tensor of ones with the given shape and dtype.
- `static full(shape, value, dtype=ScalarType.Float)`: Create a tensor filled with the given value with the given shape and dtype.
- `static fromArray(shape, array, dtype=ScalarType.Float, dimOrder=[], strides=[])`: Create a tensor from a JavaScript array.
- `static fromIter(shape, iter, dtype=ScalarType.Float, dimOrder=[], strides=[])`: Create a tensor from an iterable.
- `delete()`: Delete the tensor from memory.
- `scalarType`: The scalar type of the tensor.
- `data`: The data buffer of the tensor.
- `sizes`: The sizes of the tensor.

### MethodMeta
- `name`: The name of the method.
- `inputTags`: The input tags of the method.
- `inputTensorMeta`: The input tensor metadata of the method.
- `outputTags`: The output tags of the method.
- `outputTensorMeta`: The output tensor metadata of the method.
- `attributeTensorMeta`: The attribute tensor metadata of the method.
- `memoryPlannedBufferSizes`: The memory planned buffer sizes of the method.
- `backends`: The backends of the method.
- `numInstructions`: The number of instructions in the method.
- These are value types and do not need to be manually deleted.

### TensorInfo
- `sizes`: The sizes of the tensor.
- `dimOrder`: The dimension order of the tensor.
- `scalarType`: The scalar type of the tensor.
- `isMemoryPlanned`: Whether the tensor is memory planned.
- `nBytes`: The number of bytes in the tensor.
- `name`: The name of the tensor.
- These are value types and do not need to be manually deleted.

### ScalarType
- Only `Float` and `Long` are currently supported.
- `value`: The int constant value of the enum.
- `name`: The `ScalarType` as a string.

### Tag
- `value`: The int constant value of the enum.
- `name`: The `Tag` as a string.

Emscripten's JavaScript API is also available; you can find more information in the [API Reference](https://emscripten.org/docs/api_reference/index.html).

extension/wasm/test/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -41,13 +41,13 @@ add_custom_target(
 )
 
 add_executable(executorch_wasm_tests)
-target_link_libraries(executorch_wasm_tests PUBLIC executorch_wasm)
+target_link_libraries(executorch_wasm_tests PRIVATE executorch_wasm)
 target_link_options(
   executorch_wasm_tests
-  PUBLIC
+  PRIVATE
   --embed-file
   "${MODELS_DIR}@/"
-  --post-js
+  --pre-js
   ${CMAKE_CURRENT_SOURCE_DIR}/unittests.js
   -sASSERTIONS=2
 )

extension/wasm/test/unittests.js

Lines changed: 2 additions & 2 deletions
@@ -6,9 +6,9 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-let et;
+var Module = {};
+const et = Module;
 beforeAll((done) => {
-  et = Module;
   et.onRuntimeInitialized = () => {
     done();
   }

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -72,6 +72,8 @@ dependencies=[
   "typing-extensions>=4.10.0",
   # Keep this version in sync with: ./backends/apple/coreml/scripts/install_requirements.sh
   "coremltools==8.3; platform_system == 'Darwin' or platform_system == 'Linux'",
+  # scikit-learn is used to support palettization in the coreml backend
+  "scikit-learn==1.7.1",
   "hydra-core>=1.3.0",
   "omegaconf>=2.3.0",
 ]
