Merge branch 'main' into addScript

yangw-dev · web-flow · commit ffe683905330 · 2025-06-18T13:34:51.000-07:00
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
@@ -38,7 +38,6 @@
     HistogramObserver,
     MinMaxObserver,
     MovingAverageMinMaxObserver,
-    MovingAveragePerChannelMinMaxObserver,
     ObserverOrFakeQuantizeConstructor,
     PerChannelMinMaxObserver,
     PlaceholderObserver,
@@ -95,24 +94,26 @@ def get_symmetric_quantization_config(
             **extra_args,
         ),
     )
+
+    # Setup quantization config for weights
     weight_qscheme = (
         torch.per_channel_symmetric if is_per_channel else torch.per_tensor_symmetric
     )
     weight_observer_or_fake_quant_ctr: ObserverOrFakeQuantizeConstructor = (
         MinMaxObserver
     )
+    # Determine the right observer/fake-quant constructor
     if is_qat:
-        # TODO: qat + per channel?
-        weight_observer_or_fake_quant_ctr = FusedMovingAvgObsFakeQuantize
-    elif is_per_channel:
-        weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver
+        # Set plain fake-quant with true min/max
+        weight_observer_or_fake_quant_ctr = FakeQuantize
+    else:
+        # PTQ: set min/max observer
+        weight_observer_or_fake_quant_ctr = (
+            PerChannelMinMaxObserver if is_per_channel else MinMaxObserver
+        )
+
+    extra_args = {"eps": 2**-12}
 
-    extra_args: Dict[str, Any] = {"eps": 2**-12}
-    if is_qat:
-        if weight_qscheme == torch.per_tensor_symmetric:
-            extra_args["observer"] = MovingAverageMinMaxObserver
-        else:
-            extra_args["observer"] = MovingAveragePerChannelMinMaxObserver  # type: ignore[dict-item]
     weight_quantization_spec = QuantizationSpec(
         dtype=torch.int8,
         quant_min=weight_qmin,
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
@@ -278,6 +278,7 @@ def quantize_and_export_to_edge(
     dump_graphs: bool = False,
     constant_methods: Optional[dict[str, object]] = None,
     calibration_data: Optional[list[tuple[object, ...]]] = None,
+    core_aten_exceptions: Optional[list[torch._ops.OpOverload]] = None,
 ) -> EdgeProgramManager:
     """
     Trace, quantize and lower a model/inputs pair to edge IR.
@@ -294,6 +295,7 @@ def quantize_and_export_to_edge(
         quantized_model,
         dump_graphs=dump_graphs,
         constant_methods=constant_methods,
+        core_aten_exceptions=core_aten_exceptions,
     )
 
 
diff --git a/backends/vulkan/runtime/graph/ops/glsl/dequantize_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/dequantize_buffer.yaml
@@ -11,6 +11,7 @@ dequantize_buffer:
     OUT_DTYPE:
       - VALUE: half
       - VALUE: float
+      - VALUE: double
   shader_variants:
     - NAME: dequantize_per_tensor_buffer
       MODE: per_tensor
diff --git a/backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.glsl b/backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.glsl
@@ -139,7 +139,10 @@ void dequantize_per_tensor() {
   [[unroll]] for (int i = 0; i < 4; ++i) {
     IN_T qvalue = IN_T(intex[i]);
     OUT_T value = dequantize_val(qvalue, scale, zero_point);
-    outtex[i] = value;
+    $if OUT_DTYPE == "double":
+      outtex[i] = float(value);
+    $else:
+      outtex[i] = value;
   }
   write_texel(t_out, pos, outtex);
 }
@@ -177,7 +180,10 @@ void dequantize_per_token() {
   [[unroll]] for (int i = 0; i < 4; ++i) {
     IN_T qvalue = IN_T(intex[i]);
     OUT_T value = dequantize_val(qvalue, scale_val, zero_point_val);
-    outtex[i] = value;
+    $if OUT_DTYPE == "double":
+      outtex[i] = float(value);
+    $else:
+      outtex[i] = value;
   }
 
   write_texel(t_out, pos, outtex);
diff --git a/backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.yaml b/backends/vulkan/runtime/graph/ops/glsl/dequantize_texture.yaml
@@ -11,6 +11,7 @@ dequantize_texture:
     OUT_DTYPE:
       - VALUE: half
       - VALUE: float
+      - VALUE: double
   shader_variants:
     - NAME: dequantize_per_tensor_texture3d
       MODE: per_tensor
diff --git a/backends/vulkan/runtime/graph/ops/glsl/quantize_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/quantize_buffer.yaml
@@ -7,6 +7,7 @@ quantize_buffer:
     IN_DTYPE:
       - VALUE: half
       - VALUE: float
+      - VALUE: double
     OUT_DTYPE:
       - VALUE: uint8
       - VALUE: int8
diff --git a/backends/vulkan/runtime/graph/ops/glsl/quantize_texture.yaml b/backends/vulkan/runtime/graph/ops/glsl/quantize_texture.yaml
@@ -7,6 +7,7 @@ quantize_texture:
     IN_DTYPE:
       - VALUE: half
       - VALUE: float
+      - VALUE: double
     OUT_DTYPE:
       - VALUE: uint8
       - VALUE: int8
diff --git a/backends/vulkan/runtime/graph/ops/impl/Quantize.cpp b/backends/vulkan/runtime/graph/ops/impl/Quantize.cpp
@@ -188,6 +188,7 @@ void quantize_per_tensor_impl(
 
   // Verify input is a floating point type
   VK_CHECK_COND(
+      graph.dtype_of(input) == vkapi::kDouble ||
       graph.dtype_of(input) == vkapi::kFloat ||
       graph.dtype_of(input) == vkapi::kHalf);
 
@@ -214,6 +215,7 @@ void quantize_per_token_impl(
 
   // Verify input is a floating point type
   VK_CHECK_COND(
+      graph.dtype_of(input) == vkapi::kDouble ||
       graph.dtype_of(input) == vkapi::kFloat ||
       graph.dtype_of(input) == vkapi::kHalf);
 
diff --git a/backends/vulkan/test/op_tests/dequantize_test.cpp b/backends/vulkan/test/op_tests/dequantize_test.cpp
@@ -366,6 +366,12 @@ void test_vulkan_dequantize_per_tensor(
       vkcompute::utils::kBuffer,
       vkcompute::utils::kBuffer);
 
+  // Tell the test to expect a float instead of a double,
+  // since the shader can only return 32-bit values anyway
+  if (out_dtype == at::kDouble) {
+    out_dtype = at::kFloat;
+  }
+
   // Test with texture storage
   test_vulkan_dequantize_per_tensor_impl(
       input_sizes,
@@ -400,6 +406,12 @@ void test_vulkan_dequantize_per_token(
       vkcompute::utils::kBuffer,
       vkcompute::utils::kBuffer);
 
+  // Tell the test to expect a float instead of a double,
+  // since the shader can only return 32-bit values anyway
+  if (out_dtype == at::kDouble) {
+    out_dtype = at::kFloat;
+  }
+
   // Test with texture storage
   test_vulkan_dequantize_per_token_impl(
       input_sizes,
@@ -793,6 +805,24 @@ TEST(
       at::kHalf); // output dtype
 }
 
+TEST(
+    VulkanDequantizePerTensorTest,
+    test_vulkan_dequantize_per_tensor_int8_to_double) {
+  if (!vkcompute::api::context()
+           ->adapter_ptr()
+           ->has_full_int8_buffers_support()) {
+    GTEST_SKIP();
+  }
+  test_vulkan_dequantize_per_tensor(
+      {2, 3}, // input sizes
+      0.05, // scale
+      10, // zero_point
+      -128, // quant_min
+      127, // quant_max
+      at::kChar, // input dtype
+      at::kDouble); // output dtype
+}
+
 void test_reference_dequantize_per_token(
     const std::vector<int>& input_sizes,
     const std::vector<float>& scales,
@@ -1288,3 +1318,24 @@ TEST(
       at::kInt, // input dtype
       at::kHalf); // output dtype
 }
+
+TEST(
+    VulkanDequantizePerTokenTest,
+    test_vulkan_dequantize_per_token_int8_to_double) {
+  if (!vkcompute::api::context()
+           ->adapter_ptr()
+           ->has_full_int8_buffers_support()) {
+    GTEST_SKIP();
+  }
+  std::vector<float> scales = {0.05, 0.001};
+  std::vector<int> zero_points = {10, -5};
+
+  test_vulkan_dequantize_per_token(
+      {2, 2}, // input sizes (2 tokens)
+      scales,
+      zero_points,
+      -128, // quant_min
+      127, // quant_max
+      at::kChar, // input dtype
+      at::kDouble); // output dtype
+}
diff --git a/backends/vulkan/test/op_tests/quantize_test.cpp b/backends/vulkan/test/op_tests/quantize_test.cpp
@@ -315,6 +315,12 @@ void test_vulkan_quantize_per_tensor(
       vkcompute::utils::kBuffer,
       vkcompute::utils::kBuffer);
 
+  // If in_dtype is double, convert it to float for the texture
+  // implementation, which doesn't support 64-bit inputs
+  if (in_dtype == at::kDouble) {
+    in_dtype = at::kFloat;
+  }
+
   // Test with texture storage
   test_vulkan_quantize_per_tensor_impl(
       input_sizes,
@@ -349,6 +355,12 @@ void test_vulkan_quantize_per_token(
       vkcompute::utils::kBuffer,
       vkcompute::utils::kBuffer);
 
+  // If in_dtype is double, convert it to float for the texture
+  // implementation, which doesn't support 64-bit inputs
+  if (in_dtype == at::kDouble) {
+    in_dtype = at::kFloat;
+  }
+
   // Test with texture storage
   test_vulkan_quantize_per_token_impl(
       input_sizes,
@@ -655,6 +667,24 @@ TEST(
       at::kChar); // output dtype
 }
 
+TEST(
+    VulkanQuantizePerTensorTest,
+    test_vulkan_quantize_per_tensor_double_to_int8) {
+  if (!vkcompute::api::context()
+           ->adapter_ptr()
+           ->has_full_int8_buffers_support()) {
+    GTEST_SKIP();
+  }
+  test_vulkan_quantize_per_tensor(
+      {2, 3}, // input sizes
+      0.01, // scale
+      1, // zero_point
+      -128, // quant_min
+      127, // quant_max
+      at::kDouble, // input dtype
+      at::kChar); // output dtype
+}
+
 void test_reference_quantize_per_token(
     const std::vector<int>& input_sizes,
     const std::vector<float>& pre_scales,
@@ -1075,3 +1105,24 @@ TEST(VulkanQuantizePerTensorTest, test_vulkan_quantize_per_token_half_to_int8) {
       at::kHalf, // input dtype
       at::kChar); // output dtype
 }
+
+TEST(
+    VulkanQuantizePerTensorTest,
+    test_vulkan_quantize_per_token_double_to_int8) {
+  if (!vkcompute::api::context()
+           ->adapter_ptr()
+           ->has_full_int8_buffers_support()) {
+    GTEST_SKIP();
+  }
+  std::vector<float> scales = {0.1, 0.2};
+  std::vector<int> zero_points = {0, 5};
+
+  test_vulkan_quantize_per_token(
+      {2, 2}, // input sizes (2 tokens)
+      scales,
+      zero_points,
+      -128, // quant_min
+      127, // quant_max
+      at::kDouble, // input dtype
+      at::kChar); // output dtype
+}
diff --git a/backends/xnnpack/README.md b/backends/xnnpack/README.md
@@ -105,6 +105,7 @@ mkdir cmake-out
 cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
diff --git a/backends/xnnpack/test/ops/test_div.py b/backends/xnnpack/test/ops/test_div.py
@@ -31,17 +31,20 @@ def forward(self, x):
             return z
 
     def _test_div(self, inputs):
-        (
-            Tester(self.Div(), inputs)
-            .export()
-            .check_count({"torch.ops.aten.div.Tensor": 1})
-            .to_edge_transform_and_lower()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"])
-            .to_executorch()
-            .serialize()
-            .run_method_and_compare_outputs()
-        )
+        for legacy_mode in (True, False):
+            tester = Tester(self.Div(), inputs)
+            tester.export()
+            tester.check_count({"torch.ops.aten.div.Tensor": 1})
+            if legacy_mode:
+                tester.to_edge()
+                tester.partition()
+            else:
+                tester.to_edge_transform_and_lower()
+            tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            tester.check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"])
+            tester.to_executorch()
+            tester.serialize()
+            tester.run_method_and_compare_outputs()
 
     def test_fp16_div(self):
         # Adding 4 to move distribution away from 0, 4 Std Dev should be far enough
@@ -59,14 +62,17 @@ def test_fp32_div(self):
     def test_fp32_div_single_input(self):
         # Adding 4 to move distribution away from 0, 4 Std Dev should be far enough
         inputs = (torch.randn(1) + 4,)
-        (
-            Tester(self.DivSingleInput(), inputs)
-            .export()
-            .check_count({"torch.ops.aten.div.Tensor": 1})
-            .to_edge_transform_and_lower()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"])
-            .to_executorch()
-            .serialize()
-            .run_method_and_compare_outputs()
-        )
+        for legacy_mode in (True, False):
+            tester = Tester(self.DivSingleInput(), inputs)
+            tester.export()
+            tester.check_count({"torch.ops.aten.div.Tensor": 1})
+            if legacy_mode:
+                tester.to_edge()
+                tester.partition()
+            else:
+                tester.to_edge_transform_and_lower()
+            tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            tester.check_not(["executorch_exir_dialects_edge__ops_aten_div_Tensor"])
+            tester.to_executorch()
+            tester.serialize()
+            tester.run_method_and_compare_outputs()
diff --git a/docs/source/tutorial-xnnpack-delegate-lowering.md b/docs/source/tutorial-xnnpack-delegate-lowering.md
@@ -154,6 +154,7 @@ mkdir cmake-out
 cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
diff --git a/examples/xnnpack/README.md b/examples/xnnpack/README.md
@@ -38,6 +38,7 @@ mkdir cmake-out
 cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorch+Module.swift b/extension/apple/ExecuTorch/Exported/ExecuTorch+Module.swift
diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorch+Tensor.swift b/extension/apple/ExecuTorch/Exported/ExecuTorch+Tensor.swift
diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorch+Value.swift b/extension/apple/ExecuTorch/Exported/ExecuTorch+Value.swift
diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h b/extension/apple/ExecuTorch/Exported/ExecuTorchTensor.h
diff --git a/extension/apple/ExecuTorch/Exported/ExecuTorchValue.h b/extension/apple/ExecuTorch/Exported/ExecuTorchValue.h