
Commit 38a2dc6

Merge branch 'main' into bump-ao-pin
2 parents 58301a3 + 47acc87

File tree: 68 files changed (+668, -322 lines)


.github/workflows/trunk.yml

Lines changed: 3 additions & 0 deletions

```diff
@@ -8,6 +8,9 @@ on:
     tags:
       - ciflow/trunk/*
   pull_request:
+    paths:
+      - .ci/docker/ci_commit_pins/pytorch.txt
+      - .ci/scripts/**
   workflow_dispatch:
 
 concurrency:
```
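With this filter in place, pull requests trigger the trunk workflow only when they touch the pinned PyTorch commit or the CI scripts; pushes, `ciflow/trunk/*` tags, and manual `workflow_dispatch` runs are unaffected, since GitHub Actions applies a `paths:` filter only to the trigger it is nested under.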

backends/apple/coreml/test/test_coreml_quantizer.py

Lines changed: 2 additions & 4 deletions

```diff
@@ -15,7 +15,7 @@
 )
 
 from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
-from torch.export import export_for_training
+from torch.export import export
 from torchao.quantization.pt2e.quantize_pt2e import (
     convert_pt2e,
     prepare_pt2e,
@@ -32,9 +32,7 @@ def quantize_and_compare(
 ) -> None:
     assert quantization_type in {"PTQ", "QAT"}
 
-    pre_autograd_aten_dialect = export_for_training(
-        model, example_inputs, strict=True
-    ).module()
+    pre_autograd_aten_dialect = export(model, example_inputs, strict=True).module()
 
     quantization_config = LinearQuantizerConfig.from_dict(
         {
```
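This `export_for_training` → `export` substitution recurs in the MPS, Cortex-M, example-delegate, and MediaTek files below. A minimal sketch of the updated PT2E flow, using a hypothetical `TinyModel` that is not part of this commit:

```python
import torch
from torch.export import export
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

# Hypothetical module for illustration only.
class TinyModel(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.relu(x)

example_inputs = (torch.randn(2, 3),)

# export(...).module() now yields the GraphModule previously produced by
# export_for_training(...).module() at these call sites.
gm = export(TinyModel().eval(), example_inputs, strict=True).module()

# The surrounding PT2E steps are unchanged; a backend-specific quantizer
# (e.g. CoreMLQuantizer) would be passed here.
# prepared = prepare_pt2e(gm, quantizer)
# quantized = convert_pt2e(prepared)
```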

backends/apple/mps/test/test_mps_utils.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -206,7 +206,7 @@ def lower_module_and_test_output(
 
     expected_output = model(*sample_inputs)
 
-    model = torch.export.export_for_training(
+    model = torch.export.export(
         model, sample_inputs, dynamic_shapes=dynamic_shapes, strict=True
     ).module()
```

backends/arm/operators/op_abs.py

Lines changed: 1 addition & 3 deletions

```diff
@@ -73,9 +73,7 @@ def define_node(
         abs_output = output
 
         # Do the INT32 Abs
-        self._serialize_operator(
-            node,
-            tosa_graph,
+        tosa_graph.addOperator(
             ts.TosaOp.Op().ABS,
             [
                 rescaled_inputs[0].name,
```

backends/arm/operators/op_sum.py

Lines changed: 2 additions & 6 deletions

```diff
@@ -67,9 +67,7 @@ def define_node(
             dtype=ts.DType.INT32,
         )
 
-        self._serialize_operator(
-            node,
-            tosa_graph,
+        tosa_graph.addOperator(
             ts.TosaOp.Op().REDUCE_SUM,
             [rescaled_inputs[0].name],
             [intermediate.name],
@@ -113,9 +111,7 @@ def define_node(
         attr = ts.TosaSerializerAttribute()
         attr.ReduceSumAttribute(tensor.dim_order.index(dim))
 
-        self._serialize_operator(
-            node,
-            tosa_graph,
+        tosa_graph.addOperator(
             ts.TosaOp.Op().REDUCE_SUM,
             [tensor.name],
             [output.name],
```
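In both ARM operator files the change is mechanical: the `_serialize_operator` wrapper is dropped in favor of calling `tosa_graph.addOperator` directly, with the operator enum, input/output tensor names, and attribute arguments unchanged. Presumably the wrapper only added per-node bookkeeping around the same underlying call, so the serialized TOSA graph should be identical.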

backends/cadence/aot/ref_implementations.py

Lines changed: 19 additions & 3 deletions

```diff
@@ -458,10 +458,21 @@ def quantized_conv_nhwc_per_tensor(
     - out_shift (int): Unused
     """
 
-    if not input_tensor.is_contiguous(memory_format=torch.channels_last):
-        raise ValueError("Input tensor must be in NHWC format")
+    # Convert to NCHW format to reuse the existing implementation
+    conv_is_1d = False
+    if len(input_tensor.shape) == 3:
+        conv_is_1d = True
+        input_tensor = input_tensor.movedim(-1, 1).contiguous()
+        if len(weight.shape) != 3:
+            raise ValueError("Weight tensor must be 3D if input is 3D")
+        weight = weight.movedim(-1, 1).contiguous()
+    else:
+        input_tensor = input_tensor.movedim(-1, -3)
+        if len(weight.shape) != 4:
+            raise ValueError("Weight tensor must be 4D if input is nd > 3")
+        weight = torch.permute(weight, (0, -1, 1, 2)).contiguous()
 
-    return quantized_conv_per_tensor(
+    nchw_out = quantized_conv_per_tensor(
         input_tensor,
         weight,
         bias,
@@ -478,6 +489,11 @@ def quantized_conv_nhwc_per_tensor(
         out_shift,
     )
 
+    if conv_is_1d:
+        return nchw_out.movedim(1, -1).contiguous()
+    else:
+        return nchw_out.movedim(-3, -1).contiguous()
+
 
 def quantized_conv_variant(
     layout: str,
```
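The conversion hinges on `Tensor.movedim`, which repositions the channel dimension between the last slot (NHWC/NWC) and its NCHW/NCW slot. A standalone sketch of the round trip; the shapes are illustrative, not taken from the commit:

```python
import torch

# 2D conv case: NHWC input with batch=2, H=4, W=5, C=3.
nhwc = torch.randn(2, 4, 5, 3)

# NHWC -> NCHW: move channels (dim -1) in front of the spatial dims.
nchw = nhwc.movedim(-1, -3)
assert nchw.shape == (2, 3, 4, 5)

# NCHW -> NHWC: the inverse move, as applied to the kernel's output.
assert nchw.movedim(-3, -1).shape == nhwc.shape

# 1D conv case: NWC input, channels move to dim 1 instead.
nwc = torch.randn(2, 5, 3)
assert nwc.movedim(-1, 1).shape == (2, 3, 5)
```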

backends/cadence/aot/tests/test_ref_implementations.py

Lines changed: 17 additions & 8 deletions

```diff
@@ -449,7 +449,7 @@ def test_quantized_layer_norm_per_tensor(
             ), # expected_output: [1+2, 2+3, 3+4] / 0.5 = [6, 10, 14]
             memory_format,
         )
-        for memory_format in [torch.contiguous_format]
+        for memory_format in [torch.contiguous_format, torch.channels_last]
     ],
     # Test case 5: Multiple output channels
     *[
@@ -686,10 +686,13 @@ def test_quantized_conv_per_tensor(
     ) -> None:
         assert memory_format in [torch.contiguous_format, torch.channels_last]
 
-        if len(input_tensor.shape) == 3 and memory_format == torch.channels_last:
-            self.fail("Channels last format is not supported for 3D input tensors")
-
-        input_tensor = input_tensor.to(memory_format=memory_format)
+        if memory_format == torch.channels_last:
+            if input_tensor.ndim == 3:
+                input_tensor = input_tensor.movedim(1, -1)
+                weight = weight.movedim(1, -1)
+            else:
+                input_tensor = input_tensor.movedim(-3, -1)
+                weight = weight.movedim(-3, -1)
 
         convs = [
             (
@@ -701,7 +704,7 @@ def test_quantized_conv_per_tensor(
 
         optimized_convs = []
         if input_tensor.dtype == torch.int8 and weight.dtype == torch.int8:
-            if input_tensor.is_contiguous(memory_format=torch.contiguous_format):
+            if memory_format == torch.contiguous_format:
                 optimized_convs = [
                     torch.ops.cadence.quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor,
                     torch.ops.cadence.quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor,
@@ -715,7 +718,7 @@ def test_quantized_conv_per_tensor(
                     torch.ops.cadence.quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor,
                 ]
         elif input_tensor.dtype == torch.uint8 and weight.dtype == torch.uint8:
-            if input_tensor.is_contiguous(memory_format=torch.contiguous_format):
+            if memory_format == torch.contiguous_format:
                 optimized_convs = [
                     torch.ops.cadence.quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor,
                     torch.ops.cadence.quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor,
@@ -746,7 +749,13 @@ def test_quantized_conv_per_tensor(
                 output_zero_point,
                 out_multiplier,
                 out_shift,
-            ).to(memory_format=torch.contiguous_format)
+            )
+
+            if memory_format == torch.channels_last:
+                if input_tensor.ndim == 3:
+                    output = output.movedim(-1, 1)
+                else:
+                    output = output.movedim(-1, -3)
 
             # Verify output properties
             self.assertEqual(output.dtype, dtype, f"Output dtype should be {dtype}")
```
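Note the semantic shift in the test setup: the old `.to(memory_format=...)` call only reordered strides, whereas the new `movedim` calls permute the logical shape, matching what the reference NHWC implementation above now expects. A quick sketch of the distinction, with an illustrative shape:

```python
import torch

x = torch.randn(2, 3, 4, 5)  # logical NCHW

# memory_format changes strides only; the logical shape stays (2, 3, 4, 5).
cl = x.to(memory_format=torch.channels_last)
assert cl.shape == (2, 3, 4, 5)
assert cl.is_contiguous(memory_format=torch.channels_last)

# movedim changes the logical dimension order: NCHW -> NHWC.
nhwc = x.movedim(-3, -1)
assert nhwc.shape == (2, 4, 5, 3)
```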

backends/cortex_m/test/test_quantize_op_fusion_pass.py

Lines changed: 3 additions & 7 deletions

```diff
@@ -23,7 +23,7 @@
     get_node_args,
 )
 from executorch.exir.dialects._ops import ops as exir_ops
-from torch.export import export, export_for_training
+from torch.export import export
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 
 
@@ -42,9 +42,7 @@ def _prepare_quantized_model(self, model_class):
         model = model_class()
 
         # Export and quantize
-        exported_model = export_for_training(
-            model.eval(), self.example_inputs, strict=True
-        ).module()
+        exported_model = export(model.eval(), self.example_inputs, strict=True).module()
         prepared_model = prepare_pt2e(exported_model, AddQuantizer())
         quantized_model = convert_pt2e(prepared_model)
 
@@ -242,9 +240,7 @@ def forward(self, x, y):
         inputs = (torch.randn(shape), torch.randn(shape))
 
         model = SingleAddModel()
-        exported_model = export_for_training(
-            model.eval(), inputs, strict=True
-        ).module()
+        exported_model = export(model.eval(), inputs, strict=True).module()
         prepared_model = prepare_pt2e(exported_model, AddQuantizer())
         quantized_model = convert_pt2e(prepared_model)
 
```

backends/example/test_example_delegate.py

Lines changed: 2 additions & 6 deletions

```diff
@@ -46,9 +46,7 @@ def get_example_inputs():
         )
 
         m = model.eval()
-        m = torch.export.export_for_training(
-            m, copy.deepcopy(example_inputs), strict=True
-        ).module()
+        m = torch.export.export(m, copy.deepcopy(example_inputs), strict=True).module()
         # print("original model:", m)
         quantizer = ExampleQuantizer()
         # quantizer = XNNPACKQuantizer()
@@ -84,9 +82,7 @@ def test_delegate_mobilenet_v2(self):
         )
 
         m = model.eval()
-        m = torch.export.export_for_training(
-            m, copy.deepcopy(example_inputs), strict=True
-        ).module()
+        m = torch.export.export(m, copy.deepcopy(example_inputs), strict=True).module()
         quantizer = ExampleQuantizer()
 
         m = prepare_pt2e(m, quantizer)
```

backends/mediatek/quantizer/annotator.py

Lines changed: 2 additions & 4 deletions

```diff
@@ -10,7 +10,7 @@
 from torch._ops import OpOverload
 from torch._subclasses import FakeTensor
 
-from torch.export import export_for_training
+from torch.export import export
 from torch.fx import Graph, Node
 from torch.fx.passes.utils.matcher_with_name_node_map_utils import (
     SubgraphMatcherWithNameNodeMap,
@@ -158,9 +158,7 @@ def forward(self, x):
             return norm, {}
 
     for pattern_cls in (ExecuTorchPattern, MTKPattern):
-        pattern_gm = export_for_training(
-            pattern_cls(), (torch.randn(3, 3),), strict=True
-        ).module()
+        pattern_gm = export(pattern_cls(), (torch.randn(3, 3),), strict=True).module()
         matcher = SubgraphMatcherWithNameNodeMap(
             pattern_gm, ignore_literals=True, remove_overlapping_matches=False
         )
```
