pytorch
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
Lines changed: 1 addition & 1 deletion
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
Lines changed: 1 addition & 1 deletion
diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh
Lines changed: 1 addition & 1 deletion
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
Lines changed: 3 additions & 3 deletions
diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py
Lines changed: 19 additions & 0 deletions
diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py
Lines changed: 5 additions & 2 deletions
diff --git a/backends/qualcomm/_passes/layout_transform.py b/backends/qualcomm/_passes/layout_transform.py
Lines changed: 1 addition & 1 deletion
diff --git a/backends/qualcomm/quantizer/annotators.py b/backends/qualcomm/quantizer/annotators.py
Lines changed: 4 additions & 2 deletions
diff --git a/backends/qualcomm/quantizer/custom_annotation.py b/backends/qualcomm/quantizer/custom_annotation.py
Lines changed: 7 additions & 4 deletions
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
Lines changed: 8 additions & 0 deletions
@@ -269,7 +269,7 @@ jobs:
         if [[ ${{ matrix.os}} == "bare_metal" ]]; then
           bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
         elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
-          CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
+          CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
           cmake --build cmake-out -j9 --target install --config Release
           CXXFLAGS=${cxx_flags}  cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
           cmake --build cmake-out/test -j9 --config Release
 
@@ -128,7 +128,7 @@ def ethosu_compile_spec(
         self.compiler_flags.append("--output-format=raw")
         self.compiler_flags.append("--debug-force-regor")
 
-        base_tosa_version = "TOSA-0.80+BI"
+        base_tosa_version = "TOSA-1.0+INT"
         if "u55" in target:
             # Add the Ethos-U55 extension marker
             base_tosa_version += "+u55"
 
@@ -228,7 +228,7 @@ test_models_ethos-u85() { # End to End model tests using model_test.py
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=mv2 --extra_flags="-DET_ATOL=2.00 -DET_RTOL=2.00"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03"
-    python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"
+    #python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01"  # Takes long time to run
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=resnet18 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
     python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=resnet50 --extra_flags="-DET_ATOL=0.2 -DET_RTOL=0.2"
 
@@ -228,6 +228,9 @@ def _lower_ep_to_edge(
     """
     Lower an ExportedProgram to an EdgeProgramManager (in edge IR).
     """
+    # Apply passes which transform the ExportedProgram before it gets lowered to edge.
+    expo_program = apply_torch_ops_passes(expo_program)
+
     # Call to_edge to convert the graph to edge IR.
     # Note: dim_order is skipped (https://github.com/pytorch/executorch/issues/3704)
     edge_prog_manager = to_edge(
@@ -263,9 +266,6 @@ def export_to_edge(
     # Export the model into an ExportedProgram.
     expo_program = trace(model, inputs)
 
-    # Apply passes which transform the ExportedProgram before it gets lowered to edge.
-    expo_program = apply_torch_ops_passes(expo_program)
-
     # Lower the model to edge IR.
     edge_prog_manager = _lower_ep_to_edge(
         expo_program, dump_graphs, constant_methods, core_aten_exceptions
 
@@ -276,6 +276,14 @@
     "requantize.per_tensor_out(Tensor input, float in_scale, int in_zero_point, float out_scale, "
     "int out_zero_point, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)"
 )
+lib.define(
+    "roi_align_box_processor.out(Tensor rois, int output_size_h, int output_size_w, "
+    "int sampling_ratio, bool aligned, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "roi_align_box_processor(Tensor rois, int output_size_h, int output_size_w, "
+    "int sampling_ratio, bool aligned) -> (Tensor out)"
+)
 
 # Custom ops with aten namespace. Need to specify the lib var as FRAGMENT type as aten library is already defined
 aten_lib = Library("aten", "FRAGMENT")
@@ -1038,3 +1046,14 @@ def idma_store_impl(
     channel: int = 0,
 ) -> torch.Tensor:
     return copy_idma_copy_impl(src, task_num, channel)
+
+
+@register_fake("cadence::roi_align_box_processor")
+def roi_align_box_processor_meta(
+    rois: torch.Tensor,
+    output_size_h: int,
+    output_size_w: int,
+    sampling_ratio: int,
+    aligned: bool,
+) -> torch.Tensor:
+    return rois.new_empty((rois.shape[0], 80), dtype=torch.uint8)
@@ -2328,12 +2328,15 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
 
             # Extract an argument to a separate full op.
             with graph_module.graph.inserting_before(mul_node):
-                full_tensor = graph_module.graph.call_function(
+                full_node = graph_module.graph.call_function(
                     torch.ops.aten.full.default, args=([1], full_arg)
                 )
+                full_node.meta = mul_node.meta
+                full_node.meta["val"] = [1]
                 new_mul_node = graph_module.graph.call_function(
-                    torch.ops.aten.mul.Tensor, args=(x_arg, full_tensor)
+                    torch.ops.aten.mul.Tensor, args=(x_arg, full_node)
                 )
+                new_mul_node.meta = mul_node.meta
             # Replace the old mul with a newly created mul.
             mul_node.replace_all_uses_with(new_mul_node)
             graph_module.graph.erase_node(mul_node)
 
@@ -103,8 +103,8 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.pow.Tensor_Scalar,
         exir_ops.edge.aten.prelu.default,
         exir_ops.edge.aten.repeat.default,
-        exir_ops.edge.aten.round.default,
         exir_ops.edge.aten.relu.default,
+        exir_ops.edge.aten.round.default,
         exir_ops.edge.aten.sigmoid.default,
         exir_ops.edge.aten.split_with_sizes.default,
         exir_ops.edge.aten.split_with_sizes_copy.default,
 
@@ -278,7 +278,9 @@ def annotate_masked_fill(node: Node, quantization_config: QuantizationConfig) ->
     )
 
 
-@register_annotator([torch.ops.aten.mul, torch.ops.aten.mul.Tensor])
+@register_annotator(
+    [torch.ops.aten.mul, torch.ops.aten.mul.Tensor, torch.ops.aten.mul_.Tensor]
+)
 def annotate_mul(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_binary(node, quantization_config)
 
@@ -1311,7 +1313,7 @@ def annotate_where(node: Node, quantization_config: QuantizationConfig) -> None:
     )
 
 
-@register_annotator([torch.ops.aten.zeros.default])
+@register_annotator([torch.ops.aten.zeros.default, torch.ops.aten.zeros_like.default])
 def annotate_zeros(node: Node, quantization_config: QuantizationConfig) -> None:
     if _is_annotated([node]) or not _is_float_tensor(node):
         return
 
@@ -153,7 +153,9 @@ def annotate_prefill_kv_output(gm: torch.fx.GraphModule, kv_quant_attrs: dict):
                 )
 
 
-def annotate_matmul_16a8w(gm: torch.fx.GraphModule) -> None:  # noqa: C901
+def annotate_matmul_16a8w(  # noqa: C901
+    gm: torch.fx.GraphModule, annotate_conv=True
+) -> None:
     """
     This function is specific for matmul op 16a8w.
     For k, we will tag such as the below, and
@@ -317,9 +319,10 @@ def annotate_matmul_input1(node: Node):
                 # The arguments of cat op: (the past kv cache, the new kv cache)
                 node = node.args[0][1]
             elif node.target == torch.ops.aten.conv2d.default:
-                annotate_conv2d(
-                    node, quantization_config=quantization_config_8a4w_per_channel
-                )
+                if annotate_conv:
+                    annotate_conv2d(
+                        node, quantization_config=quantization_config_8a4w_per_channel
+                    )
                 break
             elif node.target in [torch.ops.aten.add.Tensor, torch.ops.aten.sub.Tensor]:
                 break
 
@@ -85,6 +85,7 @@ if [ "$BUILD_AARCH64" = true ]; then
         -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
         -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
         -DQNN_SDK_ROOT=$QNN_SDK_ROOT \
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI='arm64-v8a' \
@@ -104,6 +105,9 @@ if [ "$BUILD_AARCH64" = true ]; then
         -DANDROID_ABI='arm64-v8a' \
         -DANDROID_PLATFORM=android-30 \
         -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
+        -DSUPPORT_REGEX_LOOKAHEAD=ON \
+        -DBUILD_TESTING=OFF \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
         -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
         -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
@@ -134,6 +138,7 @@ if [ "$BUILD_X86_64" = true ]; then
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
         -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
         -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
         -S $PRJ_ROOT \
         -B $BUILD_ROOT \
@@ -157,6 +162,9 @@ if [ "$BUILD_X86_64" = true ]; then
        -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
        -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
        -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
+       -DSUPPORT_REGEX_LOOKAHEAD=ON \
+       -DBUILD_TESTING=OFF \
+       -DEXECUTORCH_ENABLE_LOGGING=ON \
        -B$EXAMPLE_ROOT
 
    cmake --build $EXAMPLE_ROOT -j$BUILD_JOB_NUMBER
Original file line number	Diff line number	Diff line change
`@@ -278,7 +278,9 @@ def annotate_masked_fill(node: Node, quantization_config: QuantizationConfig) ->`
`278`	`278`	`)`
`279`	`279`
`280`	`280`
`281`		`-@register_annotator([torch.ops.aten.mul, torch.ops.aten.mul.Tensor])`
	`281`	`+@register_annotator(`
	`282`	`+ [torch.ops.aten.mul, torch.ops.aten.mul.Tensor, torch.ops.aten.mul_.Tensor]`
	`283`	`+)`
`282`	`284`	`def annotate_mul(node: Node, quantization_config: QuantizationConfig) -> None:`
`283`	`285`	`annotate_binary(node, quantization_config)`
`284`	`286`
`@@ -1311,7 +1313,7 @@ def annotate_where(node: Node, quantization_config: QuantizationConfig) -> None:`
`1311`	`1313`	`)`
`1312`	`1314`
`1313`	`1315`
`1314`		`-@register_annotator([torch.ops.aten.zeros.default])`
	`1316`	`+@register_annotator([torch.ops.aten.zeros.default, torch.ops.aten.zeros_like.default])`
`1315`	`1317`	`def annotate_zeros(node: Node, quantization_config: QuantizationConfig) -> None:`
`1316`	`1318`	`if _is_annotated([node]) or not _is_float_tensor(node):`
`1317`	`1319`	`return`