pytorch
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 6 additions & 1 deletion b/‎.github/workflows/pull.yml‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 22 additions & 31 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 22 additions & 31 deletions
diff --git a/‎backends/arm/operators/op_conv2d.py‎
Lines changed: 3 additions & 3 deletions b/‎backends/arm/operators/op_conv2d.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/arm/test/ops/test_linear.py‎
Lines changed: 2 additions & 12 deletions b/‎backends/arm/test/ops/test_linear.py‎
Lines changed: 2 additions & 12 deletions
@@ -970,11 +970,16 @@ jobs:
         PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh --build
 
         # Test models serially
-        models="mv2 mv3 edsr resnet18 resnet50 dl3"
+        models="mv2 mv3 edsr resnet18 resnet50 dl3 w2l ic3 ic4"
         for model in $models; do
           python -m examples.vulkan.export --model_name=$model --test
         done
 
+        # For selected vision models, test with dynamic shapes
+        models="mv2 resnet18 resnet50 ic3 densenet161"
+        for model in $models; do
+          python -m examples.vulkan.export --model_name=$model --test -d
+        done
 
   test-vulkan-operators-linux:
     name: test-vulkan-operators-linux
 
@@ -824,11 +824,26 @@ jobs:
         echo "Recipe: $RECIPE"
         echo "Quantize: $QUANTIZE"
 
-        echo "::group::Set up ExecuTorch"
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+
+        echo "::group::Setup ExecuTorch"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+        echo "::endgroup::"
+
+        echo "::group::Setup Huggingface"
+        pip install -U "huggingface_hub[cli]" accelerate
+        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+        echo "::endgroup::"
+
+        echo "::group::Test MODEL: $MODEL RECIPE: $RECIPE QUANTIZE: $QUANTIZE"
+        export OUTPUT_DIR="$(pwd)/${MODEL}_${RECIPE}_${QUANTIZE}"
+        python .ci/scripts/test_huggingface_optimum_model.py --model "$MODEL" --recipe "$RECIPE" $QUANTIZE --model_dir "$OUTPUT_DIR"
+        echo "::endgroup::"
+
         # Build executor_runner with ETdump enabled
         PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
           -DCMAKE_INSTALL_PREFIX=cmake-out \
@@ -846,25 +861,6 @@ jobs:
           -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
           -Bcmake-out .
         cmake --build cmake-out -j16 --target install --config Release
-        echo "::endgroup::"
-
-        echo "::group::Set up Hugging Face"
-        pip install -U "huggingface_hub[cli]"
-        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
-        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
-        git clone https://github.com/huggingface/optimum-executorch
-        pushd optimum-executorch
-        # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout $OPTIMUM_ET_COMMIT
-        python install_dev.py --skip_override_torch
-        popd
-        pip list
-        echo "::endgroup::"
-
-        echo "::group::Run tests"
-        export OUTPUT_DIR="$(pwd)/${MODEL}_${RECIPE}_${QUANTIZE}"
-        python .ci/scripts/test_huggingface_optimum_model.py --model ${MODEL} --recipe ${RECIPE} ${QUANTIZE} --model_dir ${OUTPUT_DIR}
-        echo "::endgroup::"
 
         echo "::group::Generate artifacts for performance profiling"
         ./cmake-out/executor_runner \
@@ -931,16 +927,11 @@ jobs:
         ${CONDA_RUN} python install_executorch.py
         echo "::endgroup::"
 
-        echo "::group::Set up Hugging Face"
-        pip install -U "huggingface_hub[cli]"
-        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
-        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
-        git clone https://github.com/huggingface/optimum-executorch
-        pushd optimum-executorch
-        # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout $OPTIMUM_ET_COMMIT
-        ${CONDA_RUN} python install_dev.py --skip_override_torch
-        popd
+        echo "::group::Set up Huggingface"
+        ${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
+        ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+        ${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
         ${CONDA_RUN} pip list
         echo "::endgroup::"
 
 
@@ -182,11 +182,11 @@ def define_node(
             acc_type = ts.DType.FP32
 
         tosa_graph.addConst(
-            [1], output.dtype, [input_zp], name=f"{conv2d_output_name}_input_zp"
+            [1], inputs[0].dtype, [input_zp], name=f"{conv2d_output_name}_input_zp"
         )
         tosa_graph.addConst(
             [1],
-            output.dtype,
+            inputs[1].dtype,
             weight_zp,
             name=f"{conv2d_output_name}_weight_zp",
         )
@@ -269,7 +269,7 @@ def define_node(
 
         # For quantized convolution, rescale the output value back to the same
         # integer value domain of the next op. Otherwise return float32 output.
-        if inputs[0].dtype == ts.DType.INT8 or inputs[0].dtype == ts.DType.INT16:
+        if output.dtype == ts.DType.INT8 or output.dtype == ts.DType.INT16:
             # Get scale_factor from input, weight, and output.
             input_scale = input_qparams[0].get_scale_per_tensor()  # type: ignore[possibly-undefined]  # pyre-ignore [61]
             per_channel_quant = input_qparams[1].per_channel  # pyre-ignore [61]
 
@@ -8,8 +8,6 @@
 
 from typing import Tuple
 
-import pytest
-
 import torch
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
@@ -313,12 +311,8 @@ def test_linear_16a8w_tosa_INT(test_data: torch.Tensor):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.parametrize("test_data", test_data_all_16a8w)
 @common.XfailIfNoCorstone300
-@pytest.mark.xfail(
-    reason="Ethos-U55 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
-    strict=False,
-)
 def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
     """Test linear operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
     test_data, out_features, has_bias, per_channel_quantization = test_data()
@@ -347,12 +341,8 @@ def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.parametrize("test_data", test_data_all_16a8w)
 @common.XfailIfNoCorstone320
-@pytest.mark.xfail(
-    reason="Ethos-U55 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
-    strict=False,
-)
 def test_linear_16a8w_u85_INT16(test_data: torch.Tensor):
     """Test linear operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
     test_data, out_features, has_bias, per_channel_quantization = test_data()