Commit 4e03115

Merge branch 'main' into experimental/clone_support
2 parents 62ea5c1 + a93f59e

103 files changed: +3271 −701 lines


.github/workflows/cuda.yml

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        cuda-version: ["12.6", "12.8", "13.0"]
+        cuda-version: ["12.6", "12.8", "12.9", "13.0"]
 
     name: test-executorch-cuda-build-${{ matrix.cuda-version }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
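
For context, the strategy matrix fans out into one job per listed version, named by the template on the name: line, so this change adds a fourth CUDA build. A tiny sketch of the expansion, illustrative only and not part of the commit:

    # Each matrix entry becomes its own CI job; adding "12.9" yields a fourth one.
    cuda_versions = ["12.6", "12.8", "12.9", "13.0"]
    for v in cuda_versions:
        print(f"test-executorch-cuda-build-{v}")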

.github/workflows/pull.yml

Lines changed: 11 additions & 2 deletions

@@ -862,15 +862,24 @@ jobs:
       # Install Node.js and Emscripten
       source .ci/scripts/setup-emscripten.sh
 
+      export PNPM_VERSION=10.24.0
+
+      curl -fsSL https://get.pnpm.io/install.sh | env PNPM_VERSION=$PNPM_VERSION SHELL="$(which bash)" sh -
+
+      export PNPM_HOME="$HOME/.local/share/pnpm"
+      export PATH="$PNPM_HOME:$PATH"
+
+      pnpm --version
+
       # Test selective build
       bash scripts/build_wasm_tests.sh ${{ matrix.enable-etdump }}
 
       # Install Jest
       cd cmake-out-wasm/extension/wasm/test
-      npm install --save-dev jest
+      pnpm add -D jest@30.2.0 --ignore-scripts
 
       # Run unit test
-      npm test
+      pnpm test
 
   unittest-nxp-neutron:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

backends/aoti/aoti_backend.py

Lines changed: 9 additions & 24 deletions

@@ -9,7 +9,7 @@
 import typing
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Set
 
 import torch
 from executorch.backends.aoti.passes.replace_view_copy_with_view import (
@@ -91,39 +91,24 @@ def collect_unsupported_fallback_kernels(cls, missing_fallback_kernels: Set[str]
         )
 
         def generate_c_shim_extern_kernel_call_and_collect_unsupported_kernels(
-            self,
-            kernel: str,
-            args: list[str],
-            device: str,
-            *,
-            debug_args: Optional[list[str]] = None,
-            debug_handle: Optional[int] = None,
-        ):
+            self, kernel: str, *args: Any, **kwargs: Any
+        ) -> None:
             if kernel not in supported_kernels:
                 missing_fallback_kernels.add(kernel)
 
-            original_generate_c_shim_extern_kernel_call(
-                self,
-                kernel,
-                args,
-                device,
-                debug_args=debug_args,
-                debug_handle=debug_handle,
+            return original_generate_c_shim_extern_kernel_call(
+                self, kernel, *args, **kwargs
             )
 
         def generate_fallback_kernel_with_runtime_lookup_aot_and_collect_unsupported_kernels(
-            self,
-            op_overload,
-            raw_args,
-            output_args,
-            raw_outputs,
-        ):
+            self, op_overload: Any, *args: Any, **kwargs: Any
+        ) -> None:
             kernel_name = getattr(op_overload, "_name", str(op_overload))
             if kernel_name not in supported_kernels:
                 missing_fallback_kernels.add(kernel_name)
 
-            original_generate_fallback_kernel_with_runtime_lookup_aot(
-                self, op_overload, raw_args, output_args, raw_outputs
+            return original_generate_fallback_kernel_with_runtime_lookup_aot(
+                self, op_overload, *args, **kwargs
             )
 
         CppWrapperCpu.generate_c_shim_extern_kernel_call = (
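
These wrappers previously mirrored the exact parameter lists of torch's CppWrapperCpu methods; forwarding *args/**kwargs instead keeps the monkey-patch working if those internal signatures change between torch releases. A minimal self-contained sketch of the same pattern, with hypothetical names rather than the ExecuTorch code:

    from typing import Any, Callable, Set

    supported: Set[str] = {"aoti_torch_mul"}  # assumed example kernel name
    missing: Set[str] = set()

    def wrap_and_collect(original: Callable[..., Any]) -> Callable[..., Any]:
        # Inspect only the kernel name, then forward everything unchanged so
        # the wrapper survives upstream signature changes.
        def wrapper(self: Any, kernel: str, *args: Any, **kwargs: Any) -> Any:
            if kernel not in supported:
                missing.add(kernel)
            return original(self, kernel, *args, **kwargs)
        return wrapper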

backends/cadence/aot/ops_registrations.py

Lines changed: 70 additions & 54 deletions

@@ -1030,9 +1030,9 @@ def quantized_conv2d_nhwc_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             True,
         )
@@ -1074,9 +1074,9 @@ def quantized_conv2d_nchw_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             False,
         )
@@ -1118,9 +1118,9 @@ def quantized_conv2d_nchw_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             False,
         )
@@ -1162,9 +1162,9 @@ def quantized_conv2d_nhwc_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             True,
         )
@@ -1211,9 +1211,9 @@ def quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             False,
         )
@@ -1260,9 +1260,9 @@ def quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             False,
         )
@@ -1309,9 +1309,9 @@ def quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             True,
         )
@@ -1358,9 +1358,9 @@ def quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             True,
         )
@@ -1407,9 +1407,9 @@ def quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             False,
         )
@@ -1456,9 +1456,9 @@ def quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             False,
         )
@@ -1505,9 +1505,9 @@ def quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             True,
         )
@@ -1554,9 +1554,9 @@ def quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             True,
         )
@@ -1605,9 +1605,9 @@ def quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             False,
         )
@@ -1656,9 +1656,9 @@ def quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             False,
         )
@@ -1707,9 +1707,9 @@ def quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             True,
         )
@@ -1758,9 +1758,9 @@ def quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_meta(
         get_conv1d_output_size(
             in_size,
             out_channels,
-            stride[1],
-            padding[1],
-            dilation[1],
+            stride[-1],
+            padding[-1],
+            dilation[-1],
             kernel_size[0],
             True,
         )
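
All sixteen hunks above apply the same fix: these conv2d meta kernels also take a 1D path through get_conv1d_output_size, and on that path the stride/padding/dilation tuples can presumably arrive with a single element, where index 1 raises IndexError but index -1 still selects the innermost (width) value; for 2-element tuples the two indices agree. A quick illustration, not from the commit:

    # Single-element tuples are the conv1d-style case.
    stride_1d, stride_2d = (2,), (1, 2)
    assert stride_1d[-1] == 2 and stride_2d[-1] == 2
    try:
        stride_1d[1]
    except IndexError:
        print("stride[1] breaks for 1-element tuples; stride[-1] does not")

The final hunk in this file, just below, reworks conv1d_meta's input validation.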
@@ -2178,15 +2178,31 @@ def conv1d_meta(
     dilation: Tuple[int],
     groups: int,
 ) -> torch.Tensor:
+    # Validate tensor dimensions
+    assert len(input.shape) == 3, f"Conv1d expects 3D input, got {len(input.shape)}D"
+    assert len(weight.shape) == 3, f"Conv1d expects 3D weight, got {len(weight.shape)}D"
+
+    # Extract dimensions
+    batch_size, in_channels, length = input.shape
+    out_channels, weight_in_channels, kernel_size = weight.shape
+
+    # Validate groups parameter and channel consistency
+    assert groups > 0, f"groups must be positive, got {groups}"
     assert (
-        len(weight.shape) == 3
-    ), f"Conv1d expects a 3D weight, got {len(weight.shape)}D"
-    out_channels, _, kernel_size = weight.shape
-    in_size = input.shape
-    assert len(in_size) == 3, f"conv1d expects 3D input, got {len(in_size)}D"
+        in_channels % groups == 0
+    ), f"in_channels ({in_channels}) must be divisible by groups ({groups})"
+    assert (
+        out_channels % groups == 0
+    ), f"out_channels ({out_channels}) must be divisible by groups ({groups})"
+
+    # Validate weight channels match input channels divided by groups
+    expected_weight_in_channels = in_channels // groups
+    assert (
+        weight_in_channels == expected_weight_in_channels
+    ), f"Expected weight to have {expected_weight_in_channels} input channels (in_channels/groups), but got {weight_in_channels}"
 
     output_size = get_conv1d_output_size(
-        in_size,
+        input.shape,
         out_channels,
         stride[0],
         padding[0],
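
The rewritten conv1d_meta now spells out the standard grouped-convolution shape contract: both channel counts must divide evenly by groups, and the weight's second dimension must equal in_channels // groups. A small worked example with assumed values:

    # With in_channels=8, groups=4, a valid conv1d weight carries 2 input
    # channels per group; out_channels must also be a multiple of groups.
    in_channels, out_channels, groups, kernel_size = 8, 12, 4, 3
    assert in_channels % groups == 0 and out_channels % groups == 0
    expected_weight_in_channels = in_channels // groups  # -> 2
    weight_shape = (out_channels, expected_weight_in_channels, kernel_size)
    print(weight_shape)  # (12, 2, 3)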

backends/cadence/aot/ref_implementations.py

Lines changed: 3 additions & 3 deletions

@@ -788,9 +788,9 @@ def quantized_conv_per_tensor(
         (input_tensor - in_zero_point).float(),
         (weight - weight_zero_point).float(),
         (bias * bias_scale).float(),
-        stride[1],
-        padding[1],
-        dilation[1],
+        stride[-1],
+        padding[-1],
+        dilation[-1],
         groups,
     )
