Skip to content

Commit 6b2b72f

Browse files
authored
Merge branch 'main' into export-D80261684
2 parents be251b4 + d7fd78b commit 6b2b72f

File tree

65 files changed

+2151
-443
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+2151
-443
lines changed

.ci/scripts/unittest-windows.ps1

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
param (
    [string]$editable
)

Set-PSDebug -Trace 1
$ErrorActionPreference = 'Stop'
$PSNativeCommandUseErrorActionPreference = $true

# Fail fast on native-command errors with a consistent message, instead of
# repeating the $LASTEXITCODE boilerplate after every step.
function Assert-LastExitCode {
    param([string]$stepName)
    if ($LASTEXITCODE -ne 0) {
        Write-Host "$stepName was unsuccessful. Exit code: $LASTEXITCODE."
        exit $LASTEXITCODE
    }
}

# Create and activate a fresh conda environment for the test run.
conda create --yes --quiet -n et python=3.12
conda activate et

# Activate the VS environment - this is required for Dynamo to work, as it uses MSVC.
# There are a bunch of environment variables that it requires.
# See https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line.
& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64

# Install test dependencies
pip install -r .ci/docker/requirements-ci.txt

# NOTE(review): bare .bat invocation assumes the script dir is on PATH or
# resolvable from the working directory - confirm in the CI image.
if ($editable -eq 'true') {
    install_executorch.bat --editable
} else {
    install_executorch.bat
}
Assert-LastExitCode "Installation"

# Run pytest with coverage
# pytest -n auto --cov=./ --cov-report=xml
pytest -v --full-trace -c pytest-windows.ini
Assert-LastExitCode "Pytest invocation"

.github/workflows/_unittest.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ on:
1919
required: false
2020
type: string
2121
description: Install ExecuTorch in editable mode or not.
22+
default: 'false'
2223
python-version:
2324
required: false
2425
type: string
@@ -52,3 +53,14 @@ jobs:
5253
# This is needed to get the prebuilt PyTorch wheel from S3
5354
${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
5455
.ci/scripts/unittest-macos.sh --build-tool "${{ inputs.build-tool }}" --build-mode "${{ inputs.build-mode }}" --editable "${{ inputs.editable }}"
56+
57+
windows:
58+
if: ${{ inputs.build-tool == 'cmake' }}
59+
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
60+
with:
61+
submodules: 'recursive'
62+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
63+
timeout: 120
64+
script: |
65+
conda init powershell
66+
powershell .ci/scripts/unittest-windows.ps1 -editable "${{ inputs.editable }}"

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,7 @@ xcuserdata/
6565

6666
# Android
6767
*.aar
68+
69+
# Windows
70+
*.dll
71+
*.pyd

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
url = https://github.com/google/pthreadpool.git
2828
[submodule "extension/llm/tokenizers"]
2929
path = extension/llm/tokenizers
30-
url = https://github.com/pytorch-labs/tokenizers.git
30+
url = https://github.com/meta-pytorch/tokenizers.git
3131
[submodule "kernels/optimized/third-party/eigen"]
3232
path = kernels/optimized/third-party/eigen
3333
url = https://gitlab.com/libeigen/eigen.git

backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,12 +449,14 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
449449
case ModelAssetType::CompiledModel: {
450450
// The model is already compiled; no further action needed.
451451
// Return the existing model URL.
452+
ETCoreMLLogInfo("The model in the pte file is pre-compiled. Skipping compilation.");
452453
return modelURL;
453454
}
454455

455456
case ModelAssetType::Model: {
456457
// The model is not compiled yet.
457458
// Compile the model at the specified URL with a maximum wait time of 5 minutes.
459+
ETCoreMLLogInfo("The model in the pte file is not pre-compiled. Compiling with a 5 min timeout.");
458460
NSURL *compiledModelURL = [ETCoreMLModelCompiler compileModelAtURL:modelURL
459461
maxWaitTimeInSeconds:(5 * 60)
460462
error:error];
@@ -490,6 +492,7 @@ - (nullable ETCoreMLAsset *)compiledModelAssetWithMetadata:(const ModelMetadata&
490492
error:error];
491493
if (compiledModelURL) {
492494
// Move the compiled model to the asset manager to transfer ownership.
495+
ETCoreMLLogInfo("Storing compiled asset with identifier=%@ in the asset manager.", identifier);
493496
compiledModelAsset = [self.assetManager storeAssetAtURL:compiledModelURL withIdentifier:identifier error:error];
494497
}
495498
}];

backends/arm/_passes/convert_expand_copy_to_repeat.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,43 @@
88
import logging
99
from typing import cast
1010

11+
import torch
12+
1113
from executorch.exir.dialects._ops import ops as exir_ops
1214
from executorch.exir.pass_base import ExportPass
1315

1416
logger = logging.getLogger(__name__)
1517

1618

19+
def calculate_multiples(args):
    """Derive TOSA-style repeat multiples from ``expand_copy`` arguments.

    ``args[0]`` is either an FX node (shape taken from ``meta["val"]``) or a
    tensor-like object (shape taken from ``.data``); ``args[1]`` is the list
    of expand multiples. Returns a list of repeat counts, one per expanded
    dimension, where non-repeated dimensions are normalized to 1.
    """
    source = args[0]
    if isinstance(source, torch.fx.node.Node):
        tensor_data = source.meta["val"]
    else:
        tensor_data = source.data

    input_shape = tensor_data.shape

    expand_multiples = cast(list[int], args[1])
    expanded_rank = len(expand_multiples)

    # Expanded shape is 'input_shape' front-padded with ones.
    padding = expanded_rank - len(input_shape)
    extended_shape = [
        1 if axis < 0 else input_shape[axis]
        for axis in range(-padding, len(input_shape))
    ]

    # To convert expand arg to repeat arg, non-repeated dims should have
    # multiples[dim] = 1. Passing -1 to expand arg means
    # not changing the size of that dimension.
    return [
        m if m != -1 and extended_shape[i] == 1 else 1
        for i, m in enumerate(expand_multiples)
    ]
46+
47+
1748
class ConvertExpandCopyToRepeatPass(ExportPass):
1849
"""
1950
Replace expand copy with repeat since it is a repeat that can only repeat singleton dimensions.
@@ -26,23 +57,7 @@ def call_operator(self, op, args, kwargs, meta):
2657
if op != self.expand_copy:
2758
return super().call_operator(op, args, kwargs, meta)
2859

29-
input_shape = args[0].data.shape
30-
multiples = cast(list[int], args[1])
31-
expanded_rank = len(multiples)
32-
33-
# Expanded shape is 'input_shape' front-padded with ones.
34-
padding = expanded_rank - len(input_shape)
35-
extended_shape = [
36-
input_shape[i] if i >= 0 else 1 for i in range(-padding, len(input_shape))
37-
]
38-
39-
# To convert expand arg to repeat arg, non-repeated dims should have
40-
# multiples[dim] = 1. Passing -1 to expand arg means
41-
# not changing the size of that dimension.
42-
multiples = [
43-
multiples[i] if multiples[i] != -1 and extended_shape[i] == 1 else 1
44-
for i in range(expanded_rank)
45-
]
60+
multiples = calculate_multiples(args)
4661

4762
if all((x == 1 for x in multiples)):
4863
# All dimensions/repetitions occur only once. Remove node

backends/arm/_passes/remove_clone_pass.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@
66

77
# pyre-unsafe
88

9+
import logging
10+
911
from executorch.exir.dialects._ops import ops as exir_ops
1012
from executorch.exir.pass_base import ExportPass
1113

14+
logger = logging.getLogger(__name__)
15+
1216

1317
class RemoveClonePass(ExportPass):
1418
"""Remove all clones from graph_module"""
@@ -21,4 +25,10 @@ def call_operator(self, op, args, kwargs, meta):
2125
raise ValueError(
2226
f"clone operator expects exactly one argument, got {len(args)}"
2327
)
28+
29+
if "memory_format" in kwargs:
30+
logger.warning(
31+
f"Removing clone with memory_format '{kwargs['memory_format']}'."
32+
)
33+
2434
return args[0]

backends/arm/debug/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.

backends/arm/debug/schema.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from __future__ import annotations
7+
8+
import json
9+
10+
from dataclasses import asdict, dataclass
11+
from typing import Any
12+
13+
import serializer.tosa_serializer as ts # type: ignore
14+
import torch
15+
16+
from torch.fx.traceback import NodeSource
17+
18+
19+
@dataclass
class TosaDebugSchema:
    """Identity of a single serialized TOSA operator in the debug log."""

    node_name: str      # string form of the serialized TOSA op
    operator_name: str  # TOSA operator name looked up from its id
    operator_id: int    # numeric TOSA operator id
24+
25+
26+
@dataclass
class ATenDebugSchema:
    """ATen-level identity of an FX node."""

    node_name: str      # FX node name
    operator_name: str  # callable __name__ or the raw string target

    @staticmethod
    def from_node(node: torch.fx.Node) -> "ATenDebugSchema":
        """Build an ATenDebugSchema from an FX node.

        node.target is Union[Callable[..., Any], str], so the operator name
        must be read differently depending on which one it actually is.
        """
        target = node.target
        name = target.__name__ if callable(target) else target
        return ATenDebugSchema(node_name=node.name, operator_name=name)
40+
41+
42+
@dataclass
class TorchDebugSchema:
    """Torch-level provenance for an FX node.

    Fields fall back to explanatory strings when the corresponding entry is
    missing from ``node.meta``.
    """

    stack_trace: list[str]                   # Python stack trace, split by line
    node_trace: list[dict[str, Any]] | str   # flattened from_node chain
    nn_module_stack: dict[str, Any] | str    # originating nn.Module stack
    torch_fn: tuple[str, str] | str          # torch function info, if recorded

    @staticmethod
    def serialize_node_trace(node_trace: list[NodeSource]) -> list[dict[str, Any]]:
        """Flatten the from_node dictionary to remove nesting."""
        flattened: list[dict[str, Any]] = []
        # Depth-first traversal via an explicit stack; top-level sources get a
        # sentinel parent id of -1.
        pending = [(source, -1) for source in node_trace]

        while pending:
            current, parent_graph_id = pending.pop()
            flattened.append(
                {
                    "name": current.name,
                    "target": current.target,
                    "graph_id": current.graph_id,
                    "pass_name": current.pass_name,
                    "action": current._get_action_string(),
                    "parent_graph_id": parent_graph_id,
                }
            )
            # Children inherit the current node's graph id as their parent.
            for child in current.from_node:
                pending.append((child, current.graph_id))

        return flattened

    @staticmethod
    def from_node(node: torch.fx.Node) -> "TorchDebugSchema":
        """Collect the torch-side debug metadata attached to an FX node."""
        node_trace: str | list[dict[str, Any]] = "No node trace available."
        if "from_node" in node.meta:
            # Flatten the node_trace dictionary, so there is no nesting
            node_trace = TorchDebugSchema.serialize_node_trace(node.meta["from_node"])

        raw_stack = node.meta.get("stack_trace", "No stack trace available")
        return TorchDebugSchema(
            stack_trace=raw_stack.split("\n"),
            node_trace=node_trace,
            nn_module_stack=node.meta.get(
                "nn_module_stack", "No module stack trace available"
            ),
            torch_fn=node.meta.get("torch_fn", "No torch_fn available"),
        )
94+
95+
96+
@dataclass
class DebugSchema:
    """One recorded debug event tying an ATen node to its TOSA counterpart."""

    event_id: int                # sequential index of this event
    aten_info: ATenDebugSchema   # ATen-level node identity
    tosa_info: TosaDebugSchema   # serialized TOSA operator identity
    torch_info: TorchDebugSchema # torch-side provenance metadata
102+
103+
104+
class DebugHook:
    """Accumulates per-operator debug events emitted during TOSA serialization
    and renders them as JSON."""

    def __init__(self) -> None:
        self._debug_events: list[DebugSchema] = []
        # Reverse mapping from TOSA 1.0 operator IDs to their names, built
        # from the attributes of the serializer's Op namespace.
        self.__op_id_to_name = {
            op_id: op_name for op_name, op_id in vars(ts.Op).items()
        }

    def add(self, node: torch.fx.Node, tosa_op: Any, tosa_op_id: int) -> None:
        """Record one event linking FX ``node`` to the serialized TOSA op."""
        event = DebugSchema(
            # event_id is simply the position of the event in the log.
            event_id=len(self._debug_events),
            aten_info=ATenDebugSchema.from_node(node),
            tosa_info=TosaDebugSchema(
                node_name=str(tosa_op),
                operator_name=self.__op_id_to_name[tosa_op_id],
                operator_id=tosa_op_id,
            ),
            torch_info=TorchDebugSchema.from_node(node),
        )
        self._debug_events.append(event)

    def serialize(self) -> str:
        """Return all recorded events as pretty-printed JSON."""
        return json.dumps([asdict(event) for event in self._debug_events], indent=4)

backends/arm/operator_support/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# pyre-unsafe
77

88
from . import ( # noqa
9+
clone_support,
910
convolution_support,
1011
embedding_support,
1112
ethos_u55_support,

0 commit comments

Comments
 (0)