24 changes: 24 additions & 0 deletions .ci/scripts/setup-windows.ps1
@@ -0,0 +1,24 @@
param (
    [string]$editable = $false
)

conda create --yes --quiet -n et python=3.12
conda activate et

# Activate the VS environment - this is required for Dynamo to work, as it uses MSVC.
# There are a bunch of environment variables that it requires.
# See https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line.
& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64

# Install test dependencies
pip install -r .ci/docker/requirements-ci.txt

if ($editable -eq 'true') {
    install_executorch.bat --editable
} else {
    install_executorch.bat
}
if ($LASTEXITCODE -ne 0) {
    Write-Host "Installation was unsuccessful. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}
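Note that the editable flag is declared as a string and compared against the literal 'true', so only that exact value triggers an editable install; anything else, including the default, falls through to a regular install. A minimal invocation sketch, assuming conda has already been initialized for PowerShell as the CI jobs do; the flag value is illustrative:

# Illustrative only - the CI scripts pass the flag through as a plain string.
powershell .ci/scripts/setup-windows.ps1 -editable true
powershell .ci/scripts/setup-windows.ps1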
81 changes: 81 additions & 0 deletions .ci/scripts/test_model.ps1
@@ -0,0 +1,81 @@
param (
    [string]$modelName,
    [string]$backend,
    [string]$buildDir = "cmake-out",
    [bool]$strict = $false
)

Set-PSDebug -Trace 1
$ErrorActionPreference = 'Stop'
$PSNativeCommandUseErrorActionPreference = $true

function ExportModel-Portable {
    param (
        [string]$model_name,
        [bool]$strict
    )

    $exportParams = "--model_name", "$modelName"
    if ($strict) {
        $exportParams += "--strict"
    }
    python -m examples.portable.scripts.export @exportParams | Write-Host
    if ($LASTEXITCODE -ne 0) {
        Write-Host "Model export failed. Exit code: $LASTEXITCODE."
        exit $LASTEXITCODE
    }

    "$modelName.pte"
}

function ExportModel-Xnnpack {
    param (
        [string]$model_name
    )

    python -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate | Write-Host
    if ($LASTEXITCODE -ne 0) {
        Write-Host "Model export failed. Exit code: $LASTEXITCODE."
        exit $LASTEXITCODE
    }

    "$($modelName)_xnnpack_fp32.pte"
}

.ci/scripts/setup-windows.ps1

# Build the runner
if (Test-Path -Path $buildDir) {
    Remove-Item -Path $buildDir -Recurse -Force
}
New-Item -Path $buildDir -ItemType Directory
Push-Location $buildDir
cmake .. --preset windows
cmake --build . -t executor_runner -j16 --config Release
if ($LASTEXITCODE -ne 0) {
    Write-Host "Runner build failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}
$executorBinaryPath = Join-Path -Path $buildDir -ChildPath "Release\executor_runner.exe"
Pop-Location

# Export the model
switch ($backend) {
    "portable" {
        $model_path = ExportModel-Portable -model_name $modelName -strict $strict
    }
    "xnnpack" {
        $model_path = ExportModel-Xnnpack -model_name $modelName
    }
    default {
        Write-Host "Unknown backend $backend."
        exit 1
    }
}

# Run the runner
& "$executorBinaryPath" --model_path="$model_path"
if ($LASTEXITCODE -ne 0) {
    Write-Host "Model execution failed. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}
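For reference, this script is what the new test-models-windows job in trunk.yml (further down in this diff) invokes. A minimal local invocation sketch; mv2 and xnnpack are example values taken from that job's matrix:

# Run from the repository root.
conda init powershell
powershell .ci/scripts/test_model.ps1 -modelName mv2 -backend xnnpack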
17 changes: 17 additions & 0 deletions .ci/scripts/unittest-windows.ps1
@@ -0,0 +1,17 @@
param (
    [string]$editable = $false
)

Set-PSDebug -Trace 1
$ErrorActionPreference = 'Stop'
$PSNativeCommandUseErrorActionPreference = $true

.ci/scripts/setup-windows.ps1 -editable $editable

# Run pytest with coverage
# pytest -n auto --cov=./ --cov-report=xml
pytest -v --full-trace -c pytest-windows.ini
if ($LASTEXITCODE -ne 0) {
    Write-Host "Pytest invocation was unsuccessful. Exit code: $LASTEXITCODE."
    exit $LASTEXITCODE
}
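This is the entry point driven by the new windows job in _unittest.yml below; a minimal local invocation sketch, with the editable value being illustrative:

# Assumes conda is initialized for PowerShell, as the CI job does before calling this script.
powershell .ci/scripts/unittest-windows.ps1 -editable "false"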
12 changes: 12 additions & 0 deletions .github/workflows/_unittest.yml
@@ -19,6 +19,7 @@ on:
required: false
type: string
description: Install ExecuTorch in editable mode or not.
default: 'false'
python-version:
required: false
type: string
@@ -52,3 +53,14 @@ jobs:
# This is needed to get the prebuilt PyTorch wheel from S3
${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
.ci/scripts/unittest-macos.sh --build-tool "${{ inputs.build-tool }}" --build-mode "${{ inputs.build-mode }}" --editable "${{ inputs.editable }}"

  windows:
    if: ${{ inputs.build-tool == 'cmake' }}
    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
    with:
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 120
      script: |
        conda init powershell
        powershell .ci/scripts/unittest-windows.ps1 -editable "${{ inputs.editable }}"
15 changes: 15 additions & 0 deletions .github/workflows/trunk.yml
@@ -979,3 +979,18 @@ jobs:
# Run MCU models
chmod +x examples/arm/run_mcu_models_fvp.sh
examples/arm/run_mcu_models_fvp.sh --target=cortex-m55

  test-models-windows:
    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
    strategy:
      fail-fast: false
      matrix:
        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
        backend: [portable, xnnpack]
    with:
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 60
      script: |
        conda init powershell
        powershell .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
4 changes: 4 additions & 0 deletions .gitignore
@@ -65,3 +65,7 @@ xcuserdata/

# Android
*.aar

# Windows
*.dll
*.pyd
2 changes: 1 addition & 1 deletion backends/xnnpack/CMakeLists.txt
@@ -62,7 +62,7 @@ endforeach()
if(WIN32 AND NOT CMAKE_CROSSCOMPILING)
set(MV_COMMAND
powershell -Command
"Move-Item -Path ${_xnnpack_flatbuffer__outputs} -Destination ${_xnnpack_schema__outputs}"
"Move-Item -Path ${_xnnpack_flatbuffer__outputs} -Destination ${_xnnpack_schema__outputs} -Force"
)
else()
set(MV_COMMAND mv ${_xnnpack_flatbuffer__outputs} ${_xnnpack_schema__outputs})
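The change above adds -Force to the Move-Item invocation, so the generated schema can overwrite a destination file that already exists (for example, on a repeated or incremental build) instead of erroring out. A small standalone PowerShell illustration with placeholder file names, not the actual build outputs:

# Create two placeholder files standing in for the generated flatbuffer outputs.
New-Item -ItemType File -Path example_src.fbs -Force | Out-Null
New-Item -ItemType File -Path example_dst.fbs -Force | Out-Null
# Without -Force, Move-Item fails because the destination already exists; -Force overwrites it.
Move-Item -Path example_src.fbs -Destination example_dst.fbs -Force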
12 changes: 12 additions & 0 deletions conftest.py
@@ -0,0 +1,12 @@
import sys

collect_ignore_glob: list[str] = []

# Skip Apple tests on Windows. Note that some Core ML tests can run on Linux, as the AOT flow
# is available. Tests will manage this internally. However, the coremltools import is not available
# on Windows and causes collection to fail. The easiest way to manage this seems to be to just
# skip collection for this subdirectory on unsupported platforms.
if sys.platform == "win32":
    collect_ignore_glob += [
        "backends/apple/**",
    ]
2 changes: 1 addition & 1 deletion examples/apple/coreml/scripts/build_executor_runner.sh
@@ -93,7 +93,7 @@ find "$CMAKE_BUILD_DIR_PATH/" -name 'libcoreml_inmemoryfs.a' -exec cp -f "{}" "$
find "$CMAKE_BUILD_DIR_PATH/" -name 'libcoremldelegate.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libcoremldelegate.a" \;
find "$CMAKE_BUILD_DIR_PATH/" -name 'libportable_ops_lib.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libportable_ops_lib.a" \;
find "$CMAKE_BUILD_DIR_PATH/" -name 'libportable_kernels.a' -exec cp -f "{}" "$LIBRARIES_DIR_PATH/libportable_kernels.a" \;
cp -f "$CMAKE_BUILD_DIR_PATH/third-party/flatcc_external_project/lib/libflatccrt.a" "$LIBRARIES_DIR_PATH/libflatccrt.a"
cp -f "$CMAKE_BUILD_DIR_PATH/third-party/flatcc_ep/lib/libflatccrt.a" "$LIBRARIES_DIR_PATH/libflatccrt.a"

# Build the runner
echo "ExecuTorch: Building runner"
@@ -269,7 +269,7 @@ def replace_kv_cache_with_quantized_kv_cache(module):
executorch_package_path = executorch.__path__[-1]
libs = list(
glob.glob(
f"{executorch_package_path}/**/libquantized_ops_aot_lib.*",
f"{executorch_package_path}/**/*quantized_ops_aot_lib.*",
recursive=True,
)
)
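The glob relaxation above (and the matching changes in several files below) accounts for platform library naming: on Linux the AOT library is typically built as libquantized_ops_aot_lib.so, while on Windows the artifact usually has no lib prefix and a .dll or .pyd extension (hence the new .gitignore entries). A rough PowerShell sketch of how the relaxed wildcard matches both spellings; the file names are examples, not actual build outputs:

# Example artifact names for the two platforms.
$candidates = 'libquantized_ops_aot_lib.so', 'quantized_ops_aot_lib.dll', 'quantized_ops_aot_lib.pyd'
# The relaxed pattern matches all three; the old lib-prefixed pattern matches only the first.
$candidates | Where-Object { $_ -like '*quantized_ops_aot_lib.*' }
$candidates | Where-Object { $_ -like 'libquantized_ops_aot_lib.*' }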
1 change: 1 addition & 0 deletions exir/tracer.py
@@ -48,6 +48,7 @@
from torch._decomp import get_decompositions
from torch._dynamo.guards import Guard
from torch._functorch.eager_transforms import _maybe_unwrap_functional_tensor

from torch.export import default_decompositions
from torch.func import functionalize
from torch.fx.operator_schemas import normalize_function
2 changes: 1 addition & 1 deletion extension/llm/custom_ops/custom_ops.py
@@ -33,7 +33,7 @@
package_path = Path(__file__).parent.resolve()
logging.info(f"Looking for libcustom_ops_aot_lib.so in {package_path}")

libs = list(package_path.glob("**/libcustom_ops_aot_lib.*"))
libs = list(package_path.glob("**/*custom_ops_aot_lib.*"))

assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
logging.info(f"Loading custom ops library: {libs[0]}")
2 changes: 1 addition & 1 deletion extension/llm/custom_ops/op_tile_crop_aot.py
@@ -13,7 +13,7 @@
tile_crop = torch.ops.preprocess.tile_crop.default
assert tile_crop is not None
except:
libs = list(Path(__file__).parent.resolve().glob("libcustom_ops_aot_lib.*"))
libs = list(Path(__file__).parent.resolve().glob("*custom_ops_aot_lib.*"))
assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
logging.info(f"Loading custom ops library: {libs[0]}")
torch.ops.load_library(libs[0])
8 changes: 0 additions & 8 deletions install_executorch.py
@@ -194,14 +194,6 @@ def main(args):
clean()
return

cmake_args = [os.getenv("CMAKE_ARGS", "")]
# Use ClangCL on Windows.
# ClangCL is an alias to Clang that configures it to work in an MSVC-compatible
# mode. Using it on Windows to avoid compiler compatibility issues for MSVC.
if os.name == "nt":
cmake_args.append("-T ClangCL")
os.environ["CMAKE_ARGS"] = " ".join(cmake_args)

check_and_update_submodules()
# This option is used in CI to make sure that PyTorch build from the pinned commit
# is used instead of nightly. CI jobs wouldn't be able to catch regression from the
9 changes: 7 additions & 2 deletions install_requirements.py
@@ -112,8 +112,13 @@ def install_requirements(use_pytorch_nightly):

LOCAL_REQUIREMENTS = [
"third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi.
"extension/llm/tokenizers", # TODO(larryliu0820): Setup a pypi package for this.
]
] + (
[
"extension/llm/tokenizers", # TODO(larryliu0820): Setup a pypi package for this.
]
if sys.platform != "win32"
else []
) # TODO(gjcomer): Re-enable when buildable on Windows.

# Install packages directly from local copy instead of pypi.
# This is usually not recommended.
2 changes: 1 addition & 1 deletion kernels/quantized/__init__.py
@@ -7,7 +7,7 @@
try:
from pathlib import Path

libs = list(Path(__file__).parent.resolve().glob("**/libquantized_ops_aot_lib.*"))
libs = list(Path(__file__).parent.resolve().glob("**/*quantized_ops_aot_lib.*"))
del Path
assert len(libs) == 1, f"Expected 1 library but got {len(libs)}"
import torch as _torch
115 changes: 115 additions & 0 deletions pytest-windows.ini
@@ -0,0 +1,115 @@
# NOTE: This file is a copy of pytest.ini, but with additional tests disabled for Windows. This
# is intended to be a short-term solution to allow for incrementally enabling tests on Windows.
# This file is intended to be deleted once the enablement is complete.

[pytest]
addopts =
# show summary of all tests that did not pass
-rEfX
# Make tracebacks shorter
--tb=native
# capture only Python print and C++ py::print, but not C output (low-level Python errors)
--capture=sys
# don't suppress warnings, but don't shove them all to the end either
-p no:warnings
# Ignore backends/arm tests; you need to run examples/arm/setup.sh to install the required tooling to make them work.
# For GitHub testing this is set up/executed in the unittest-arm job; see .github/workflows/pull.yml for more info.
--ignore-glob=backends/arm/**/*
# explicitly list out tests that are running successfully in oss
.ci/scripts/tests
examples/models/test
devtools/
--ignore=devtools/visualization/visualization_utils_test.py
# examples
# examples/models/llava/test TODO: enable this
# exir
exir/_serialize/test
exir/backend/test
exir/dialects/backend/test
exir/dialects/edge/test
exir/dialects/test
exir/emit/test
exir/program/test
exir/tests/
# executorch/export
export/tests
--ignore=export/tests/test_export_stages.py
# kernels/
kernels/prim_ops/test
kernels/quantized
# Because this test depends on a test-only cpp ops lib.
# Will add test-only cmake targets to re-enable this test,
# but maybe it is a bit of an anti-pattern.
--ignore=kernels/quantized/test/test_quant_dequant_per_token.py
kernels/test/test_case_gen.py
# backends/test
# This effort is WIP and will be enabled in CI once testing infra
# is stable and signal to noise ratio is good (no irrelevant failures).
# See https://github.com/pytorch/executorch/discussions/11140
--ignore=backends/test
backends/test/harness/tests
backends/test/suite/tests
# backends/xnnpack
backends/xnnpack/test/ops
--ignore=backends/xnnpack/test/ops/test_bmm.py
--ignore=backends/xnnpack/test/ops/test_conv2d.py
--ignore=backends/xnnpack/test/ops/test_linear.py
--ignore=backends/xnnpack/test/ops/test_sdpa.py
backends/xnnpack/test/passes
backends/xnnpack/test/recipes
backends/xnnpack/test/serialization
# backends/apple/coreml
backends/apple/coreml/test
# extension/
extension/llm/custom_ops/test_sdpa_with_kv_cache.py
extension/llm/custom_ops/test_update_cache.py
extension/llm/custom_ops/test_quantized_sdpa.py
extension/pybindings/test
extension/training/pybindings/test
# Runtime
runtime
# Tools
codegen/test
tools/cmake
# test TODO: fix these tests
# test/end2end/test_end2end.py
--ignore=backends/xnnpack/test/ops/linear.py
--ignore=backends/xnnpack/test/models/llama2_et_example.py
# T200992559: Add torchao to ET as core dependency
--ignore=examples/models/llama/tests/test_pre_quantization_transforms.py
--ignore=exir/backend/test/demos
--ignore=exir/backend/test/test_backends.py
--ignore=exir/backend/test/test_backends_lifted.py
--ignore=exir/backend/test/test_partitioner.py
--ignore=exir/tests/test_common.py
--ignore=exir/tests/test_memory_format_ops_pass_aten.py
--ignore=exir/tests/test_memory_planning.py
--ignore=exir/tests/test_op_convert.py
--ignore=exir/tests/test_passes.py
--ignore=exir/tests/test_quant_fusion_pass.py
--ignore=exir/tests/test_quantization.py
--ignore=exir/tests/test_verification.py
# Tests that are (temporarily) disabled for Windows
# TODO(gjcomer) Re-enable the LLM tests when tokenizers library is available on Windows.
#examples/models/llama3_2_vision/preprocess
#examples/models/llama3_2_vision/vision_encoder/test
#examples/models/llama3_2_vision/text_decoder/test
#examples/models/llama/tests
#examples/models/llama/config
#extension/llm/modules/test
#extension/llm/export
--deselect=extension/pybindings/test/test_pybindings.py::PybindingsTest::test_method_quantized_ops
--deselect=extension/pybindings/test/test_pybindings.py::PybindingsTest::test_quantized_ops
--deselect=runtime/test/test_runtime.py::RuntimeTest::test_load_program_with_path
--deselect=exir/backend/test/test_compatibility.py::TestCompatibility::test_compatibility_in_runtime
--deselect=exir/backend/test/test_compatibility.py::TestCompatibility::test_compatibility_in_runtime_edge_program_manager
--deselect=exir/backend/test/test_lowered_backend_module.py::TestBackendAPI::test_emit_lowered_backend_module_end_to_end
--deselect=exir/backend/test/test_to_backend_multi_method.py::TestToBackendMultiMethod::test_multi_method_end_to_end
--deselect=extension/llm/custom_ops/test_sdpa_with_kv_cache.py::SDPATestForSpeculativeDecode::test_sdpa_with_cache_seq_len_130
--deselect=devtools/inspector/tests/inspector_test.py::TestInspector::test_etrecord_populates_correct_edge_dialect_aot_intermediate_outputs
--deselect=devtools/inspector/tests/inspector_test.py::TestInspector::test_etrecord_populates_correct_export_program_aot_intermediate_outputs

# run the same tests multiple times to determine their
# flakiness status. Default to 50 re-runs
flake-finder = true
flake-runs = 50