Merged — 79 commits by GregoryComer, Aug 8 to Aug 27, 2025 (all titled "Update").
37 changes: 37 additions & 0 deletions .github/workflows/build-presets.yml
@@ -103,3 +103,40 @@ jobs:
./install_requirements.sh > /dev/null
cmake --preset ${{ matrix.preset }}
cmake --build cmake-out -j$(( $(nproc) - 1 ))

windows:
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
strategy:
fail-fast: false
matrix:
preset: [pybind, windows]
with:
job-name: build
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
timeout: 90
script: |
set -eux
conda init powershell
powershell -Command "& {
Set-PSDebug -Trace 1
\$ErrorActionPreference = 'Stop'
\$PSNativeCommandUseErrorActionPreference = \$true

conda create --yes --quiet -n et python=3.12
conda activate et

python install_requirements.py
cmake --preset ${{ matrix.preset }} -T ClangCL
if (\$LASTEXITCODE -ne 0) {
Write-Host "CMake configuration was unsuccessful. Exit code: \$LASTEXITCODE."
exit \$LASTEXITCODE
}

\$numCores = [System.Environment]::GetEnvironmentVariable('NUMBER_OF_PROCESSORS') - 1
cmake --build cmake-out -j \$numCores
if (\$LASTEXITCODE -ne 0) {
Write-Host "CMake build was unsuccessful. Exit code: \$LASTEXITCODE."
exit \$LASTEXITCODE
}
}"
23 changes: 16 additions & 7 deletions CMakeLists.txt
@@ -595,13 +595,22 @@ if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
if(NOT WIN32)
set(data_loader_exclude_pattern "*mman_windows.h")
endif()
install(
DIRECTORY extension/data_loader/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/extension/data_loader
FILES_MATCHING
PATTERN "*.h"
PATTERN ${data_loader_exclude_pattern} EXCLUDE
)
if (DEFINED data_loader_exclude_pattern)
install(
DIRECTORY extension/data_loader/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/extension/data_loader
FILES_MATCHING
PATTERN "*.h"
PATTERN ${data_loader_exclude_pattern} EXCLUDE
)
else()
install(
DIRECTORY extension/data_loader/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/extension/data_loader
FILES_MATCHING
PATTERN "*.h"
)
endif()
list(APPEND _executorch_extensions extension_data_loader)
endif()

2 changes: 1 addition & 1 deletion backends/xnnpack/CMakeLists.txt
@@ -59,7 +59,7 @@ foreach(fbs_file ${_xnnpack_schema__srcs})
)
endforeach()

if(WIN32 AND NOT CMAKE_CROSSCOMPILING)
if(${CMAKE_HOST_SYSTEM_NAME} STREQUAL "Windows")
set(MV_COMMAND
powershell -Command
"Move-Item -Path ${_xnnpack_flatbuffer__outputs} -Destination ${_xnnpack_schema__outputs}"
8 changes: 8 additions & 0 deletions backends/xnnpack/cmake/Dependencies.cmake
@@ -43,6 +43,14 @@ set(XNNPACK_ENABLE_AVX512VNNIGFNI
CACHE BOOL ""
)

if(WIN32)
# These XNNPACK options don't currently build on Windows.
set_overridable_option(XNNPACK_ENABLE_AVX256SKX OFF)
set_overridable_option(XNNPACK_ENABLE_AVX256VNNI OFF)
set_overridable_option(XNNPACK_ENABLE_AVX256VNNIGFNI OFF)
set_overridable_option(XNNPACK_ENABLE_AVX512BF16 OFF)
endif()

if(EXECUTORCH_XNNPACK_ENABLE_KLEIDI)
set(XNNPACK_ENABLE_KLEIDIAI
ON
5 changes: 5 additions & 0 deletions extension/data_loader/CMakeLists.txt
@@ -24,6 +24,11 @@ if(NOT ET_HAVE_SYS_MMAN_H AND NOT WIN32)
"extension/data_loader/mmap_data_loader.cpp"
)
endif()
if(WIN32)
list(APPEND _extension_data_loader__srcs
"extension/data_loader/mman_windows.cpp"
)
endif()
list(TRANSFORM _extension_data_loader__srcs PREPEND "${EXECUTORCH_ROOT}/")
add_library(extension_data_loader ${_extension_data_loader__srcs})
target_link_libraries(extension_data_loader executorch_core)
9 changes: 7 additions & 2 deletions install_requirements.py
@@ -112,8 +112,13 @@ def install_requirements(use_pytorch_nightly):

LOCAL_REQUIREMENTS = [
"third-party/ao", # We need the latest kernels for fast iteration, so not relying on pypi.
"extension/llm/tokenizers", # TODO(larryliu0820): Setup a pypi package for this.
]
] + (
[
"extension/llm/tokenizers", # TODO(larryliu0820): Setup a pypi package for this.
]
if sys.platform != "win32"
else []
) # TODO(gjcomer): Re-enable when buildable on Windows.

# Install packages directly from local copy instead of pypi.
# This is usually not recommended.
3 changes: 2 additions & 1 deletion kernels/portable/cpu/op_amax.cpp
@@ -9,6 +9,7 @@
#include <c10/util/irange.h>
#include <cmath>

#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>
@@ -55,7 +56,7 @@ Tensor& amax_out(
for (const auto out_ix : c10::irange(begin, end)) {
out_data[out_ix] = plan.execute<CTYPE>(
[](CTYPE v, CTYPE max_v) {
return std::isnan(v) || v > max_v ? v : max_v;
return utils::isnan_override(v) || v > max_v ? v : max_v;
},
out_ix);
}
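The replaced lambda implements PyTorch-style NaN propagation: any comparison against NaN is false, so once a NaN enters the accumulator it can never be displaced. A minimal standalone C++ sketch of that behavior (illustrative only; amax_step is a hypothetical name, not part of this PR):

#include <cassert>
#include <cmath>
#include <limits>

// Standalone model of the amax reduction step: NaN propagates, otherwise
// the larger value wins. Comparisons against NaN are always false, so a
// NaN accumulator is never replaced.
template <typename T>
T amax_step(T v, T max_v) {
  return std::isnan(v) || v > max_v ? v : max_v;
}

int main() {
  const float nan = std::numeric_limits<float>::quiet_NaN();
  float acc = -std::numeric_limits<float>::infinity();
  for (float v : {1.0f, nan, 3.0f}) {
    acc = amax_step(v, acc);
  }
  assert(std::isnan(acc)); // a single NaN poisons the whole reduction
  return 0;
}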
3 changes: 2 additions & 1 deletion kernels/portable/cpu/op_amin.cpp
@@ -8,6 +8,7 @@
#include <c10/util/irange.h>
#include <cmath>

#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>
@@ -54,7 +55,7 @@ Tensor& amin_out(
for (const auto out_ix : c10::irange(begin, end)) {
out_data[out_ix] = plan.execute<CTYPE>(
[](CTYPE v, CTYPE min_v) {
return std::isnan(v) || v < min_v ? v : min_v;
return utils::isnan_override(v) || v < min_v ? v : min_v;
},
out_ix);
}
3 changes: 2 additions & 1 deletion kernels/portable/cpu/op_argmax.cpp
@@ -10,6 +10,7 @@
#include <cmath>
#include <tuple>

#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>
@@ -58,7 +59,7 @@ Tensor& argmax_out(
// the below condition as written is equivalent to
// !isnan(accval) && (isnan(v) || v > acc_val). See
// argument in op_argmin.cpp.
if (!std::isnan(acc_val) && !(v <= acc_val)) {
if (!utils::isnan_override(acc_val) && !(v <= acc_val)) {
acc_val = v;
acc_ix = ix;
}
3 changes: 2 additions & 1 deletion kernels/portable/cpu/op_argmin.cpp
@@ -10,6 +10,7 @@
#include <cmath>
#include <tuple>

#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>
@@ -65,7 +66,7 @@ Tensor& argmin_out(
// - false, so the result is true. The result is trivially
// - true for the above condition that uses isnan(v) as
// - well.
if (!std::isnan(acc_val) && !(v >= acc_val)) {
if (!utils::isnan_override(acc_val) && !(v >= acc_val)) {
acc_val = v;
acc_ix = ix;
}
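The equivalence asserted in the argmax/argmin comments above — that !(v <= acc_val) matches (isnan(v) || v > acc_val) whenever acc_val is not NaN — follows from IEEE semantics (every comparison involving NaN is false) and can be brute-force checked. A standalone sketch, not part of this PR:

#include <cassert>
#include <cmath>
#include <limits>

// Exhaustively checks the comment's claim over a small sample set:
// when acc_val is not NaN,
//   !(v <= acc_val)  ==  (isnan(v) || v > acc_val)
// because any comparison involving NaN evaluates to false.
int main() {
  const float nan = std::numeric_limits<float>::quiet_NaN();
  const float samples[] = {-1.0f, 0.0f, 2.5f, nan};
  for (float acc_val : samples) {
    for (float v : samples) {
      const bool rewritten = !std::isnan(acc_val) && !(v <= acc_val);
      const bool original =
          !std::isnan(acc_val) && (std::isnan(v) || v > acc_val);
      assert(rewritten == original);
    }
  }
  return 0;
}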
7 changes: 4 additions & 3 deletions kernels/portable/cpu/op_max.cpp
@@ -10,6 +10,7 @@
#include <cmath>
#include <tuple>

#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>
@@ -88,8 +89,8 @@ std::tuple<Tensor&, Tensor&> max_out(
for (const auto out_ix : c10::irange(begin, end)) {
std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
[](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
if (!std::isnan(acc_val) &&
(std::isnan(v) || v > acc_val)) {
if (!utils::isnan_override(acc_val) &&
(utils::isnan_override(v) || v > acc_val)) {
acc_val = v;
acc_ix = ix;
}
@@ -132,7 +133,7 @@ max_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
data_out[0] = lower_bound<CTYPE_OUT>();
for (const auto i : c10::irange(in.numel())) {
CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
if (std::isnan(val)) {
if (utils::isnan_override(val)) {
data_out[0] = val;
break;
}
7 changes: 4 additions & 3 deletions kernels/portable/cpu/op_min.cpp
@@ -10,6 +10,7 @@
#include <cmath>
#include <tuple>

#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>
@@ -88,8 +89,8 @@ std::tuple<Tensor&, Tensor&> min_out(
for (const auto out_ix : c10::irange(begin, end)) {
std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
[](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
if (!std::isnan(acc_val) &&
(std::isnan(v) || v < acc_val)) {
if (!utils::isnan_override(acc_val) &&
(utils::isnan_override(v) || v < acc_val)) {
acc_val = v;
acc_ix = ix;
}
@@ -132,7 +133,7 @@ min_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
data_out[0] = upper_bound<CTYPE_OUT>();
for (const auto i : c10::irange(in.numel())) {
CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
if (std::isnan(val)) {
if (utils::isnan_override(val)) {
data_out[0] = val;
break;
}
5 changes: 4 additions & 1 deletion kernels/portable/cpu/op_relu.cpp
@@ -9,6 +9,7 @@
#include <cmath>

#include <executorch/kernels/portable/cpu/util/functional_util.h>
#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>

@@ -45,7 +46,9 @@ Tensor& relu_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "relu.out", CTYPE, [&]() {
apply_unary_map_fn(
[](const CTYPE val_in) {
return (std::isnan(val_in) || val_in >= CTYPE(0)) ? val_in : CTYPE(0);
return (utils::isnan_override(val_in) || val_in >= CTYPE(0))
? val_in
: CTYPE(0);
},
in.const_data_ptr<CTYPE>(),
out.mutable_data_ptr<CTYPE>(),
3 changes: 2 additions & 1 deletion kernels/portable/cpu/op_sign.cpp
@@ -10,6 +10,7 @@
#include <cstring>

#include <executorch/kernels/portable/cpu/util/functional_util.h>
#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>

@@ -42,7 +43,7 @@ Tensor& sign_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "sign.out", CTYPE, [&] {
apply_unary_map_fn(
[](const CTYPE val_in) {
if (std::isnan(val_in)) {
if (utils::isnan_override(val_in)) {
return val_in;
} else {
return static_cast<CTYPE>((val_in > 0) - (val_in < 0));
4 changes: 3 additions & 1 deletion kernels/portable/cpu/op_topk.cpp
@@ -10,6 +10,8 @@
#include <cmath>
#include <tuple>

#include <executorch/kernels/portable/cpu/util/math_util.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/kernel/kernel_includes.h>

namespace torch {
@@ -62,7 +64,7 @@ bool float_less_than(T x, T y) {
if constexpr (std::is_integral_v<T>) {
return x < y;
}
return (!std::isnan(x) && std::isnan(y)) || x < y;
return (!utils::isnan_override(x) && utils::isnan_override(y)) || x < y;
}

template <typename CTYPE, typename elem_t = std::pair<CTYPE, int64_t>>
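float_less_than defines a strict weak ordering in which NaN sorts above every non-NaN value (two NaNs compare equivalent), so sort-based selection stays well-defined on inputs containing NaN. A standalone sketch of the comparator's effect, not part of this PR:

#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <vector>

// Model of float_less_than for floating-point inputs: NaN orders above
// all other values, yielding a valid strict weak ordering for std::sort.
bool float_less_than(float x, float y) {
  return (!std::isnan(x) && std::isnan(y)) || x < y;
}

int main() {
  const float nan = std::numeric_limits<float>::quiet_NaN();
  std::vector<float> v = {2.0f, nan, -1.0f, 5.0f};
  std::sort(v.begin(), v.end(), float_less_than);
  // Ascending order places NaN last: -1, 2, 5, NaN.
  assert(v[0] == -1.0f && v[1] == 2.0f && v[2] == 5.0f && std::isnan(v[3]));
  return 0;
}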
21 changes: 20 additions & 1 deletion kernels/portable/cpu/util/math_util.h
@@ -8,10 +8,14 @@

#pragma once

#include <executorch/runtime/core/exec_aten/exec_aten.h>

#if defined(ET_USE_PYTORCH_HEADERS) && ET_USE_PYTORCH_HEADERS
#include <ATen/cpu/vec/vec.h>
#endif

#include <type_traits>

namespace torch {
namespace executor {
namespace native {
@@ -29,7 +33,8 @@ template <
typename std::enable_if<std::is_integral<INT_T>::value, bool>::type = true>
INT_T floor_divide(INT_T a, INT_T b) {
const auto quot = a / b;
if (std::signbit(a) == std::signbit(b)) {
// MSVC does not like signbit on integral types.
if ((a < 0) == (b < 0)) {
return quot;
}
const auto rem = a % b;
@@ -52,6 +57,20 @@ FLOAT_T floor_divide(FLOAT_T a, FLOAT_T b) {
return div;
}

/**
* A wrapper around std::isnan that works with MSVC. When building with MSVC,
* std::isnan calls with integer inputs fail to compile due to ambiguous
* overload resolution.
*/
template <typename T>
bool isnan_override(T a) {
if constexpr (!std::is_integral_v<T>) {
return std::isnan(a);
} else {
return false;
}
}

/**
* Override min/max so we can emulate PyTorch's behavior with NaN entries.
*/
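Both MSVC workarounds in math_util.h can be exercised in isolation. In isnan_override, the integral branch is resolved at compile time by if constexpr, so std::isnan is never instantiated for integer arguments and MSVC's ambiguous-overload error cannot arise; likewise, (a < 0) == (b < 0) replaces std::signbit for integers in floor_divide. A standalone sketch, not part of this PR:

#include <cassert>
#include <cmath>
#include <type_traits>

// Model of isnan_override: integral inputs short-circuit to false at
// compile time, so std::isnan is never instantiated for them.
template <typename T>
bool isnan_override(T a) {
  if constexpr (!std::is_integral_v<T>) {
    return std::isnan(a);
  } else {
    return false; // integers are never NaN
  }
}

int main() {
  assert(!isnan_override(42));           // integer input: compiles under MSVC
  assert(isnan_override(std::nanf(""))); // float NaN is detected
  // Sign-agreement test used by floor_divide, with no std::signbit:
  const int a = -7, b = 2;
  assert(((a < 0) == (b < 0)) == false); // mixed signs: quotient needs flooring
  return 0;
}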