Skip to content

Commit 909395a

Browse files
authored
Merge branch 'main' into amyachev/issue3087
2 parents 2ed752e + 36aa1cc commit 909395a

File tree

6 files changed

+29
-21
lines changed

6 files changed

+29
-21
lines changed

.github/pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
61dc5e9c0a36d590adc47b4110efd94d9eb59306
1+
1e881ceecfe80532206ca4e0acb64391fab8b935

python/test/unit/language/test_compile_errors.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,9 @@ def test_min_dot_size(dtype):
406406
pytest.skip("fp16 FMA path supports all sizes")
407407
else:
408408
error_msg = "M >= 16, N >= 16 and K >= 16"
409+
elif is_xpu():
410+
# XPU supports all sizes
411+
pass
409412
else:
410413
pytest.skip("Test only supported on CUDA and HIP")
411414

python/triton/runtime/build.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def _build(name, src, srcdir, library_dirs, include_dirs, libraries, extra_compi
103103
if os.getenv("VERBOSE"):
104104
print(" ".join(cc_cmd))
105105

106-
ret = subprocess.check_call(cc_cmd)
106+
ret = subprocess.check_call(cc_cmd, stdout=subprocess.DEVNULL)
107107
if ret == 0:
108108
return so
109109
# extra arguments

scripts/patch-pytorch.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@ echo "Applying PyTorch patches in $REPO_ROOT"
1717
cd "$REPO_ROOT"
1818

1919
curl -sSL https://github.com/pytorch/pytorch/pull/126516.diff | git apply -
20+
2021
git apply "${SCRIPT_DIR}/pytorch.patch"

scripts/pytorch.patch

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ index 4d7a85029e3..f3d45ea5520 100644
4646

4747
@requires_gpu
4848
diff --git a/torch/_higher_order_ops/triton_kernel_wrap.py b/torch/_higher_order_ops/triton_kernel_wrap.py
49-
index ace56135fe1..7e925dd6e45 100644
49+
index c3f72bc5215..03aab72dca9 100644
5050
--- a/torch/_higher_order_ops/triton_kernel_wrap.py
5151
+++ b/torch/_higher_order_ops/triton_kernel_wrap.py
52-
@@ -238,7 +238,7 @@ def generate_ttir(
52+
@@ -239,7 +239,7 @@ def generate_ttir(
5353

5454
target = triton.runtime.driver.active.get_current_target()
5555
backend = triton.compiler.compiler.make_backend(target)
@@ -58,17 +58,20 @@ index ace56135fe1..7e925dd6e45 100644
5858
except ImportError:
5959
return kernel._get_config(*args)
6060

61-
@@ -247,7 +247,8 @@ def generate_ttir(
61+
@@ -248,9 +248,10 @@ def generate_ttir(
6262
name: arg for name, arg in ordered_args.items() if not isinstance(arg, Tensor)
6363
}
6464

6565
- # Build kernel signature -- doesn't include constexpr arguments.
6666
+ # Build kernel signature; it should also include `constexpr` arguments but `kernel._key_of`
6767
+ # doesn't work correctly with them. They will be added in `fixup_signature` function later.
6868
signature = {
69-
name: kernel._type_of(kernel._key_of(arg))
69+
- name: kernel._type_of(kernel._key_of(arg))
70+
+ name: triton.runtime.jit.mangle_type(arg)
7071
for i, (name, arg) in enumerate(ordered_args.items())
71-
@@ -257,7 +258,18 @@ def generate_ttir(
72+
if i not in kernel.constexprs
73+
}
74+
@@ -258,7 +259,18 @@ def generate_ttir(
7275
triton._C.libtriton.ir.load_dialects(context)
7376
backend.load_dialects(context)
7477

@@ -135,12 +138,12 @@ index 276c01f3f42..5c633b7963b 100644
135138

136139
# Instantiate AttrsDescriptor with the prepared arguments
137140
diff --git a/torch/_inductor/runtime/triton_heuristics.py b/torch/_inductor/runtime/triton_heuristics.py
138-
index af8530e94d0..1ec44de9806 100644
141+
index 281d0e78ba4..901263df4aa 100644
139142
--- a/torch/_inductor/runtime/triton_heuristics.py
140143
+++ b/torch/_inductor/runtime/triton_heuristics.py
141-
@@ -435,11 +435,22 @@ class CachingAutotuner(KernelInterface):
142-
else:
143-
triton_helpers.set_driver_to_gpu()
144+
@@ -414,10 +414,21 @@ class CachingAutotuner(KernelInterface):
145+
if not ASTSource:
146+
raise RuntimeError("Installed triton version too old, please upgrade")
144147

145148
+ def fixup_signature(arg_names, signature, constants):
146149
+ new_signature = {arg_name: None for arg_name in arg_names}
@@ -153,12 +156,11 @@ index af8530e94d0..1ec44de9806 100644
153156
+ new_signature[arg_name] = signature[arg_name]
154157
+ return new_signature
155158
+
156-
if ASTSource:
157-
compile_args = (
158-
ASTSource(
159-
self.fn,
160-
- compile_meta["signature"],
161-
+ fixup_signature(self.fn.arg_names, compile_meta["signature"], compile_meta["constants"]),
162-
compile_meta["constants"],
163-
compile_meta["configs"][0],
164-
),
159+
compile_args = (
160+
ASTSource(
161+
self.fn,
162+
- compile_meta["signature"],
163+
+ fixup_signature(self.fn.arg_names, compile_meta["signature"], compile_meta["constants"]),
164+
compile_meta["constants"],
165+
compile_meta["configs"][0],
166+
),

third_party/intel/backend/arch_parser.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <iostream>
10+
911
#include <sycl/sycl.hpp>
1012

1113
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
@@ -31,7 +33,7 @@ static PyObject *parseDeviceArch(PyObject *self, PyObject *args) {
3133
arch = "lnl";
3234
break;
3335
default:
34-
printf("sycl_arch = %d", sycl_arch);
36+
std::cerr << "sycl_arch not recognized: " << (int)sycl_arch << std::endl;
3537
}
3638

3739
return Py_BuildValue("s", arch.c_str());

0 commit comments

Comments (0)