Skip to content

Commit 9706b7c

Browse files
Skip TensorLayoutInterface verification on advanced path
Signed-off-by: Whitney Tsang <[email protected]>
1 parent b19c43a commit 9706b7c

File tree

4 files changed

+7
-4
lines changed

4 files changed

+7
-4
lines changed

lib/Dialect/TritonGPU/IR/Dialect.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "triton/Tools/LayoutUtils.h"
2424
#include "triton/Tools/LinearLayout.h"
2525
#include "triton/Tools/StrUtil.h"
26+
#include "triton/Tools/Sys/GetEnv.hpp"
2627
#include "llvm/ADT/SmallSet.h"
2728
#include "llvm/ADT/TypeSwitch.h"
2829
#include "llvm/Support/MathExtras.h"
@@ -2981,6 +2982,8 @@ struct TritonGPUVerifyTensorLayoutInterface
29812982
if (!distr)
29822983
return makeErr()
29832984
<< "Non-distributed layout is not allowed in tensor type.";
2985+
if (mlir::triton::tools::getBoolEnv("TRITON_INTEL_ADVANCED_PATH"))
2986+
return success();
29842987
auto rank = distr.getRepOrder().size();
29852988
if (rank != rankedTy.getRank())
29862989
return makeErr() << "Layout has rank " << rank

test/TritonIntelGPU/distribute-to-warps.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: triton-opt %s -split-input-file -tritonintelgpu-distribute-to-warps | FileCheck %s
1+
// RUN: env TRITON_INTEL_ADVANCED_PATH=1 triton-opt %s -split-input-file -tritonintelgpu-distribute-to-warps | FileCheck %s
22

33
#blocked1 = #ttg.blocked<{sizePerThread = [32, 32], threadsPerWarp = [1, 1], warpsPerCTA = [4, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
44
#blocked2 = #ttg.blocked<{sizePerThread = [32, 32], threadsPerWarp = [1, 1], warpsPerCTA = [1, 4], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>

test/TritonIntelGPU/match-target-size.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
// RUN: env TRITON_INTEL_REDUCE_TRANSPOSE=1 \
1+
// RUN: env TRITON_INTEL_ADVANCED_PATH=1 TRITON_INTEL_REDUCE_TRANSPOSE=1 \
22
// RUN: triton-opt %s -split-input-file -tritonintelgpu-match-target-size | FileCheck %s --check-prefixes=CHECK,CHECK-TR-RED
3-
// RUN: triton-opt %s -split-input-file -tritonintelgpu-match-target-size | FileCheck %s --check-prefixes=CHECK,CHECK-SG-RED
3+
// RUN: env TRITON_INTEL_ADVANCED_PATH=1 triton-opt %s -split-input-file -tritonintelgpu-match-target-size | FileCheck %s --check-prefixes=CHECK,CHECK-SG-RED
44

55
#warp = #ttig.warp<{sizePerThread = [32, 64], threadsPerWarp = [1, 1], order = [1, 0]}>
66
#dot0_ = #ttg.dot_op<{opIdx = 0, parent = #warp}>

test/TritonIntelGPU/slm-match-target-size.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: env TRITON_INTEL_ENABLE_FIRST_LOAD_TO_SLM=1 triton-opt %s -tritonintelgpu-match-target-size | FileCheck %s
1+
// RUN: env TRITON_INTEL_ADVANCED_PATH=1 TRITON_INTEL_ENABLE_FIRST_LOAD_TO_SLM=1 triton-opt %s -tritonintelgpu-match-target-size | FileCheck %s
22

33
#warp = #ttig.warp<{sizePerThread = [32, 64], threadsPerWarp = [1, 1], order = [1, 0]}>
44
#dot0 = #ttg.dot_op<{opIdx = 0, parent = #warp}>

0 commit comments

Comments (0)