@@ -312,3 +312,25 @@ module attributes {ttg.target = "xpu", "ttg.num-ctas" = 1 : i32, "ttg.num-warps"
tt.return
}
}
+
+ // -----
+
+ // CHECK-NOT: triton_intel_gpu.dpas
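+ // Both tt.dot ops below should fail the DPAS capability check, so no triton_intel_gpu.dpas op is expected in the output.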
+ #blocked = #ttg.blocked<{sizePerThread = [4, 4], threadsPerWarp = [1, 16], warpsPerCTA = [1, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [1, 0]}>
+ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 1 : i32, "ttg.threads-per-warp" = 16 : i32, "triton_intel_gpu.min_sg_size" = 16 : i32, "triton_intel_gpu.support_dpas"} {
+ // CHECK-LABEL: check_dpas_cap
+ tt.func @check_dpas_cap(%arg0: !tt.ptr<f32> {tt.divisibility = 16 : i32}) {
+ %zero_f32 = arith.constant dense<0.000000e+00> : tensor<128x16xf32, #blocked>
+ %a = arith.constant dense<0.000000e+00> : tensor<128x128xf32, #ttg.dot_op<{opIdx = 0, parent = #blocked}>>
+ %b = arith.constant dense<0.000000e+00> : tensor<128x16xf32, #ttg.dot_op<{opIdx = 1, parent = #blocked}>>
+
+ %result = tt.dot %a, %b, %zero_f32, inputPrecision = tf32 : tensor<128x128xf32, #ttg.dot_op<{opIdx = 0, parent = #blocked}>> * tensor<128x16xf32, #ttg.dot_op<{opIdx = 1, parent = #blocked}>> -> tensor<128x16xf32, #blocked>
+ %result_ptr = tt.splat %arg0 : !tt.ptr<f32> -> tensor<128x16x!tt.ptr<f32>, #blocked>
+ tt.store %result_ptr, %result : tensor<128x16x!tt.ptr<f32>, #blocked>
+
+ %result2 = tt.dot %a, %b, %zero_f32 : tensor<128x128xf32, #ttg.dot_op<{opIdx = 0, parent = #blocked}>> * tensor<128x16xf32, #ttg.dot_op<{opIdx = 1, parent = #blocked}>> -> tensor<128x16xf32, #blocked>
+ tt.store %result_ptr, %result2 : tensor<128x16x!tt.ptr<f32>, #blocked>
+ tt.return
+ }
+ }