@@ -355,6 +355,25 @@ module attributes {"triton_gpu.num-warps" = 8 : i32, "triton_gpu.threads-per-war
355355
356356// -----
357357
// COM: Test case: store a zero-filled 64x16 f16 tensor in the #blocked layout
// COM: through a block pointer created by tt.make_tensor_ptr, where the tt.store
// COM: carries no boundary_check attribute.
358+ #blocked = #triton_gpu.blocked <{sizePerThread = [1 , 1 ], threadsPerWarp = [1 , 16 ], warpsPerCTA = [2 , 4 ], order = [1 , 0 ]}>
359+ module attributes {" triton_gpu.num-warps" = 8 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
360+ // CHECK-LABEL: llvm.func spir_kernelcc @blocked_layout_no_boundary_check
361+ // COM: Check mask is not generated when boundary_check is not set.
// COM: NOTE(review): the count directive below expects one llvm.icmp "slt" match;
// COM: presumably that comparison is unrelated to boundary masking, and the
// COM: directive by itself may not reject additional comparisons -- confirm.
362+ // CHECK-COUNT-1: llvm.icmp "slt"
// COM: %arg0 is the f16 base pointer; %col_stride is a runtime i64 used as the
// COM: second stride entry of the block pointer below.
363+ tt.func public @blocked_layout_no_boundary_check (%arg0: !tt.ptr <f16 >, %col_stride: i64 ) {
// COM: Value to be stored: an all-zero 64x16 f16 tensor.
364+ %cst = arith.constant dense <0.000000e+00 > : tensor <64 x16 xf16 , #blocked >
365+ %c64_i64 = arith.constant 64 : i64
366+ %c1_i64 = arith.constant 1 : i64
367+ %c0_i32 = arith.constant 0 : i32
// COM: Bracketed operand lists are [64, 64], [1, %col_stride] and [0, 0];
// COM: presumably shape, strides and offsets in that order -- confirm against
// COM: the tt.make_tensor_ptr definition.
368+ %0 = tt.make_tensor_ptr %arg0 , [%c64_i64 , %c64_i64 ], [%c1_i64 , %col_stride ], [%c0_i32 , %c0_i32 ] {order = array<i32 : 0 , 1 >} : <tensor <64 x16 xf16 , #blocked >>
// COM: NOTE(review): 32 stores presumably corresponds to 64 rows / 2 warps along
// COM: dim 0 (warpsPerCTA = [2, 4]), i.e. 32 elements per thread -- confirm.
369+ // CHECK-COUNT-32: llvm.store
370+ tt.store %0 , %cst : !tt.ptr <tensor <64 x16 xf16 , #blocked >>
371+ tt.return
372+ }
373+ }
374+
375+ // -----
376+
358377#blocked = #triton_gpu.blocked <{sizePerThread = [1 , 1 ], threadsPerWarp = [1 , 16 ], warpsPerCTA = [2 , 4 ], order = [1 , 0 ]}>
359378module attributes {" triton_gpu.num-warps" = 8 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
360379 // CHECK-LABEL: llvm.func spir_kernelcc @blocked_layout
0 commit comments