55#blocked = #triton_gpu.blocked <{sizePerThread = [16 , 1 ], threadsPerWarp = [1 , 16 ], warpsPerCTA = [1 , 1 ], order = [0 , 1 ]}>
66#blocked1 = #triton_gpu.blocked <{sizePerThread = [1 , 16 ], threadsPerWarp = [16 , 1 ], warpsPerCTA = [1 , 1 ], order = [0 , 1 ]}>
77
8- module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 1 : i32 , triton_gpu.target = " xpu " , " triton_gpu.threads-per-warp" = 16 : i32 } {
8+ module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 1 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
99 // CHECK-LABEL: llvm.func spir_kernelcc @test_f16(
1010 // CHECK-SAME: , %[[VAL_1:.*]]: !llvm.ptr<3>
1111 tt.func @test_f16 (%arg0: tensor <16 x16 xf16 , #blocked >) -> tensor <16 x16 xf16 , #blocked1 > {
@@ -187,7 +187,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 1 :
187187#blocked = #triton_gpu.blocked <{sizePerThread = [16 , 1 ], threadsPerWarp = [1 , 16 ], warpsPerCTA = [2 , 1 ], order = [0 , 1 ]}>
188188#blocked1 = #triton_gpu.blocked <{sizePerThread = [1 , 16 ], threadsPerWarp = [16 , 1 ], warpsPerCTA = [2 , 1 ], order = [0 , 1 ]}>
189189
190- module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 2 : i32 , triton_gpu.target = " xpu " , " triton_gpu.threads-per-warp" = 16 : i32 } {
190+ module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 2 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
191191 // CHECK-LABEL: llvm.func spir_kernelcc @test(
192192 // CHECK-SAME: , %[[VAL_1:.*]]: !llvm.ptr<3>
193193 tt.func @test (%arg0: tensor <32 x16 xf32 , #blocked >) -> tensor <32 x16 xf32 , #blocked1 > {
@@ -204,7 +204,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 2 :
204204#blocked = #triton_gpu.blocked <{sizePerThread = [16 , 1 ], threadsPerWarp = [1 , 16 ], warpsPerCTA = [1 , 2 ], order = [0 , 1 ]}>
205205#blocked1 = #triton_gpu.blocked <{sizePerThread = [1 , 16 ], threadsPerWarp = [16 , 1 ], warpsPerCTA = [1 , 2 ], order = [0 , 1 ]}>
206206
207- module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 2 : i32 , triton_gpu.target = " xpu " , " triton_gpu.threads-per-warp" = 16 : i32 } {
207+ module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 2 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
208208 // CHECK-LABEL: llvm.func spir_kernelcc @test(
209209 // CHECK-SAME: , %[[VAL_1:.*]]: !llvm.ptr<3>
210210 tt.func @test (%arg0: tensor <16 x32 xf32 , #blocked >) -> tensor <16 x32 xf32 , #blocked1 > {
@@ -221,7 +221,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 2 :
221221#blocked = #triton_gpu.blocked <{sizePerThread = [16 , 1 ], threadsPerWarp = [1 , 16 ], warpsPerCTA = [4 , 4 ], order = [0 , 1 ]}>
222222#blocked1 = #triton_gpu.blocked <{sizePerThread = [1 , 16 ], threadsPerWarp = [16 , 1 ], warpsPerCTA = [4 , 4 ], order = [0 , 1 ]}>
223223
224- module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 16 : i32 , triton_gpu.target = " xpu " , " triton_gpu.threads-per-warp" = 16 : i32 } {
224+ module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 16 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
225225 // CHECK-LABEL: llvm.func spir_kernelcc @test(
226226 // CHECK-SAME: , %[[VAL_1:.*]]: !llvm.ptr<3>
227227 tt.func @test (%arg0: tensor <64 x64 xf32 , #blocked >) -> tensor <64 x64 xf32 , #blocked1 > {
@@ -238,7 +238,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 16
238238#blocked = #triton_gpu.blocked <{sizePerThread = [16 , 1 , 1 ], threadsPerWarp = [1 , 16 , 1 ], warpsPerCTA = [4 , 4 , 1 ], order = [0 , 1 , 2 ]}>
239239#blocked1 = #triton_gpu.blocked <{sizePerThread = [1 , 16 , 1 ], threadsPerWarp = [16 , 1 , 1 ], warpsPerCTA = [4 , 4 , 1 ], order = [0 , 1 , 2 ]}>
240240
241- module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 16 : i32 , triton_gpu.target = " xpu " , " triton_gpu.threads-per-warp" = 16 : i32 } {
241+ module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 16 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
242242 // CHECK-LABEL: llvm.func spir_kernelcc @test(
243243 // CHECK-SAME: , %[[VAL_1:.*]]: !llvm.ptr<3>
244244 tt.func @test (%arg0: tensor <64 x64 x1 xf32 , #blocked >) -> tensor <64 x64 x1 xf32 , #blocked1 > {
@@ -254,7 +254,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 16
254254#blocked = #triton_gpu.blocked <{sizePerThread = [16 , 1 , 1 ], threadsPerWarp = [1 , 16 , 1 ], warpsPerCTA = [4 , 4 , 1 ], order = [0 , 1 , 2 ]}>
255255#blocked1 = #triton_gpu.blocked <{sizePerThread = [1 , 16 ], threadsPerWarp = [16 , 1 ], warpsPerCTA = [4 , 4 ], order = [0 , 1 ]}>
256256
257- module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 16 : i32 , triton_gpu.target = " xpu " , " triton_gpu.threads-per-warp" = 16 : i32 } {
257+ module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 16 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
258258 // CHECK-LABEL: llvm.func spir_kernelcc @test(
259259 // CHECK-SAME: , %[[VAL_1:.*]]: !llvm.ptr<3>
260260 tt.func @test (%arg0: tensor <64 x64 xf32 , #triton_gpu.slice <{dim = 2 , parent = #blocked }>>) -> tensor <64 x64 xf32 , #blocked1 > {
@@ -271,7 +271,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 16
271271#blocked = #triton_gpu.blocked <{sizePerThread = [16 , 1 , 1 , 1 , 1 ], threadsPerWarp = [1 , 16 , 1 , 1 , 1 ], warpsPerCTA = [1 , 1 , 1 , 1 , 1 ], order = [1 , 2 , 3 , 4 , 0 ]}>
272272#blocked1 = #triton_gpu.blocked <{sizePerThread = [1 , 16 , 1 ], threadsPerWarp = [16 , 1 , 1 ], warpsPerCTA = [1 , 1 , 1 ], order = [1 , 2 , 0 ]}>
273273
274- module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 1 : i32 , triton_gpu.target = " xpu " , " triton_gpu.threads-per-warp" = 16 : i32 } {
274+ module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 1 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
275275 // CHECK-LABEL: llvm.func spir_kernelcc @test(
276276 // CHECK-SAME: , %[[VAL_1:.*]]: !llvm.ptr<3>
277277 tt.func @test (%arg0: tensor <16 x16 x1 xf32 , #triton_gpu.slice <{dim = 2 , parent = #triton_gpu.slice <{dim = 4 , parent = #blocked }>}>>) -> tensor <16 x16 x1 xf32 , #blocked1 > {
@@ -288,7 +288,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 1 :
288288#blocked = #triton_gpu.blocked <{sizePerThread = [16 , 1 , 1 , 1 , 1 ], threadsPerWarp = [1 , 16 , 1 , 1 , 1 ], warpsPerCTA = [4 , 1 , 1 , 4 , 1 ], order = [1 , 2 , 3 , 4 , 0 ]}>
289289#blocked1 = #triton_gpu.blocked <{sizePerThread = [1 , 16 , 1 ], threadsPerWarp = [16 , 1 , 1 ], warpsPerCTA = [4 , 1 , 4 ], order = [1 , 2 , 0 ]}>
290290
291- module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 16 : i32 , triton_gpu.target = " xpu " , " triton_gpu.threads-per-warp" = 16 : i32 } {
291+ module attributes {" triton_gpu.num-ctas" = 1 : i32 , " triton_gpu.num-warps" = 16 : i32 , " triton_gpu.threads-per-warp" = 16 : i32 } {
292292 // CHECK-LABEL: llvm.func spir_kernelcc @test(
293293 // CHECK-SAME: , %[[VAL_1:.*]]: !llvm.ptr<3>
294294 tt.func @test (%arg0: tensor <64 x16 x4 xf32 , #triton_gpu.slice <{dim = 2 , parent = #triton_gpu.slice <{dim = 4 , parent = #blocked }>}>>) -> tensor <64 x16 x4 xf32 , #blocked1 > {
0 commit comments