// CHECK-LABEL: @test_canonicalize_convert_view
// CHECK-SAME: (%[[ARG:.+]]: tensor<64x64xf32
// CHECK-NOT: triton_gpu.convert_layout
-// CHECK: %[[V:.+]] = tt.reshape %[[ARG]] {allow_reorder = true}
+// CHECK: %[[V:.+]] = tt.reshape %[[ARG]] allow_reorder
// CHECK: tt.return %[[V]]
#blocked0 = #triton_gpu.blocked<{sizePerThread = [1, 8], threadsPerWarp = [4, 8], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
#blocked1 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [8], order = [0], CTAsPerCGA = [1], CTASplitNum = [1], CTAOrder = [0]}>
#blocked2 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [8, 1], order = [0, 1], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
module attributes {"triton_gpu.num-warps" = 8 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.target" = "cuda:80"} {
tt.func @test_canonicalize_convert_view(%arg0: tensor<64x64xf32, #blocked0>) -> tensor<4096xf32, #blocked1> {
    %c = triton_gpu.convert_layout %arg0 : tensor<64x64xf32, #blocked0> -> tensor<64x64xf32, #blocked2>
-    %r = tt.reshape %c {allow_reorder = true} : tensor<64x64xf32, #blocked2> -> tensor<4096xf32, #blocked1>
+    %r = tt.reshape %c allow_reorder : tensor<64x64xf32, #blocked2> -> tensor<4096xf32, #blocked1>
    tt.return %r : tensor<4096xf32, #blocked1>
}
} // end module
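
For reference, a sketch of the output the CHECK lines above pin down: after -canonicalize, this reshape can be done as a cheap view, so the convert_layout folds away and tt.reshape applies directly to the argument. This IR is reconstructed from the CHECK directives, not pasted from a test run:

    tt.func @test_canonicalize_convert_view(%arg0: tensor<64x64xf32, #blocked0>) -> tensor<4096xf32, #blocked1> {
        // The layout conversion has been absorbed; reshape reads %arg0 directly.
        %0 = tt.reshape %arg0 allow_reorder : tensor<64x64xf32, #blocked0> -> tensor<4096xf32, #blocked1>
        tt.return %0 : tensor<4096xf32, #blocked1>
    }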
@@ -25,15 +25,15 @@ tt.func @test_canonicalize_convert_view(%arg0: tensor<64x64xf32, #blocked0>) ->
// CHECK-LABEL: @test_canonicalize_convert_expensive_view
// CHECK-SAME: (%[[ARG:.+]]: tensor<256x16xf32
// CHECK: %[[C:.+]] = triton_gpu.convert_layout %[[ARG]]
-// CHECK: %[[V:.+]] = tt.reshape %[[C]] {allow_reorder = true}
+// CHECK: %[[V:.+]] = tt.reshape %[[C]] allow_reorder
// CHECK: tt.return %[[V]]
#blocked0 = #triton_gpu.blocked<{sizePerThread = [1, 8], threadsPerWarp = [4, 8], warpsPerCTA = [8, 1], order = [1, 0], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
#blocked1 = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [8], order = [0], CTAsPerCGA = [1], CTASplitNum = [1], CTAOrder = [0]}>
#blocked2 = #triton_gpu.blocked<{sizePerThread = [1, 1], threadsPerWarp = [32, 1], warpsPerCTA = [8, 1], order = [0, 1], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
module attributes {"triton_gpu.num-warps" = 8 : i32, "triton_gpu.num-ctas" = 1 : i32, "triton_gpu.target" = "cuda:80"} {
tt.func @test_canonicalize_convert_expensive_view(%arg0: tensor<256x16xf32, #blocked0>) -> tensor<4096xf32, #blocked1> {
    %c = triton_gpu.convert_layout %arg0 : tensor<256x16xf32, #blocked0> -> tensor<256x16xf32, #blocked2>
-    %r = tt.reshape %c {allow_reorder = true} : tensor<256x16xf32, #blocked2> -> tensor<4096xf32, #blocked1>
+    %r = tt.reshape %c allow_reorder : tensor<256x16xf32, #blocked2> -> tensor<4096xf32, #blocked1>
    tt.return %r : tensor<4096xf32, #blocked1>
}
} // end module
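
To exercise these checks locally, the test would be driven by a LIT RUN line at the top of the file, outside both hunks shown here. A typical header for a canonicalization test of this shape (the exact tool name and flags are an assumption, since the RUN line is not part of this diff) looks like:

    // RUN: triton-opt %s -split-input-file -canonicalize | FileCheck %s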