@@ -45,21 +45,33 @@ llvm.func @triton_gen.cache_controls(%arg0: !llvm.ptr) {
// Exercises triton_gen.2Dblockload at three element widths: 16, 32 and 64 bits.
// Every load uses the same six operands (pointer, base width, base height,
// base pitch, x, y); only the attribute dictionary and result type differ:
//   16-bit, 16x16 tile -> vector<16xf16>
//   32-bit, 16x16 tile -> vector<16xf32>
//   64-bit,  8x16 tile -> vector<8xi64>   (tile_width x tile_height)
// The expectations below match each op with the function arguments spelled
// %arg0..%arg5 and attributes spelled `name = value`.
// NOTE(review): the RUN line is outside this view; presumably this is a
// parse/print round trip -- confirm against the top of the file.
llvm.func @triton_gen.2Dblockload(%ptr : !llvm.ptr, %base_width : i32, %base_height : i32, %base_pitch : i32, %x : i32, %y : i32) {
  // CHECK: llvm.func @triton_gen.2Dblockload(%arg0: !llvm.ptr, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32, %arg5: i32) {
  // CHECK-NEXT: %0 = triton_gen.2Dblockload %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {elem_size_in_bits = 16, tile_width = 16, tile_height = 16, v_blocks = 1, transpose = false, vnni_transform = false, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32) -> vector<16xf16>
  // CHECK-NEXT: %1 = triton_gen.2Dblockload %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {elem_size_in_bits = 32, tile_width = 16, tile_height = 16, v_blocks = 1, transpose = false, vnni_transform = false, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32) -> vector<16xf32>
  // CHECK-NEXT: %2 = triton_gen.2Dblockload %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {elem_size_in_bits = 64, tile_width = 8, tile_height = 16, v_blocks = 1, transpose = false, vnni_transform = false, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32) -> vector<8xi64>
  %0 = triton_gen.2Dblockload %ptr, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=16, tile_width=16, tile_height=16, v_blocks=1, transpose=false, vnni_transform=false, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32) -> vector<16xf16>
  %1 = triton_gen.2Dblockload %ptr, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=32, tile_width=16, tile_height=16, v_blocks=1, transpose=false, vnni_transform=false, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32) -> vector<16xf32>
  %2 = triton_gen.2Dblockload %ptr, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=64, tile_width=8, tile_height=16, v_blocks=1, transpose=false, vnni_transform=false, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32) -> vector<8xi64>
  llvm.return
}
5155
// Exercises triton_gen.2Dblockstore at three element widths: 16, 32 and 64 bits.
// The function takes one payload argument per width (%stored_val1: vector<16xf16>,
// %stored_val2: vector<16xf32>, %stored_val3: vector<8xi64>), so each store has
// its own value operand; these appear as %arg6, %arg7 and %arg8 in the
// expectations. All three stores share the same pointer/shape/offset operands
// and use tile_height = 8; tile_width is 16 for the 16- and 32-bit stores and
// 8 for the 64-bit store.
// NOTE(review): the RUN line is outside this view; presumably this is a
// parse/print round trip -- confirm against the top of the file.
llvm.func @triton_gen.2Dblockstore(%ptr : !llvm.ptr, %base_width : i32, %base_height : i32, %base_pitch : i32, %x : i32, %y : i32, %stored_val1 : vector<16xf16>, %stored_val2 : vector<16xf32>, %stored_val3 : vector<8xi64>) {
  // CHECK: llvm.func @triton_gen.2Dblockstore(%arg0: !llvm.ptr, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: vector<16xf16>, %arg7: vector<16xf32>, %arg8: vector<8xi64>) {
  // CHECK-NEXT: triton_gen.2Dblockstore %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6 {elem_size_in_bits = 16, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32, vector<16xf16>)
  // CHECK-NEXT: triton_gen.2Dblockstore %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg7 {elem_size_in_bits = 32, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32, vector<16xf32>)
  // CHECK-NEXT: triton_gen.2Dblockstore %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg8 {elem_size_in_bits = 64, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32, vector<8xi64>)
  triton_gen.2Dblockstore %ptr, %base_width, %base_height, %base_pitch, %x, %y, %stored_val1 {elem_size_in_bits=16, tile_width=16, tile_height=8, v_blocks=1, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32, vector<16xf16>)
  triton_gen.2Dblockstore %ptr, %base_width, %base_height, %base_pitch, %x, %y, %stored_val2 {elem_size_in_bits=32, tile_width=16, tile_height=8, v_blocks=1, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32, vector<16xf32>)
  triton_gen.2Dblockstore %ptr, %base_width, %base_height, %base_pitch, %x, %y, %stored_val3 {elem_size_in_bits=64, tile_width=8, tile_height=8, v_blocks=1, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32, vector<8xi64>)
  llvm.return
}
5866
// Exercises triton_gen.2Dblockprefetch at three element widths: 16, 32 and 64 bits.
// The op produces no result; every prefetch uses the same six operands and an
// 8x8 tile with v_blocks = 1, so only elem_size_in_bits varies between them.
// The expectations below match each op with the function arguments spelled
// %arg0..%arg5, in the same order as the ops appear in the body.
// NOTE(review): the RUN line is outside this view; presumably this is a
// parse/print round trip -- confirm against the top of the file.
llvm.func @triton_gen.2Dblockprefetch(%ptr : !llvm.ptr, %base_width : i32, %base_height : i32, %base_pitch : i32, %x : i32, %y : i32) {
  // CHECK: llvm.func @triton_gen.2Dblockprefetch(%arg0: !llvm.ptr, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32, %arg5: i32) {
  // CHECK-NEXT: triton_gen.2Dblockprefetch %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {elem_size_in_bits = 16, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32)
  // CHECK-NEXT: triton_gen.2Dblockprefetch %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {elem_size_in_bits = 32, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32)
  // CHECK-NEXT: triton_gen.2Dblockprefetch %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {elem_size_in_bits = 64, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default} : (!llvm.ptr, i32, i32, i32, i32, i32)
  triton_gen.2Dblockprefetch %ptr, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=16, tile_width=8, tile_height=8, v_blocks=1, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32)
  triton_gen.2Dblockprefetch %ptr, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=32, tile_width=8, tile_height=8, v_blocks=1, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32)
  triton_gen.2Dblockprefetch %ptr, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=64, tile_width=8, tile_height=8, v_blocks=1, cache_control=Default} : (!llvm.ptr, i32, i32, i32, i32, i32)
  llvm.return
}
6577
0 commit comments