11// RUN: triton-opt %s -split-input-file --convert-triton-intel-gpu-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm
2- // RUN: env TRITON_INTEL_ENABLE_BLOCK_IO_ALL_LAYOUTS=1 triton-opt %s -split-input-file --convert-triton-intel-gpu-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm --check-prefixes=ALL-LAYOUT
32
43#dpas = #ttig.dpas <{repeatCount = 8 , systolicDepth = 8 , executionSize = 16 , opsPerChan = 1 , threadsPerWarp = 16 , warpsPerCTA = [4 , 4 ], repCluster = [2 , 2 ]}>
54#dot_a = #ttg.dot_op <{opIdx = 0 , parent = #dpas , kWidth = 1 }>
@@ -11,25 +10,25 @@ module attributes {ttig.support_sg_2d_block, "ttg.num-warps" = 16 : i32, "ttg.t
1110 %c1_i64 = arith.constant 1 : i64
1211 %c0_i32 = arith.constant 0 : i32
1312 %0 = tt.make_tensor_ptr %arg0 , [%c64_i64 , %c64_i64 ], [%c1_i64 , %col_stride ], [%c0_i32 , %c0_i32 ] {order = array<i32 : 0 , 1 >} : <tensor <256 x64 xi8 , #dot_a >>
14- // ALL-LAYOUT: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
15- // ALL-LAYOUT: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
16- // ALL-LAYOUT: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
17- // ALL-LAYOUT: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
18- // ALL-LAYOUT: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
19- // ALL-LAYOUT: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
20- // ALL-LAYOUT: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
21-
22- // ALL-LAYOUT: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
23-
24- // ALL-LAYOUT: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
25- // ALL-LAYOUT: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
26- // ALL-LAYOUT: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
27- // ALL-LAYOUT: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
28- // ALL-LAYOUT: llvm.mlir.undef : vector<4xi8>
29- // ALL-LAYOUT-COUNT-4: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<4xi8>
30- // ALL-LAYOUT: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 8, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default}
13+ // CHECK: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
14+ // CHECK: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
15+ // CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
16+ // CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
17+ // CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
18+ // CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
19+ // CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
20+
21+ // CHECK: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
22+
23+ // CHECK: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
24+ // CHECK: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
25+ // CHECK: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
26+ // CHECK: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
27+ // CHECK: llvm.mlir.undef : vector<4xi8>
28+ // CHECK-COUNT-4: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<4xi8>
29+ // CHECK: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 8, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default}
3130 tt.store %0 , %cst {ttig.block_io = " row_major" , boundaryCheck = array<i32 : 0 >} : !tt.ptr <tensor <256 x64 xi8 , #dot_a >>
32- // ALL-LAYOUT-COUNT-63: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 8, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default}
31+ // CHECK-COUNT-63: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 8, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default}
3332
3433 tt.return
3534 }
@@ -47,25 +46,25 @@ module attributes {ttig.support_sg_2d_block, "ttg.num-warps" = 16 : i32, "ttg.t
4746 %c1_i64 = arith.constant 1 : i64
4847 %c0_i32 = arith.constant 0 : i32
4948 %0 = tt.make_tensor_ptr %arg0 , [%c64_i64 , %c64_i64 ], [%c1_i64 , %col_stride ], [%c0_i32 , %c0_i32 ] {order = array<i32 : 0 , 1 >} : <tensor <256 x64 xi8 , #dot_b >>
50- // ALL-LAYOUT: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
51- // ALL-LAYOUT: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
52- // ALL-LAYOUT: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
53- // ALL-LAYOUT: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
54- // ALL-LAYOUT: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
55- // ALL-LAYOUT: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
56- // ALL-LAYOUT: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
57-
58- // ALL-LAYOUT: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
59-
60- // ALL-LAYOUT: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
61- // ALL-LAYOUT: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
62- // ALL-LAYOUT: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
63- // ALL-LAYOUT: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
64- // ALL-LAYOUT: llvm.mlir.undef : vector<8xi8>
65- // ALL-LAYOUT-COUNT-8: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<8xi8>
66- // ALL-LAYOUT: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 8, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default}
49+ // CHECK: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
50+ // CHECK: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
51+ // CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
52+ // CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
53+ // CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
54+ // CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
55+ // CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
56+
57+ // CHECK: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
58+
59+ // CHECK: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
60+ // CHECK: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
61+ // CHECK: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
62+ // CHECK: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
63+ // CHECK: llvm.mlir.undef : vector<8xi8>
64+ // CHECK-COUNT-8: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<8xi8>
65+ // CHECK: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 8, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default}
6766 tt.store %0 , %cst {ttig.block_io = " row_major" , boundaryCheck = array<i32 : 0 >} : !tt.ptr <tensor <256 x64 xi8 , #dot_b >>
68- // ALL-LAYOUT-COUNT-63: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 8, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default}
67+ // CHECK-COUNT-63: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 8, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default}
6968
7069 tt.return
7170 }
@@ -83,25 +82,25 @@ module attributes {ttig.support_sg_2d_block, "ttg.num-warps" = 16 : i32, "ttg.t
8382 %c1_i64 = arith.constant 1 : i64
8483 %c0_i32 = arith.constant 0 : i32
8584 %0 = tt.make_tensor_ptr %arg0 , [%c64_i64 , %c64_i64 ], [%c1_i64 , %col_stride ], [%c0_i32 , %c0_i32 ] {order = array<i32 : 0 , 1 >} : <tensor <256 x64 xi8 , #slice >>
86- // ALL-LAYOUT: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
87- // ALL-LAYOUT: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
88- // ALL-LAYOUT: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
89- // ALL-LAYOUT: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
90- // ALL-LAYOUT: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
91- // ALL-LAYOUT: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
92- // ALL-LAYOUT: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
93-
94- // ALL-LAYOUT: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
95-
96- // ALL-LAYOUT: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
97- // ALL-LAYOUT: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
98- // ALL-LAYOUT: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
99- // ALL-LAYOUT: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
100- // ALL-LAYOUT: llvm.mlir.undef : vector<16xi8>
101- // ALL-LAYOUT-COUNT-16: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<16xi8>
102- // ALL-LAYOUT: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 8, v_blocks = 1, cache_control = Default}
85+ // CHECK: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
86+ // CHECK: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
87+ // CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
88+ // CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
89+ // CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
90+ // CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
91+ // CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
92+
93+ // CHECK: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
94+
95+ // CHECK: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
96+ // CHECK: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
97+ // CHECK: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
98+ // CHECK: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
99+ // CHECK: llvm.mlir.undef : vector<16xi8>
100+ // CHECK-COUNT-16: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<16xi8>
101+ // CHECK: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 8, v_blocks = 1, cache_control = Default}
103102 tt.store %0 , %cst {ttig.block_io = " row_major" , boundaryCheck = array<i32 : 0 >} : !tt.ptr <tensor <256 x64 xi8 , #slice >>
104- // ALL-LAYOUT-COUNT-31: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 8, v_blocks = 1, cache_control = Default}
103+ // CHECK-COUNT-31: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 8, v_blocks = 1, cache_control = Default}
105104
106105 tt.return
107106 }
@@ -118,25 +117,25 @@ module attributes {ttig.support_sg_2d_block, "ttg.num-warps" = 16 : i32, "ttg.t
118117 %c1_i64 = arith.constant 1 : i64
119118 %c0_i32 = arith.constant 0 : i32
120119 %0 = tt.make_tensor_ptr %arg0 , [%c64_i64 , %c64_i64 ], [%c1_i64 , %col_stride ], [%c0_i32 , %c0_i32 ] {order = array<i32 : 0 , 1 >} : <tensor <256 x64 xi8 , #blocked >>
121- // ALL-LAYOUT: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
122- // ALL-LAYOUT: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
123- // ALL-LAYOUT: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
124- // ALL-LAYOUT: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
125- // ALL-LAYOUT: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
126- // ALL-LAYOUT: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
127- // ALL-LAYOUT: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
128-
129- // ALL-LAYOUT: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
130-
131- // ALL-LAYOUT: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
132- // ALL-LAYOUT: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
133- // ALL-LAYOUT: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
134- // ALL-LAYOUT: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
135- // ALL-LAYOUT: llvm.mlir.undef : vector<8xi8>
136- // ALL-LAYOUT-COUNT-8: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<8xi8>
137- // ALL-LAYOUT: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 4, v_blocks = 1, cache_control = Default}
120+ // CHECK: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
121+ // CHECK: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
122+ // CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
123+ // CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
124+ // CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
125+ // CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
126+ // CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
127+
128+ // CHECK: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
129+
130+ // CHECK: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
131+ // CHECK: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
132+ // CHECK: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
133+ // CHECK: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
134+ // CHECK: llvm.mlir.undef : vector<8xi8>
135+ // CHECK-COUNT-8: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<8xi8>
136+ // CHECK: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 4, v_blocks = 1, cache_control = Default}
138137 tt.store %0 , %cst {ttig.block_io = " row_major" , boundaryCheck = array<i32 : 0 >} : !tt.ptr <tensor <256 x64 xi8 , #blocked >>
139- // ALL-LAYOUT-COUNT-7: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 4, v_blocks = 1, cache_control = Default}
138+ // CHECK-COUNT-7: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 4, v_blocks = 1, cache_control = Default}
140139
141140 tt.return
142141 }
0 commit comments