Skip to content

Commit 653ab28

Browse files
Enable store block IO for all layout by default
Signed-off-by: Whitney Tsang <[email protected]>
1 parent bd11640 commit 653ab28

File tree

2 files changed

+77
-76
lines changed

2 files changed

+77
-76
lines changed

test/TritonIntelGPU/blockptr_store.mlir

Lines changed: 72 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
// RUN: triton-opt %s -split-input-file --convert-triton-intel-gpu-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm
2-
// RUN: env TRITON_INTEL_ENABLE_BLOCK_IO_ALL_LAYOUTS=1 triton-opt %s -split-input-file --convert-triton-intel-gpu-to-llvm | FileCheck %s --implicit-check-not=llvm.inline_asm --check-prefixes=ALL-LAYOUT
32

43
#dpas = #ttig.dpas<{repeatCount = 8, systolicDepth = 8, executionSize = 16, opsPerChan = 1, threadsPerWarp = 16, warpsPerCTA = [4, 4], repCluster = [2, 2]}>
54
#dot_a = #ttg.dot_op<{opIdx = 0, parent = #dpas, kWidth = 1}>
@@ -11,25 +10,25 @@ module attributes {ttig.support_sg_2d_block, "ttg.num-warps" = 16 : i32, "ttg.t
1110
%c1_i64 = arith.constant 1 : i64
1211
%c0_i32 = arith.constant 0 : i32
1312
%0 = tt.make_tensor_ptr %arg0, [%c64_i64, %c64_i64], [%c1_i64, %col_stride], [%c0_i32, %c0_i32] {order = array<i32: 0, 1>} : <tensor<256x64xi8, #dot_a>>
14-
// ALL-LAYOUT: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
15-
// ALL-LAYOUT: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
16-
// ALL-LAYOUT: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
17-
// ALL-LAYOUT: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
18-
// ALL-LAYOUT: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
19-
// ALL-LAYOUT: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
20-
// ALL-LAYOUT: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
21-
22-
// ALL-LAYOUT: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
23-
24-
// ALL-LAYOUT: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
25-
// ALL-LAYOUT: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
26-
// ALL-LAYOUT: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
27-
// ALL-LAYOUT: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
28-
// ALL-LAYOUT: llvm.mlir.undef : vector<4xi8>
29-
// ALL-LAYOUT-COUNT-4: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<4xi8>
30-
// ALL-LAYOUT: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 8, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default}
13+
// CHECK: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
14+
// CHECK: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
15+
// CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
16+
// CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
17+
// CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
18+
// CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
19+
// CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
20+
21+
// CHECK: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
22+
23+
// CHECK: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
24+
// CHECK: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
25+
// CHECK: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
26+
// CHECK: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
27+
// CHECK: llvm.mlir.undef : vector<4xi8>
28+
// CHECK-COUNT-4: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<4xi8>
29+
// CHECK: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 8, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default}
3130
tt.store %0, %cst {ttig.block_io = "row_major", boundaryCheck = array<i32: 0>} : !tt.ptr<tensor<256x64xi8, #dot_a>>
32-
// ALL-LAYOUT-COUNT-63: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 8, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default}
31+
// CHECK-COUNT-63: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 8, tile_width = 8, tile_height = 8, v_blocks = 1, cache_control = Default}
3332

3433
tt.return
3534
}
@@ -47,25 +46,25 @@ module attributes {ttig.support_sg_2d_block, "ttg.num-warps" = 16 : i32, "ttg.t
4746
%c1_i64 = arith.constant 1 : i64
4847
%c0_i32 = arith.constant 0 : i32
4948
%0 = tt.make_tensor_ptr %arg0, [%c64_i64, %c64_i64], [%c1_i64, %col_stride], [%c0_i32, %c0_i32] {order = array<i32: 0, 1>} : <tensor<256x64xi8, #dot_b>>
50-
// ALL-LAYOUT: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
51-
// ALL-LAYOUT: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
52-
// ALL-LAYOUT: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
53-
// ALL-LAYOUT: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
54-
// ALL-LAYOUT: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
55-
// ALL-LAYOUT: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
56-
// ALL-LAYOUT: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
57-
58-
// ALL-LAYOUT: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
59-
60-
// ALL-LAYOUT: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
61-
// ALL-LAYOUT: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
62-
// ALL-LAYOUT: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
63-
// ALL-LAYOUT: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
64-
// ALL-LAYOUT: llvm.mlir.undef : vector<8xi8>
65-
// ALL-LAYOUT-COUNT-8: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<8xi8>
66-
// ALL-LAYOUT: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 8, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default}
49+
// CHECK: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
50+
// CHECK: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
51+
// CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
52+
// CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
53+
// CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
54+
// CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
55+
// CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
56+
57+
// CHECK: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
58+
59+
// CHECK: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
60+
// CHECK: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
61+
// CHECK: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
62+
// CHECK: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
63+
// CHECK: llvm.mlir.undef : vector<8xi8>
64+
// CHECK-COUNT-8: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<8xi8>
65+
// CHECK: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 8, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default}
6766
tt.store %0, %cst {ttig.block_io = "row_major", boundaryCheck = array<i32: 0>} : !tt.ptr<tensor<256x64xi8, #dot_b>>
68-
// ALL-LAYOUT-COUNT-63: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 8, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default}
67+
// CHECK-COUNT-63: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 8, tile_width = 16, tile_height = 8, v_blocks = 1, cache_control = Default}
6968

7069
tt.return
7170
}
@@ -83,25 +82,25 @@ module attributes {ttig.support_sg_2d_block, "ttg.num-warps" = 16 : i32, "ttg.t
8382
%c1_i64 = arith.constant 1 : i64
8483
%c0_i32 = arith.constant 0 : i32
8584
%0 = tt.make_tensor_ptr %arg0, [%c64_i64, %c64_i64], [%c1_i64, %col_stride], [%c0_i32, %c0_i32] {order = array<i32: 0, 1>} : <tensor<256x64xi8, #slice>>
86-
// ALL-LAYOUT: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
87-
// ALL-LAYOUT: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
88-
// ALL-LAYOUT: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
89-
// ALL-LAYOUT: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
90-
// ALL-LAYOUT: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
91-
// ALL-LAYOUT: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
92-
// ALL-LAYOUT: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
93-
94-
// ALL-LAYOUT: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
95-
96-
// ALL-LAYOUT: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
97-
// ALL-LAYOUT: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
98-
// ALL-LAYOUT: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
99-
// ALL-LAYOUT: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
100-
// ALL-LAYOUT: llvm.mlir.undef : vector<16xi8>
101-
// ALL-LAYOUT-COUNT-16: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<16xi8>
102-
// ALL-LAYOUT: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 8, v_blocks = 1, cache_control = Default}
85+
// CHECK: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
86+
// CHECK: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
87+
// CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
88+
// CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
89+
// CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
90+
// CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
91+
// CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
92+
93+
// CHECK: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
94+
95+
// CHECK: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
96+
// CHECK: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
97+
// CHECK: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
98+
// CHECK: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
99+
// CHECK: llvm.mlir.undef : vector<16xi8>
100+
// CHECK-COUNT-16: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<16xi8>
101+
// CHECK: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 8, v_blocks = 1, cache_control = Default}
103102
tt.store %0, %cst {ttig.block_io = "row_major", boundaryCheck = array<i32: 0>} : !tt.ptr<tensor<256x64xi8, #slice>>
104-
// ALL-LAYOUT-COUNT-31: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 8, v_blocks = 1, cache_control = Default}
103+
// CHECK-COUNT-31: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 8, v_blocks = 1, cache_control = Default}
105104

106105
tt.return
107106
}
@@ -118,25 +117,25 @@ module attributes {ttig.support_sg_2d_block, "ttg.num-warps" = 16 : i32, "ttg.t
118117
%c1_i64 = arith.constant 1 : i64
119118
%c0_i32 = arith.constant 0 : i32
120119
%0 = tt.make_tensor_ptr %arg0, [%c64_i64, %c64_i64], [%c1_i64, %col_stride], [%c0_i32, %c0_i32] {order = array<i32: 0, 1>} : <tensor<256x64xi8, #blocked>>
121-
// ALL-LAYOUT: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
122-
// ALL-LAYOUT: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
123-
// ALL-LAYOUT: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
124-
// ALL-LAYOUT: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
125-
// ALL-LAYOUT: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
126-
// ALL-LAYOUT: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
127-
// ALL-LAYOUT: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
128-
129-
// ALL-LAYOUT: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
130-
131-
// ALL-LAYOUT: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
132-
// ALL-LAYOUT: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
133-
// ALL-LAYOUT: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
134-
// ALL-LAYOUT: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
135-
// ALL-LAYOUT: llvm.mlir.undef : vector<8xi8>
136-
// ALL-LAYOUT-COUNT-8: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<8xi8>
137-
// ALL-LAYOUT: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 4, v_blocks = 1, cache_control = Default}
120+
// CHECK: %[[OFF_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
121+
// CHECK: %[[OFF_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
122+
// CHECK: %[[HEIGHT_i64:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
123+
// CHECK: %[[WIDTH_i64:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
124+
// CHECK: %[[ROW_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[4] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
125+
// CHECK: %[[COL_STRIDE_i64:.*]] = llvm.extractvalue {{.*}}[5] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
126+
// CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue {{.*}}[6] : !llvm.struct<(i32, i32, i64, i64, i64, i64, ptr<1>)>
127+
128+
// CHECK: %[[HEIGHT:.*]] = llvm.trunc %[[HEIGHT_i64]] : i64 to i32
129+
130+
// CHECK: %[[OFFSET:.*]] = llvm.add %[[OFF_0]], {{.*}} : i32
131+
// CHECK: %[[BASE:.*]] = llvm.getelementptr %[[BASE_PTR]]{{.*}} : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, i8
132+
// CHECK: %[[OFFSET_X:.*]] = llvm.mlir.constant(0 : i32) : i32
133+
// CHECK: %[[OFFSET_Y:.*]] = llvm.select {{.*}}, %[[OFFSET]], %[[HEIGHT]] : i1, i32
134+
// CHECK: llvm.mlir.undef : vector<8xi8>
135+
// CHECK-COUNT-8: llvm.insertelement %{{[0-9]+}}, %{{[0-9]+}}{{\[}}{{.*}} : i32] : vector<8xi8>
136+
// CHECK: triton_gen.2Dblockstore {{.*}}, %[[OFFSET_X]], %[[OFFSET_Y]], {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 4, v_blocks = 1, cache_control = Default}
138137
tt.store %0, %cst {ttig.block_io = "row_major", boundaryCheck = array<i32: 0>} : !tt.ptr<tensor<256x64xi8, #blocked>>
139-
// ALL-LAYOUT-COUNT-7: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 4, v_blocks = 1, cache_control = Default}
138+
// CHECK-COUNT-7: triton_gen.2Dblockstore {{.*}} {elem_size_in_bits = 16, tile_width = 32, tile_height = 4, v_blocks = 1, cache_control = Default}
140139

141140
tt.return
142141
}

third_party/intel/lib/TritonIntelGPUToLLVM/LoadStoreOpToLLVM.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2677,9 +2677,11 @@ struct StoreOpToBlockIOConversion
26772677
LogicalResult
26782678
matchAndRewrite(triton::StoreOp op, OpAdaptor adaptor,
26792679
ConversionPatternRewriter &rewriter) const final {
2680-
static const bool enableBlockIOForAllLayout =
2681-
triton::tools::getBoolEnv("TRITON_INTEL_ENABLE_BLOCK_IO_ALL_LAYOUTS");
2682-
if (!isBlockIOCandidate(op, enableBlockIOForAllLayout))
2680+
std::optional<bool> enableBlockIOForAllLayout =
2681+
mlir::triton::tools::isEnvValueBool(mlir::triton::tools::getStrEnv(
2682+
"TRITON_INTEL_ENABLE_BLOCK_IO_ALL_LAYOUTS"));
2683+
if (!isBlockIOCandidate(op, !enableBlockIOForAllLayout.has_value() ||
2684+
enableBlockIOForAllLayout.value()))
26832685
return failure();
26842686

26852687
Location loc = op.getLoc();

0 commit comments

Comments
 (0)