Skip to content

Commit 701afb3

Browse files
committed
[mlir][xegpu] Support boundary checks only for block instructions
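Constrains the Vector-to-XeGPU lowering to apply boundary checks only to data transfers that operate on block shapes, i.e. vectors of rank 2 or higher. Transfer reads/writes of 1D vectors with an out-of-bounds dimension are no longer lowered, and 1D vector loads/stores now create tensor descriptors with the boundary check disabled. This further aligns the lowering with the restrictions of the current Xe instructions.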
1 parent 9ea499a commit 701afb3

File tree

5 files changed

+42
-7
lines changed

5 files changed

+42
-7
lines changed

mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
8282
xferOp, "Buffer must be contiguous in the innermost dimension");
8383

8484
unsigned vecRank = vecTy.getRank();
85+
if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
86+
return rewriter.notifyMatchFailure(
87+
xferOp, "Boundary check is available only for block instructions.");
88+
8589
AffineMap map = xferOp.getPermutationMap();
8690
if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
8791
return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
@@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
255259
if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
256260
return failure();
257261

262+
// Boundary check is available only for block instructions.
263+
bool boundaryCheck = vecTy.getRank() > 1;
264+
258265
auto descType = xegpu::TensorDescType::get(
259266
vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
260-
/*boundary_check=*/true, xegpu::MemorySpace::Global);
267+
boundaryCheck, xegpu::MemorySpace::Global);
261268
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
262269
rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());
263270

@@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
285292
if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
286293
return failure();
287294

288-
auto descType =
289-
xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
290-
/*array_length=*/1, /*boundary_check=*/true,
291-
xegpu::MemorySpace::Global);
295+
// Boundary check is available only for block instructions.
296+
bool boundaryCheck = vecTy.getRank() > 1;
297+
298+
auto descType = xegpu::TensorDescType::get(
299+
vecTy.getShape(), vecTy.getElementType(),
300+
/*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
292301
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
293302
rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());
294303

mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vecto
1212
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
1313
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
1414
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
15-
// CHECK-SAME: boundary_check = true
15+
// CHECK-SAME: boundary_check = false
1616
// CHECK: %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
1717
// CHECK: return %[[VEC]]
1818

mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
1414
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
1515
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
1616
// CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
17-
// CHECK-SAME: boundary_check = true
17+
// CHECK-SAME: boundary_check = false
1818
// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>
1919

2020
// -----

mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
119119

120120
// -----
121121

122+
func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
123+
%offset: index) -> vector<8xf32> {
124+
%c0 = arith.constant 0.0 : f32
125+
%0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
126+
{in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
127+
return %0 : vector<8xf32>
128+
}
129+
130+
// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
131+
// CHECK: vector.transfer_read
132+
133+
// -----
134+
122135
func.func @no_load_masked(%source : memref<4xf32>,
123136
%offset : index) -> vector<4xf32> {
124137
%c0 = arith.constant 0.0 : f32

mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
164164

165165
// CHECK-LABEL: @no_store_unsupported_map(
166166
// CHECK: vector.transfer_write
167+
168+
// -----
169+
170+
func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
171+
%source: memref<8x16x32xf32>, %offset: index) {
172+
vector.transfer_write %vec, %source[%offset, %offset, %offset]
173+
{in_bounds = [false]}
174+
: vector<8xf32>, memref<8x16x32xf32>
175+
return
176+
}
177+
178+
// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
179+
// CHECK: vector.transfer_write

0 commit comments

Comments
 (0)