Commit ee3802d: refine tensor_desc verifier

1 parent: 55678dc

5 files changed: +85 -52 lines

mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h

Lines changed: 3 additions & 1 deletion
@@ -25,12 +25,14 @@ class TensorDescType;
 } // namespace xegpu
 } // namespace mlir
 
-#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
 #include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
 #define GET_ATTRDEF_CLASSES
 #include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.h.inc>
 #define GET_TYPEDEF_CLASSES
 #include <mlir/Dialect/XeGPU/IR/XeGPUTypes.h.inc>
+
+#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
+
 #define GET_OP_CLASSES
 #include <mlir/Dialect/XeGPU/IR/XeGPU.h.inc>

mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td

Lines changed: 6 additions & 0 deletions
@@ -36,6 +36,12 @@ def XeGPU_Dialect : Dialect {
 
   let useDefaultTypePrinterParser = true;
   let useDefaultAttributePrinterParser = true;
+
+  let extraClassDeclaration = [{
+    /// Checks if the given shape can be evenly distributed based on the layout
+    /// and data factors provided by the LayoutAttr.
+    static bool isEvenlyDistributable(llvm::ArrayRef<int64_t> shape, xegpu::LayoutAttr attr);
+  }];
 }
 
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD

mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp

Lines changed: 70 additions & 17 deletions
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
@@ -30,6 +31,61 @@ void XeGPUDialect::initialize() {
   >();
 }
 
+bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
+                                         xegpu::LayoutAttr attr) {
+  assert(attr && "Layout attribute is missing.");
+
+  auto getSubShapeOrNull =
+      [&](llvm::ArrayRef<int64_t> shape, DenseI32ArrayAttr layout,
+          DenseI32ArrayAttr data,
+          bool use_rr = true) -> std::optional<SmallVector<int64_t>> {
+    llvm::SmallVector<int64_t> newShape(shape);
+    if (layout) {
+      auto vec = llvm::to_vector_of<int64_t>(layout.asArrayRef());
+      if (vec.size() != shape.size())
+        return std::nullopt;
+      auto ratio = computeShapeRatio(shape, vec);
+      if (!ratio.has_value())
+        return std::nullopt;
+      newShape = ratio.value();
+    }
+
+    if (data) {
+      auto vec = llvm::to_vector_of<int64_t>(data.asArrayRef());
+      if (vec.size() != shape.size())
+        return std::nullopt;
+      auto ratio = computeShapeRatio(newShape, vec);
+      if (!ratio.has_value() && use_rr)
+        ratio = computeShapeRatio(vec, newShape);
+      if (!ratio.has_value())
+        return std::nullopt;
+
+      // If data is present, always return it for the next phase.
+      newShape = vec;
+    }
+    return newShape;
+  };
+
+  // Check sgLayout and sgData.
+  auto maybeSgShape =
+      getSubShapeOrNull(shape, attr.getSgLayout(), attr.getSgData());
+  if (!maybeSgShape)
+    return false;
+  auto sgShape = maybeSgShape.value();
+
+  // Check instData; it has neither a layout nor a round-robin mode.
+  auto maybeInstShape =
+      getSubShapeOrNull(sgShape, nullptr, attr.getInstData(), false);
+  if (!maybeInstShape)
+    return false;
+  auto instShape = maybeInstShape.value();
+
+  // Check laneLayout and laneData.
+  auto maybeLaneShape = getSubShapeOrNull(instShape, attr.getLaneLayout(),
+                                          attr.getLaneData(), false);
+  return maybeLaneShape.has_value();
+}
+
 //===----------------------------------------------------------------------===//
 // XeGPU_BlockTensorDescAttr
 //===----------------------------------------------------------------------===//
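
To make the phased check above concrete: each phase divides the current shape dimension-wise by a layout factor (subgroup, then instruction, then lane) and rejects the layout as soon as a dimension does not divide evenly. Below is a minimal standalone C++ sketch of that divisibility rule, not the MLIR implementation itself; shapeRatio is a simplified stand-in for computeShapeRatio from IndexingUtils, the 4x8 shape with lane_layout = [1, 16] comes from the updated invalid.mlir tests further down, and the 32x64 shape with sg_layout = [4, 4] is only an illustrative assumption.

// Simplified sketch of the per-phase divisibility check behind
// isEvenlyDistributable; not the MLIR implementation.
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Returns shape[i] / factor[i] when every dimension divides evenly,
// std::nullopt otherwise (stand-in for mlir::computeShapeRatio).
static std::optional<std::vector<int64_t>>
shapeRatio(const std::vector<int64_t> &shape, const std::vector<int64_t> &factor) {
  if (shape.size() != factor.size())
    return std::nullopt;
  std::vector<int64_t> ratio;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (factor[i] == 0 || shape[i] % factor[i] != 0)
      return std::nullopt;
    ratio.push_back(shape[i] / factor[i]);
  }
  return ratio;
}

int main() {
  // From the updated tests: a 4x8 tensor_desc with lane_layout = [1, 16]
  // is rejected because 8 is not divisible by 16.
  std::cout << (shapeRatio({4, 8}, {1, 16}) ? "ok" : "cannot distribute [4, 8]")
            << "\n";

  // Illustrative only: a 32x64 shape with sg_layout = [4, 4] leaves an
  // 8x16 sub-shape per subgroup for the later phases to check.
  if (auto perSg = shapeRatio({32, 64}, {4, 4}))
    std::cout << (*perSg)[0] << "x" << (*perSg)[1] << "\n"; // prints 8x16
}
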
@@ -241,7 +297,7 @@ LogicalResult TensorDescType::verify(
     llvm::ArrayRef<int64_t> shape, mlir::Type elementType,
     mlir::Attribute encoding, mlir::Attribute layout) {
   size_t rank = shape.size();
-  // Low-pressure types are packed in 32-bit units.
+  // Low-precision types are packed in 32-bit units.
   int32_t packingFactor = 32 / elementType.getIntOrFloatBitWidth();
   if (rank != 1 && rank != 2)
     return emitError() << "expected 1D or 2D tensor";
@@ -268,23 +324,21 @@
     }
   }
 
-  if (auto blockAttr =
-          mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding)) {
+  auto blockAttr = mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding);
+  if (blockAttr) {
     MemorySpaceAttr memorySpaceAttr = blockAttr.getMemorySpace();
     if (rank == 2 && memorySpaceAttr &&
         memorySpaceAttr.getValue() == MemorySpace::SLM)
       return emitError() << "SLM is not supported for 2D block tensor";
   }
 
-  if (auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout)) {
-
+  auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout);
+  if (layoutAttr) {
     if (rank != (size_t)layoutAttr.getRank())
       return emitError() << "expected layout rank to match tensor rank";
 
-    ArrayRef<int32_t> laneLayout = layoutAttr.getLaneLayout().asArrayRef();
-    ArrayRef<int32_t> laneData = layoutAttr.getLaneData().asArrayRef();
-
-    if (scatterAttr) {
+    auto laneData = layoutAttr.getLaneData();
+    if (scatterAttr && laneData) {
       // Validate subgroup mapping rules for scattered tensors.
       // A work-item's slice of the tensor with shape [sg_size] or
       // [sg_size, chunk_size] will be [1] or [1, 32/element_ty_bit_width]
@@ -294,20 +348,19 @@
       if (rank > 1 && laneData[0] != 1)
         return emitError()
               << "cannot map over non-contiguous scattered row elements";
-      if (laneData.back() != packingFactor)
+      if (laneData[rank - 1] != packingFactor)
        return emitError() << "work item data mapping must match the number of "
                              "contiguous elements";
     }
 
-    for (size_t i = 0; i < shape.size(); ++i) {
-      uint32_t numElemPerWi = laneLayout[i] * laneData[i];
-      if (shape[i] < numElemPerWi || shape[i] % numElemPerWi != 0)
-        return emitError() << "cannot distribute " << shape[i] << " over "
-                           << laneLayout[i] << " work items with "
-                           << laneData[i] << " elements each";
+    if (!XeGPUDialect::isEvenlyDistributable(shape, layoutAttr)) {
+      std::string shapeStr;
+      llvm::raw_string_ostream stream(shapeStr);
+      llvm::interleaveComma(shape, stream);
+      return emitError() << "cannot distribute [" << shapeStr << "] using "
+                         << layoutAttr;
     }
   }
-
   return success();
 }

mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp

Lines changed: 2 additions & 30 deletions
@@ -73,34 +73,6 @@ static bool isWriteHintOrNone(const CachePolicyAttr &attr) {
          kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH;
 }
 
-// Checks if the given shape is evenly distributed based on the layout
-// and data factors provided by the LayoutAttr. The function ensures that
-// each dimension of the shape can be evenly divided by the corresponding
-// data factor, and the resulting quotient can be evenly divided by the
-// layout factor. Returns `true` if the shape is evenly distributed,
-// otherwise `false`.
-static bool isEvenDistributed(llvm::ArrayRef<int64_t> shape,
-                              xegpu::LayoutAttr attr) {
-  assert(attr && "Layout attribute is missing.");
-  llvm::SmallVector<int32_t> defaults(shape.size(), 1);
-  llvm::ArrayRef<int32_t> layout, data;
-  if (auto sg_layout = attr.getSgLayout()) {
-    layout = sg_layout.asArrayRef();
-    auto sg_data = attr.getSgData();
-    data = sg_data ? sg_data.asArrayRef() : defaults;
-  } else {
-    layout = attr.getLaneLayout().asArrayRef();
-    auto lane_data = attr.getLaneData();
-    data = lane_data ? lane_data.asArrayRef() : defaults;
-  }
-  for (auto [dimSize, dataFactor, layoutFactor] :
-       llvm::zip_equal(shape, data, layout)) {
-    if (dimSize % dataFactor != 0 || (dimSize / dataFactor) % layoutFactor != 0)
-      return false;
-  }
-  return true;
-}
-
 static LogicalResult
 isValidGatherScatterParams(Type maskTy, VectorType valueTy,
                            TensorDescType tdescTy, UnitAttr transposeAttr,
@@ -685,10 +657,10 @@ LogicalResult ConvertLayoutOp::verify() {
       "expected srcMap and resMap be WgLayout or SgLayout at the same time.");
 
   auto shape = getSource().getType().getShape();
-  if (!isEvenDistributed(shape, srcMap))
+  if (!XeGPUDialect::isEvenlyDistributable(shape, srcMap))
     return emitOpError("invalid srcMap, data cannot be evenly distributed.");
 
-  if (!isEvenDistributed(shape, resMap))
+  if (!XeGPUDialect::isEvenlyDistributable(shape, resMap))
     return emitOpError("invalid resMap, data cannot be evenly distributed.");
 
   return mlir::success();

mlir/test/Dialect/XeGPU/invalid.mlir

Lines changed: 4 additions & 4 deletions
@@ -404,31 +404,31 @@ func.func @tensor_desc_1D_invalid_map_data(%src: memref<24x32xf32>) {
 // -----
 func.func @tensor_desc_invalid_map_layout(%src: memref<24x32xf32>) {
   %0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-      // expected-error@+1 {{cannot distribute 8 over 16 work items with 1 elements each}}
+      // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}}
       !xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
   return
 }
 
 // -----
 func.func @tensor_desc_invalid_map_layout_1(%src: memref<24x32xf32>) {
   %0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-      // expected-error@+1 {{cannot distribute 4 over 8 work items with 1 elements each}}
+      // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 1]>}}
       !xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 1]>>
   return
 }
 
 // -----
 func.func @tensor_desc_invalid_map_data(%src: memref<24x32xf32>) {
   %0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-      // expected-error@+1 {{cannot distribute 4 over 2 work items with 4 elements each}}
+      // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [2, 8], lane_data = [4, 1]>}}
       !xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [2, 8], lane_data = [4, 1]>>
   return
 }
 
 // -----
 func.func @tensor_desc_invalid_map_data_1(%src: memref<24x32xf32>) {
   %0 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-      // expected-error@+1 {{cannot distribute 4 over 8 work items with 1 elements each}}
+      // expected-error@+1 {{cannot distribute [4, 8] using #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 2]>}}
      !xegpu.tensor_desc<4x8xf32, #xegpu.layout<lane_layout = [8, 2], lane_data = [1, 2]>>
   return
 }
