70 changes: 63 additions & 7 deletions mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -29,9 +29,22 @@ class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
void printProperties(::mlir::MLIRContext *ctx,
::mlir::OpAsmPrinter &p, const Properties &prop,
::mlir::ArrayRef<::llvm::StringRef> elidedProps) {
Attribute propAttr = getPropertiesAsAttr(ctx, prop);
if (propAttr)
p << "<" << propAttr << ">";

DictionaryAttr propAttr = dyn_cast_if_present<mlir::DictionaryAttr>(getPropertiesAsAttr(ctx, prop));

// Filter the elided properties out of propAttr before printing.
mlir::SmallVector<mlir::NamedAttribute> filteredAttrs;
if (propAttr) {
for (auto namedAttr : propAttr.getValue()) {
if (llvm::is_contained(elidedProps, namedAttr.getName().strref()))
continue;
filteredAttrs.push_back(namedAttr);
}
}

if (!filteredAttrs.empty()) {
p << "<" << DictionaryAttr::get(ctx, filteredAttrs) << ">";
}
}
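
With this change, properties that the assembly format already prints elsewhere (the elidedProps, e.g. const_offsets below) are filtered out instead of appearing a second time in the prop-dict. A minimal round-trip sketch, assuming a hypothetical %tdesc:

// const_offsets is printed by the custom offsets directive as `[0, 0]`,
// so only the remaining properties (here a cache hint) show up in <{...}>.
%v = xegpu.load_nd %tdesc[0, 0] <{l1_hint = #xegpu.cache_hint<cached>}> : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16>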

static ::mlir::ParseResult parseProperties(::mlir::OpAsmParser &parser,
@@ -288,6 +301,8 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
}];

let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
Variadic<Index>: $offsets,
OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
@@ -298,7 +313,18 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
}
}];

let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))";
let assemblyFormat = [{
$TensorDesc ``
custom<OptionalDynamicIndexList>($offsets, $const_offsets)
prop-dict attr-dict `:` qualified(type($TensorDesc))
}];

let builders = [
OpBuilder<(ins "Value": $TensorDesc,
"xegpu::CachePolicyAttr": $l1_hint,
"xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];

let hasVerifier = 1;
}
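
For illustration, a sketch of the resulting prefetch syntax; %tdesc and %x are hypothetical values, and offsets may be constant, dynamic, or omitted entirely:

xegpu.prefetch_nd %tdesc[%x, 0] : !xegpu.tensor_desc<8x16xf16>
// Offsets omitted; this is the form the four-argument builder above emits.
xegpu.prefetch_nd %tdesc : !xegpu.tensor_desc<8x16xf16>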
@@ -343,6 +369,8 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
}];

let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
Variadic<Index>: $offsets,
OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
OptionalAttr<UnitAttr>: $packed,
OptionalAttr<DenseI64ArrayAttr>: $transpose,
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
@@ -361,7 +389,20 @@
}
}];

let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
let assemblyFormat = [{
$TensorDesc ``
custom<OptionalDynamicIndexList>($offsets, $const_offsets)
prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)
}];

let builders = [
OpBuilder<(ins "Type": $value, "Value": $TensorDesc,
"UnitAttr": $packed, "DenseI64ArrayAttr": $transpose,
"xegpu::CachePolicyAttr": $l1_hint,
"xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];

let hasVerifier = 1;
}
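
Likewise for loads, a hedged sketch with hypothetical values; the result shape assumes the op's usual transpose semantics (descriptor shape permuted by the transpose property):

%v = xegpu.load_nd %tdesc[%x, 0] <{transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32> -> vector<8x16xf32>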

@@ -400,6 +441,8 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [

let arguments = (ins XeGPU_ValueType: $value,
XeGPU_TensorDesc: $TensorDesc,
Variadic<Index>: $offsets,
OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
@@ -414,8 +457,21 @@
}
}];

let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict
`:` type($value) `,` qualified(type($TensorDesc))}];
let assemblyFormat = [{
$value `,`
$TensorDesc ``
custom<OptionalDynamicIndexList>($offsets, $const_offsets)
prop-dict attr-dict `:` type($value) `,` qualified(type($TensorDesc))
}];

let builders = [
OpBuilder<(ins "Value": $value, "Value": $TensorDesc,
"xegpu::CachePolicyAttr": $l1_hint,
"xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];


let hasVerifier = 1;
}
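
And a store sketch with dynamic offsets (hypothetical %val, %tdesc, %x, %y):

xegpu.store_nd %val, %tdesc[%x, %y] : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>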

74 changes: 68 additions & 6 deletions mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -329,18 +329,30 @@ ParseResult parseOptionalDynamicIndexList(
return success();
}

void printOptionalDynamicIndexList(
OpAsmPrinter &printer, Operation *op, OperandRange values,
ArrayRef<int64_t> integers, TypeRange valueTypes = TypeRange(),
AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
void printOptionalDynamicIndexList(OpAsmPrinter &printer, Operation *op,
OperandRange values,
DenseI64ArrayAttr integers) {

if (!integers)
return;

return printDynamicIndexList(printer, op, values, integers,
/*scalableFlags=*/{}, valueTypes, delimiter);
/*scalableFlags=*/{}, {},
AsmParser::Delimiter::Square);
}

//===----------------------------------------------------------------------===//
// XeGPU_PrefetchNdOp
//===----------------------------------------------------------------------===//

void PrefetchNdOp::build(OpBuilder &builder, OperationState &state,
Value tensorDesc, xegpu::CachePolicyAttr l1_hint,
xegpu::CachePolicyAttr l2_hint,
xegpu::CachePolicyAttr l3_hint) {

return build(builder, state, tensorDesc, ValueRange(), DenseI64ArrayAttr(),
l1_hint, l2_hint, l3_hint);
}

LogicalResult PrefetchNdOp::verify() {
auto tdescTy = getTensorDescType();
if (tdescTy.isScattered())
@@ -355,12 +367,34 @@ LogicalResult PrefetchNdOp::verify() {
if (!isReadHintOrNone(getL3HintAttr()))
return emitOpError("invalid l3_hint: ") << getL3HintAttr();

int64_t tDescRank = tdescTy.getRank();
int64_t offsetSize = static_cast<int64_t>(getOffsets().size());
int64_t constOffsetSize =
getConstOffsetsAttr() ? getConstOffsetsAttr().size() : 0;
if (((offsetSize != 0) && (offsetSize != tDescRank)) ||
((constOffsetSize != 0) && (constOffsetSize != tDescRank)))
return emitOpError(
"Mismatched ranks between offsets and tensor descriptor");

return success();
}

//===----------------------------------------------------------------------===//
// XeGPU_LoadNdOp
//===----------------------------------------------------------------------===//

void LoadNdOp::build(OpBuilder &builder, OperationState &state, Type retType,
Value tensorDesc, UnitAttr packed,
DenseI64ArrayAttr transpose,
xegpu::CachePolicyAttr l1_hint,
xegpu::CachePolicyAttr l2_hint,
xegpu::CachePolicyAttr l3_hint) {

return build(builder, state, retType, tensorDesc, ValueRange(),
DenseI64ArrayAttr(), packed, transpose, l1_hint, l2_hint,
l3_hint);
}

LogicalResult LoadNdOp::verify() {
auto tdescTy = getTensorDescType();
auto valueTy = getType();
@@ -442,12 +476,31 @@ LogicalResult LoadNdOp::verify() {
<< " is not consistent with tensor descriptor "
<< tdescTy;

int64_t tDescRank = tdescTy.getRank();
int64_t offsetSize = static_cast<int64_t>(getOffsets().size());
int64_t constOffsetSize =
getConstOffsetsAttr() ? getConstOffsetsAttr().size() : 0;
if (((offsetSize != 0) && (offsetSize != tDescRank)) ||
((constOffsetSize != 0) && (constOffsetSize != tDescRank)))
return emitOpError(
"Mismatched ranks between offsets and tensor descriptor");

return success();
}

//===----------------------------------------------------------------------===//
// XeGPU_StoreNdOp
//===----------------------------------------------------------------------===//

void StoreNdOp::build(OpBuilder &builder, OperationState &state, Value value,
Value tensorDesc, xegpu::CachePolicyAttr l1_hint,
xegpu::CachePolicyAttr l2_hint,
xegpu::CachePolicyAttr l3_hint) {

return build(builder, state, value, tensorDesc, ValueRange(),
DenseI64ArrayAttr(), l1_hint, l2_hint, l3_hint);
}

LogicalResult StoreNdOp::verify() {
auto dstTy = getTensorDescType(); // Tile
auto valTy = getValueType(); // Vector
@@ -502,6 +555,15 @@ LogicalResult StoreNdOp::verify() {
<< " is not consistent with tensor descriptor "
<< dstTy;

int64_t tDescRank = dstTy.getRank();
int64_t offsetSize = static_cast<int64_t>(getOffsets().size());
int64_t constOffsetSize =
getConstOffsetsAttr() ? getConstOffsetsAttr().size() : 0;
if (((offsetSize != 0) && (offsetSize != tDescRank)) ||
((constOffsetSize != 0) && (constOffsetSize != tDescRank)))
return emitOpError(
"Mismatched ranks between offsets and tensor descriptor");

return success();
}

14 changes: 14 additions & 0 deletions mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -352,6 +352,10 @@ struct StoreNdDistribution final : public gpu::WarpDistributionPattern {
if (!storeOp)
return failure();

int64_t offsetSize = static_cast<int64_t>(storeOp.getOffsets().size());
if ((offsetSize != 0) || storeOp.getConstOffsetsAttr())
return failure();

xegpu::TensorDescType tensorDescTy = storeOp.getTensorDescType();
xegpu::LayoutAttr layout = tensorDescTy.getLayoutAttr();
if (!layout)
@@ -464,6 +468,11 @@ struct LoadNdDistribution final : public gpu::WarpDistributionPattern {
warpOp, "warp result is not a xegpu::LoadNd op");

auto loadOp = operand->get().getDefiningOp<xegpu::LoadNdOp>();

int64_t offsetSize = static_cast<int64_t>(loadOp.getOffsets().size());
if ((offsetSize != 0) || loadOp.getConstOffsetsAttr())
return failure();

xegpu::TensorDescType tensorDescTy = loadOp.getTensorDescType();
xegpu::LayoutAttr layout = tensorDescTy.getLayoutAttr();
if (!layout)
@@ -767,6 +776,11 @@ struct PrefetchNdDistribution final : public gpu::WarpDistributionPattern {
auto prefetchOp = dyn_cast_or_null<xegpu::PrefetchNdOp>(lastNode);
if (!prefetchOp)
return failure();

int64_t offsetSize = static_cast<int64_t>(prefetchOp.getOffsets().size());
if ((offsetSize != 0) || prefetchOp.getConstOffsetsAttr())
return failure();

xegpu::LayoutAttr layout = prefetchOp.getTensorDescType().getLayoutAttr();
if (!layout)
return rewriter.notifyMatchFailure(
12 changes: 12 additions & 0 deletions mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
@@ -218,6 +218,10 @@ struct UnrollPrefetchNdOp : public UnrollPattern<xegpu::PrefetchNdOp> {
if (!targetShape)
return failure();

int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
if ((offsetSize != 0) || op.getConstOffsetsAttr())
return failure();

SmallVector<Type> convertedTdescTypes =
getUnrolledTypes(tdescTy, *targetShape);
SmallVector<Value> convertedTdesc = pack(
@@ -245,6 +249,10 @@ struct UnrollLoadNdOp : public UnrollPattern<xegpu::LoadNdOp> {
if (!targetShape)
return failure();

int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
if ((offsetSize != 0) || op.getConstOffsetsAttr())
return failure();

Type elemTy = tdescTy.getElementType();
VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);

@@ -279,6 +287,10 @@ struct UnrollStoreNdOp : public UnrollPattern<xegpu::StoreNdOp> {
if (!targetShape)
return failure();

int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
if ((offsetSize != 0) || op.getConstOffsetsAttr())
return failure();

SmallVector<Type> convertedValTypes =
getUnrolledTypes(valueTy, *targetShape);
SmallVector<Type> convertedTdescTypes =
15 changes: 15 additions & 0 deletions mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -219,6 +219,11 @@ struct WgToSgLoadNdOp : public OpConversionPattern<xegpu::LoadNdOp> {
matchAndRewrite(xegpu::LoadNdOp op, OneToNOpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
SmallVector<Value> newLoadOps;

int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
if ((offsetSize != 0) || op.getConstOffsetsAttr())
return failure();

for (auto src : adaptor.getTensorDesc()) {
xegpu::TensorDescType tdescTy =
dyn_cast<xegpu::TensorDescType>(src.getType());
@@ -241,6 +246,11 @@ struct WgToSgStoreNdOp : public OpConversionPattern<xegpu::StoreNdOp> {
LogicalResult
matchAndRewrite(xegpu::StoreNdOp op, OneToNOpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {

int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
if ((offsetSize != 0) || op.getConstOffsetsAttr())
return failure();

for (auto [v, t] : llvm::zip(adaptor.getValue(), adaptor.getTensorDesc()))
xegpu::StoreNdOp::create(rewriter, op.getLoc(), v, t, op.getL1HintAttr(),
op.getL2HintAttr(), op.getL3HintAttr());
@@ -323,6 +333,11 @@ struct WgToSgPrefetchNdOp : public OpConversionPattern<xegpu::PrefetchNdOp> {
LogicalResult
matchAndRewrite(xegpu::PrefetchNdOp op, OneToNOpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {

int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
if ((offsetSize != 0) || op.getConstOffsetsAttr())
return failure();

for (auto src : adaptor.getTensorDesc())
xegpu::PrefetchNdOp::create(rewriter, op.getLoc(), TypeRange(), src,
op->getAttrs());
25 changes: 25 additions & 0 deletions mlir/test/Dialect/XeGPU/invalid.mlir
@@ -132,6 +132,31 @@ func.func @subgroup_load_nd_9(%src: memref<4x8x16xf16>) {
return
}

// -----
func.func @subgroup_load_nd_offset_1(%src: memref<4x8x16xf16>, %x : index) {
%1 = xegpu.create_nd_tdesc %src: memref<4x8x16xf16> -> !xegpu.tensor_desc<16xf16>
// expected-error@+1 {{Mismatched ranks between offsets and tensor descriptor}}
%2 = xegpu.load_nd %1[0, 0] : !xegpu.tensor_desc<16xf16> -> vector<16xf16>
return
}

// -----
func.func @subgroup_load_nd_offset_2(%src: memref<4x8x16xf16>, %x : index) {
%3 = xegpu.create_nd_tdesc %src: memref<4x8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
// expected-error@+1 {{Mismatched ranks between offsets and tensor descriptor}}
xegpu.prefetch_nd %3[0] : !xegpu.tensor_desc<8x16xf16>
return
}

// -----
func.func @subgroup_load_nd_offset_3(%src: memref<4x8x16xf16>, %x : index) {
%3 = xegpu.create_nd_tdesc %src: memref<4x8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
%5 = xegpu.load_nd %3[0, 0] : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16>
// expected-error@+1 {{Mismatched ranks between offsets and tensor descriptor}}
xegpu.store_nd %5, %3[%x] : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
return
}

// -----
func.func @load_nd_layout(%src: memref<24x32xf32>) {
%1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16xf32>