diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index b6d2cc29cd1bf..4dfd5c24bf4df 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -1248,7 +1248,7 @@ def Vector_TransferReadOp :
                    AffineMapAttr:$permutation_map,
                    AnyType:$padding,
                    Optional<VectorOf<[I1]>>:$mask,
-                   BoolArrayAttr:$in_bounds)>,
+                   DenseBoolArrayAttr:$in_bounds)>,
     Results<(outs AnyVectorOfAnyRank:$vector)> {
 
   let summary = "Reads a supervector from memory into an SSA vector value.";
@@ -1443,7 +1443,7 @@ def Vector_TransferReadOp :
                    "Value":$source,
                    "ValueRange":$indices,
                    "AffineMapAttr":$permutationMapAttr,
-                   "ArrayAttr":$inBoundsAttr)>,
+                   "DenseBoolArrayAttr":$inBoundsAttr)>,
     /// 2. Builder that sets padding to zero and an empty mask (variant without attrs).
     OpBuilder<(ins "VectorType":$vectorType,
                    "Value":$source,
@@ -1495,7 +1495,7 @@ def Vector_TransferWriteOp :
                    Variadic<Index>:$indices,
                    AffineMapAttr:$permutation_map,
                    Optional<VectorOf<[I1]>>:$mask,
-                   BoolArrayAttr:$in_bounds)>,
+                   DenseBoolArrayAttr:$in_bounds)>,
     Results<(outs Optional<AnyRankedTensor>:$result)> {
 
   let summary = "The vector.transfer_write op writes a supervector to memory.";
@@ -1606,13 +1606,13 @@ def Vector_TransferWriteOp :
                    "ValueRange":$indices,
                    "AffineMapAttr":$permutationMapAttr,
                    "Value":$mask,
-                   "ArrayAttr":$inBoundsAttr)>,
+                   "DenseBoolArrayAttr":$inBoundsAttr)>,
     /// 2. Builder with type inference that sets an empty mask (variant with attrs).
     OpBuilder<(ins "Value":$vector,
                    "Value":$dest,
                    "ValueRange":$indices,
                    "AffineMapAttr":$permutationMapAttr,
-                   "ArrayAttr":$inBoundsAttr)>,
+                   "DenseBoolArrayAttr":$inBoundsAttr)>,
     /// 3. Builder with type inference that sets an empty mask (variant without attrs).
    OpBuilder<(ins "Value":$vector,
                    "Value":$dest,
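Note: the two `Arguments` lists above carry the heart of the change — `in_bounds` moves from `BoolArrayAttr` (an `ArrayAttr` whose elements are `BoolAttr`) to `DenseBoolArrayAttr`, which stores the flags inline and exposes them as `ArrayRef<bool>`. A minimal sketch (not part of the patch) contrasting the two encodings:

```cpp
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"

void contrastInBoundsEncodings(mlir::MLIRContext *ctx) {
  mlir::Builder b(ctx);

  // Old: ArrayAttr of BoolAttr; prints as `[true, false]`, and every read
  // goes through an Attribute cast.
  mlir::ArrayAttr oldStyle = b.getBoolArrayAttr({true, false});
  bool firstOld = llvm::cast<mlir::BoolAttr>(oldStyle[0]).getValue();

  // New: DenseBoolArrayAttr; prints as `array<i1: true, false>`, and reads
  // back as a plain ArrayRef<bool> with no per-element casts.
  mlir::DenseBoolArrayAttr newStyle = b.getDenseBoolArrayAttr({true, false});
  bool firstNew = newStyle.asArrayRef()[0];

  (void)firstOld;
  (void)firstNew;
}
```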
diff --git a/mlir/include/mlir/Interfaces/VectorInterfaces.td b/mlir/include/mlir/Interfaces/VectorInterfaces.td
index 7ea62c2ae2ab1..c3ed9ff46179c 100644
--- a/mlir/include/mlir/Interfaces/VectorInterfaces.td
+++ b/mlir/include/mlir/Interfaces/VectorInterfaces.td
@@ -98,7 +98,7 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> {
         dimension whether it is in-bounds or not. (Broadcast dimensions are
         always in-bounds).
       }],
-      /*retTy=*/"::mlir::ArrayAttr",
+      /*retTy=*/"::mlir::ArrayRef<bool>",
      /*methodName=*/"getInBounds",
      /*args=*/(ins)
    >,
@@ -169,15 +169,6 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> {
  ];

  let extraSharedClassDeclaration = [{
-    /// Return a vector of all in_bounds values as booleans (one per vector
-    /// transfer dimension).
-    ::llvm::SmallVector<bool> getInBoundsValues() {
-      ::llvm::SmallVector<bool> inBounds;
-      for (int64_t i = 0, e = $_op.getTransferRank(); i < e; ++i)
-        inBounds.push_back($_op.isDimInBounds(i));
-      return inBounds;
-    }
-
    /// Return the number of leading shaped dimensions (of the "source" operand)
    /// that do not participate in the permutation map.
    unsigned getLeadingShapedRank() {
@@ -241,7 +232,7 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> {
      if ($_op.isBroadcastDim(dim))
        return true;
      auto inBounds = $_op.getInBounds();
-      return ::llvm::cast<::mlir::BoolAttr>(inBounds[dim]).getValue();
+      return inBounds[dim];
    }

    /// Helper function to account for the fact that `permutationMap` results
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 3a4dc806efe97..0f0b34f2e9f3f 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -263,13 +263,6 @@ static void generateInBoundsCheck(
   });
 }
 
-/// Given an ArrayAttr, return a copy where the first element is dropped.
-static ArrayAttr dropFirstElem(OpBuilder &b, ArrayAttr attr) {
-  if (!attr)
-    return attr;
-  return ArrayAttr::get(b.getContext(), attr.getValue().drop_front());
-}
-
 /// Add the pass label to a vector transfer op if its rank is not the target
 /// rank.
 template <typename OpTy>
@@ -424,11 +417,11 @@ struct Strategy<TransferReadOp> {
     Location loc = xferOp.getLoc();
     auto bufferType = dyn_cast<ShapedType>(buffer.getType());
     auto vecType = dyn_cast<VectorType>(bufferType.getElementType());
-    auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
     auto newXferOp = b.create<vector::TransferReadOp>(
         loc, vecType, xferOp.getSource(), xferIndices,
         AffineMapAttr::get(unpackedPermutationMap(b, xferOp)),
-        xferOp.getPadding(), Value(), inBoundsAttr);
+        xferOp.getPadding(), Value(),
+        b.getDenseBoolArrayAttr(xferOp.getInBounds().drop_front()));
 
     maybeApplyPassLabel(b, newXferOp, options.targetRank);
 
@@ -511,13 +504,12 @@ struct Strategy<TransferWriteOp> {
     Location loc = xferOp.getLoc();
     auto vec = b.create<memref::LoadOp>(loc, buffer, loadIndices);
-    auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
     auto source = loopState.empty() ? xferOp.getSource() : loopState[0];
     Type type = isTensorOp(xferOp) ? xferOp.getShapedType() : Type();
     auto newXferOp = b.create<vector::TransferWriteOp>(
         loc, type, vec, source, xferIndices,
         AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
-        inBoundsAttr);
+        b.getDenseBoolArrayAttr(xferOp.getInBounds().drop_front()));
 
     maybeApplyPassLabel(b, newXferOp, options.targetRank);
 
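Note: with `getInBounds()` now returning `ArrayRef<bool>`, the deleted `dropFirstElem` helper has no reason to exist — peeling the leading transfer dimension is a cheap `ArrayRef` view operation, and a new attribute is only uniqued at op-creation time. A standalone sketch of the idiom (names are illustrative, not from the patch):

```cpp
#include "mlir/IR/Builders.h"

// Rebuild the in_bounds attribute for a transfer op whose leading
// dimension has just been unrolled away.
mlir::DenseBoolArrayAttr peelLeadingInBounds(mlir::OpBuilder &b,
                                             llvm::ArrayRef<bool> inBounds) {
  // drop_front only narrows the view; no intermediate ArrayAttr is built.
  return b.getDenseBoolArrayAttr(inBounds.drop_front());
}
```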
@@ -1160,7 +1152,7 @@ struct ScalableTransposeTransferWriteConversion
         loopIterArgs.empty() ? writeOp.getSource() : loopIterArgs.front();
     auto newWriteOp = b.create<vector::TransferWriteOp>(
         loc, sliceVec, dest, xferIndices,
-        ArrayRef(writeOp.getInBoundsValues()).drop_front());
+        writeOp.getInBounds().drop_front());
     if (sliceMask)
       newWriteOp.getMaskMutable().assign(sliceMask);
@@ -1332,11 +1324,11 @@ struct UnrollTransferReadConversion
       getInsertionIndices(xferOp, insertionIndices);
       insertionIndices.push_back(rewriter.getIndexAttr(i));
 
-      auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
       auto newXferOp = b.create<vector::TransferReadOp>(
           loc, newXferVecType, xferOp.getSource(), xferIndices,
           AffineMapAttr::get(unpackedPermutationMap(b, xferOp)),
-          xferOp.getPadding(), Value(), inBoundsAttr);
+          xferOp.getPadding(), Value(),
+          b.getDenseBoolArrayAttr(xferOp.getInBounds().drop_front()));
       maybeAssignMask(b, xferOp, newXferOp, i);
       return b.create<vector::InsertOp>(loc, newXferOp, vec, insertionIndices);
@@ -1467,7 +1459,6 @@ struct UnrollTransferWriteConversion
       auto extracted =
           b.create<vector::ExtractOp>(loc, vec, extractionIndices);
-      auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
       Value xferVec;
       if (inputVectorTy.getRank() == 1) {
         // When target-rank=0, unrolling would cause the vector input
@@ -1481,7 +1472,7 @@ struct UnrollTransferWriteConversion
       auto newXferOp = b.create<vector::TransferWriteOp>(
           loc, sourceType, xferVec, source, xferIndices,
           AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
-          inBoundsAttr);
+          b.getDenseBoolArrayAttr(xferOp.getInBounds().drop_front()));
 
       maybeAssignMask(b, xferOp, newXferOp, i);
diff --git a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp
index 4968c4fc463d0..84305758e9fc0 100644
--- a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp
+++ b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp
@@ -497,8 +497,7 @@ struct LegalizeMultiTileTransferWriteAsStoreLoop
           loc, slice, writeOp.getSource(), ValueRange{storeRow, storeCol},
           AffineMapAttr::get(writeOp.getPermutationMap().dropResult(0)),
           sliceMask,
-          rewriter.getBoolArrayAttr(
-              ArrayRef(writeOp.getInBoundsValues()).drop_front()));
+          rewriter.getDenseBoolArrayAttr(writeOp.getInBounds().drop_front()));
     }
 
     rewriter.eraseOp(writeOp);
@@ -691,13 +690,12 @@ struct LiftIllegalVectorTransposeToMemory
         transposeOp.getPermutation(), getContext());
     auto transposedSubview = rewriter.create<memref::TransposeOp>(
         loc, readSubview, AffineMapAttr::get(transposeMap));
-    ArrayAttr inBoundsAttr = illegalRead.getInBoundsAttr();
+    DenseBoolArrayAttr inBoundsAttr = illegalRead.getInBoundsAttr();
     // - The `in_bounds` attribute
     if (inBoundsAttr) {
-      SmallVector<Attribute> inBoundsValues(inBoundsAttr.begin(),
-                                            inBoundsAttr.end());
+      SmallVector<bool> inBoundsValues(inBoundsAttr.asArrayRef());
       applyPermutationToVector(inBoundsValues, transposeOp.getPermutation());
-      inBoundsAttr = rewriter.getArrayAttr(inBoundsValues);
+      inBoundsAttr = rewriter.getDenseBoolArrayAttr(inBoundsValues);
     }
 
     VectorType legalReadType = resultType.clone(readType.getElementType());
@@ -902,7 +900,7 @@ struct LowerIllegalTransposeStoreViaZA
           rewriter.create<arith::AddIOp>(loc, transposedCol, writeIndices[1]);
       auto smeWrite = rewriter.create<vector::TransferWriteOp>(
           loc, tile, destTensorOrMemref, ValueRange{destRow, destCol},
-          transposeMap, subMask, writeOp.getInBounds());
+          transposeMap, subMask, writeOp.getInBoundsAttr());
 
       if (writeOp.hasPureTensorSemantics())
         destTensorOrMemref = smeWrite.getResult();
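Note: in the transpose lift above, the per-dimension flags have to be permuted along with the indexing map, and since they are now raw bools, `applyPermutationToVector` operates on them directly. A small sketch with assumed inputs (not patch code):

```cpp
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "llvm/ADT/SmallVector.h"

llvm::SmallVector<bool> permuteInBounds(llvm::ArrayRef<bool> inBounds,
                                        llvm::ArrayRef<int64_t> permutation) {
  llvm::SmallVector<bool> values(inBounds.begin(), inBounds.end());
  // For a 2-D transpose, permutation = {1, 0} simply swaps the two flags.
  mlir::applyPermutationToVector(values, permutation);
  return values;
}
```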
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index ca85f4b9b9c15..1aafc3a9471cc 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -659,7 +659,7 @@ static Value buildVectorWrite(RewriterBase &rewriter, Value value,
   if (auto maskOp = dyn_cast<vector::MaskingOpInterface>(write)) {
     auto maskedWriteOp = cast<vector::TransferWriteOp>(maskOp.getMaskableOp());
     SmallVector<bool> inBounds(maskedWriteOp.getVectorType().getRank(), true);
-    maskedWriteOp.setInBoundsAttr(rewriter.getBoolArrayAttr(inBounds));
+    maskedWriteOp.setInBoundsAttr(rewriter.getDenseBoolArrayAttr(inBounds));
   }
 
   LDBG("vectorized op: " << *write << "\n");
@@ -1399,7 +1399,7 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state,
     if (auto maskOp = dyn_cast<vector::MaskingOpInterface>(read)) {
       SmallVector<bool> inBounds(readType.getRank(), true);
       cast<vector::TransferReadOp>(maskOp.getMaskableOp())
-          .setInBoundsAttr(rewriter.getBoolArrayAttr(inBounds));
+          .setInBoundsAttr(rewriter.getDenseBoolArrayAttr(inBounds));
     }
 
     // 3.c. Not all ops support 0-d vectors, extract the scalar for now.
@@ -2432,7 +2432,7 @@ struct PadOpVectorizationWithTransferReadPattern
     rewriter.modifyOpInPlace(xferOp, [&]() {
       SmallVector<bool> inBounds(xferOp.getVectorType().getRank(), false);
       xferOp->setAttr(xferOp.getInBoundsAttrName(),
-                      rewriter.getBoolArrayAttr(inBounds));
+                      rewriter.getDenseBoolArrayAttr(inBounds));
       xferOp.getSourceMutable().assign(padOp.getSource());
       xferOp.getPaddingMutable().assign(padValue);
     });
@@ -2511,7 +2511,7 @@ struct PadOpVectorizationWithTransferWritePattern
     auto newXferOp = rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
         xferOp, padOp.getSource().getType(), xferOp.getVector(),
         padOp.getSource(), xferOp.getIndices(), xferOp.getPermutationMapAttr(),
-        xferOp.getMask(), rewriter.getBoolArrayAttr(inBounds));
+        xferOp.getMask(), rewriter.getDenseBoolArrayAttr(inBounds));
     rewriter.replaceOp(trimPadding, newXferOp->getResult(0));
 
     return success();
@@ -2815,7 +2815,7 @@ LogicalResult LinalgCopyVTRForwardingPattern::matchAndRewrite(
   Value res = rewriter.create<vector::TransferReadOp>(
       xferOp.getLoc(), vectorType, in, xferOp.getIndices(),
       xferOp.getPermutationMapAttr(), xferOp.getPadding(), xferOp.getMask(),
-      rewriter.getBoolArrayAttr(
+      rewriter.getDenseBoolArrayAttr(
          SmallVector<bool>(vectorType.getRank(), false)));
 
   if (maybeFillOp)
@@ -2874,7 +2874,7 @@ LogicalResult LinalgCopyVTWForwardingPattern::matchAndRewrite(
   rewriter.create<vector::TransferWriteOp>(
       xferOp.getLoc(), vector, out, xferOp.getIndices(),
       xferOp.getPermutationMapAttr(), xferOp.getMask(),
-      rewriter.getBoolArrayAttr(
+      rewriter.getDenseBoolArrayAttr(
          SmallVector<bool>(vector.getType().getRank(), false)));
 
   rewriter.eraseOp(copyOp);
@@ -3381,7 +3381,7 @@ struct Conv1DGenerator
       SmallVector<bool> inBounds(maskShape.size(), true);
       auto xferOp = cast<VectorTransferOpInterface>(opToMask);
       xferOp->setAttr(xferOp.getInBoundsAttrName(),
-                      rewriter.getBoolArrayAttr(inBounds));
+                      rewriter.getDenseBoolArrayAttr(inBounds));
 
       SmallVector<OpFoldResult> mixedDims = vector::getMixedSizesXfer(
          cast<LinalgOp>(op).hasPureTensorSemantics(), opToMask, rewriter);
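Note: every Linalg update above is an instance of one pattern — when a transfer is wrapped in `vector.mask` (or reads padding), the rewrite stamps a uniform flag over every transfer dimension. Roughly, under the new API (a sketch, not patch code):

```cpp
#include "mlir/Dialect/Vector/IR/VectorOps.h"

// Mark every dimension of a masked transfer_write as in-bounds; the mask,
// not the in_bounds flags, now guards out-of-bounds lanes.
void markAllDimsInBounds(mlir::OpBuilder &b,
                         mlir::vector::TransferWriteOp op) {
  llvm::SmallVector<bool> inBounds(op.getVectorType().getRank(), true);
  op.setInBoundsAttr(b.getDenseBoolArrayAttr(inBounds));
}
```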
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index cac6b95545704..0bd82894a6e58 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -3791,7 +3791,7 @@ void ExtractStridedSliceOp::getCanonicalizationPatterns(
 void TransferReadOp::build(OpBuilder &builder, OperationState &result,
                            VectorType vectorType, Value source,
                            ValueRange indices, AffineMapAttr permutationMapAttr,
-                           /*optional*/ ArrayAttr inBoundsAttr) {
+                           /*optional*/ DenseBoolArrayAttr inBoundsAttr) {
   Type elemType = llvm::cast<ShapedType>(source.getType()).getElementType();
   Value padding = builder.create<arith::ConstantOp>(
       result.location, elemType, builder.getZeroAttr(elemType));
@@ -3806,8 +3806,8 @@ void TransferReadOp::build(OpBuilder &builder, OperationState &result,
                            std::optional<ArrayRef<bool>> inBounds) {
   auto permutationMapAttr = AffineMapAttr::get(permutationMap);
   auto inBoundsAttr = (inBounds && !inBounds.value().empty())
-                          ? builder.getBoolArrayAttr(inBounds.value())
-                          : builder.getBoolArrayAttr(
+                          ? builder.getDenseBoolArrayAttr(inBounds.value())
+                          : builder.getDenseBoolArrayAttr(
                                SmallVector<bool>(vectorType.getRank(), false));
   build(builder, result, vectorType, source, indices, permutationMapAttr,
         inBoundsAttr);
@@ -3822,8 +3822,8 @@ void TransferReadOp::build(OpBuilder &builder, OperationState &result,
       llvm::cast<ShapedType>(source.getType()), vectorType);
   auto permutationMapAttr = AffineMapAttr::get(permutationMap);
   auto inBoundsAttr = (inBounds && !inBounds.value().empty())
-                          ? builder.getBoolArrayAttr(inBounds.value())
-                          : builder.getBoolArrayAttr(
+                          ? builder.getDenseBoolArrayAttr(inBounds.value())
+                          : builder.getDenseBoolArrayAttr(
                                SmallVector<bool>(vectorType.getRank(), false));
   build(builder, result, vectorType, source, indices, permutationMapAttr,
         padding,
@@ -3875,7 +3875,7 @@ static LogicalResult
 verifyTransferOp(VectorTransferOpInterface op, ShapedType shapedType,
                  VectorType vectorType, VectorType maskType,
                  VectorType inferredMaskType, AffineMap permutationMap,
-                 ArrayAttr inBounds) {
+                 ArrayRef<bool> inBounds) {
   if (op->hasAttr("masked")) {
     return op->emitOpError("masked attribute has been removed. "
                            "Use in_bounds instead.");
@@ -3948,8 +3948,7 @@ verifyTransferOp(VectorTransferOpInterface op, ShapedType shapedType,
            << AffineMapAttr::get(permutationMap)
            << " vs inBounds of size: " << inBounds.size();
   for (unsigned int i = 0, e = permutationMap.getNumResults(); i < e; ++i)
-    if (isa<AffineConstantExpr>(permutationMap.getResult(i)) &&
-        !llvm::cast<BoolAttr>(inBounds.getValue()[i]).getValue())
+    if (isa<AffineConstantExpr>(permutationMap.getResult(i)) && !inBounds[i])
       return op->emitOpError("requires broadcast dimensions to be in-bounds");
 
   return success();
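Note: the verifier hunk above is where the representational win shows — the broadcast-dimension check indexes the `ArrayRef<bool>` directly instead of casting each element out of an `ArrayAttr`. Equivalent standalone logic (a sketch):

```cpp
#include "mlir/IR/AffineMap.h"

// A broadcast result in the permutation map (an AffineConstantExpr) does not
// index memory, so it must always be flagged in-bounds.
bool broadcastDimsAreInBounds(mlir::AffineMap permutationMap,
                              llvm::ArrayRef<bool> inBounds) {
  for (unsigned i = 0, e = permutationMap.getNumResults(); i < e; ++i)
    if (llvm::isa<mlir::AffineConstantExpr>(permutationMap.getResult(i)) &&
        !inBounds[i])
      return false;
  return true;
}
```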
@@ -3961,11 +3960,17 @@ static void printTransferAttrs(OpAsmPrinter &p, VectorTransferOpInterface op) {
   if (op.getPermutationMap().isMinorIdentity())
     elidedAttrs.push_back(op.getPermutationMapAttrName());
   // Elide in_bounds attribute if all dims are out-of-bounds.
-  if (llvm::none_of(op.getInBoundsValues(), [](bool b) { return b; }))
+  if (llvm::none_of(op.getInBounds(), [](bool b) { return b; }))
     elidedAttrs.push_back(op.getInBoundsAttrName());
   p.printOptionalAttrDict(op->getAttrs(), elidedAttrs);
 }
 
+template <typename TransferOp>
+static ParseResult parseTransferAttrs(OpAsmParser &parser,
+                                      OperationState &result) {
+  return parser.parseOptionalAttrDict(result.attributes);
+}
+
 void TransferReadOp::print(OpAsmPrinter &p) {
   p << " " << getSource() << "[" << getIndices() << "], " << getPadding();
   if (getMask())
@@ -4005,7 +4010,7 @@ ParseResult TransferReadOp::parse(OpAsmParser &parser, OperationState &result) {
     if (parser.parseOperand(maskInfo))
       return failure();
   }
-  if (parser.parseOptionalAttrDict(result.attributes) ||
+  if (parseTransferAttrs<TransferReadOp>(parser, result) ||
      parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types))
     return failure();
   if (types.size() != 2)
@@ -4030,7 +4035,7 @@ ParseResult TransferReadOp::parse(OpAsmParser &parser, OperationState &result) {
   Attribute inBoundsAttr = result.attributes.get(inBoundsAttrName);
   if (!inBoundsAttr) {
     result.addAttribute(inBoundsAttrName,
-                        builder.getBoolArrayAttr(
+                        builder.getDenseBoolArrayAttr(
                             SmallVector<bool>(permMap.getNumResults(), false)));
   }
   if (parser.resolveOperand(sourceInfo, shapedType, result.operands) ||
@@ -4158,7 +4163,7 @@ static LogicalResult foldTransferInBoundsAttribute(TransferOp op) {
     return failure();
 
-  // OpBuilder is only used as a helper to build an I64ArrayAttr.
+  // OpBuilder is only used as a helper to build a DenseBoolArrayAttr.
   OpBuilder b(op.getContext());
-  op.setInBoundsAttr(b.getBoolArrayAttr(newInBounds));
+  op.setInBoundsAttr(b.getDenseBoolArrayAttr(newInBounds));
 
   return success();
 }
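Note: both parsers fall back to an all-false `in_bounds` when the attribute is elided from the textual form, mirroring the printer, which elides it when no dimension is in-bounds. The default in isolation (parameter names are illustrative):

```cpp
#include "mlir/IR/Builders.h"

// An elided in_bounds means "assume every dimension may be out of bounds":
// one `false` per result of the permutation map.
mlir::DenseBoolArrayAttr defaultInBounds(mlir::Builder &builder,
                                         unsigned numTransferDims) {
  return builder.getDenseBoolArrayAttr(
      llvm::SmallVector<bool>(numTransferDims, false));
}
```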
@@ -4328,7 +4333,7 @@ void TransferWriteOp::build(OpBuilder &builder, OperationState &result,
                             Value vector, Value dest, ValueRange indices,
                             AffineMapAttr permutationMapAttr,
                             /*optional*/ Value mask,
-                            /*optional*/ ArrayAttr inBoundsAttr) {
+                            /*optional*/ DenseBoolArrayAttr inBoundsAttr) {
   Type resultType = llvm::dyn_cast<RankedTensorType>(dest.getType());
   build(builder, result, resultType, vector, dest, indices, permutationMapAttr,
        mask, inBoundsAttr);
@@ -4338,7 +4343,7 @@ void TransferWriteOp::build(OpBuilder &builder, OperationState &result,
 void TransferWriteOp::build(OpBuilder &builder, OperationState &result,
                             Value vector, Value dest, ValueRange indices,
                             AffineMapAttr permutationMapAttr,
-                            /*optional*/ ArrayAttr inBoundsAttr) {
+                            /*optional*/ DenseBoolArrayAttr inBoundsAttr) {
   build(builder, result, vector, dest, indices, permutationMapAttr,
        /*mask=*/Value(), inBoundsAttr);
 }
@@ -4352,8 +4357,8 @@ void TransferWriteOp::build(OpBuilder &builder, OperationState &result,
   auto permutationMapAttr = AffineMapAttr::get(permutationMap);
   auto inBoundsAttr =
       (inBounds && !inBounds.value().empty())
-          ? builder.getBoolArrayAttr(inBounds.value())
-          : builder.getBoolArrayAttr(SmallVector<bool>(
+          ? builder.getDenseBoolArrayAttr(inBounds.value())
+          : builder.getDenseBoolArrayAttr(SmallVector<bool>(
                 llvm::cast<VectorType>(vector.getType()).getRank(), false));
   build(builder, result, vector, dest, indices, permutationMapAttr,
        /*mask=*/Value(), inBoundsAttr);
@@ -4385,7 +4390,7 @@ ParseResult TransferWriteOp::parse(OpAsmParser &parser,
   ParseResult hasMask = parser.parseOptionalComma();
   if (hasMask.succeeded() && parser.parseOperand(maskInfo))
     return failure();
-  if (parser.parseOptionalAttrDict(result.attributes) ||
+  if (parseTransferAttrs<TransferWriteOp>(parser, result) ||
      parser.getCurrentLocation(&typesLoc) || parser.parseColonTypeList(types))
     return failure();
   if (types.size() != 2)
@@ -4411,7 +4416,7 @@ ParseResult TransferWriteOp::parse(OpAsmParser &parser,
   Attribute inBoundsAttr = result.attributes.get(inBoundsAttrName);
   if (!inBoundsAttr) {
     result.addAttribute(inBoundsAttrName,
-                        builder.getBoolArrayAttr(
+                        builder.getDenseBoolArrayAttr(
                             SmallVector<bool>(permMap.getNumResults(), false)));
   }
   if (parser.resolveOperand(vectorInfo, vectorType, result.operands) ||
@@ -4764,7 +4769,7 @@ struct SwapExtractSliceOfTransferWrite
     auto newTransferWriteOp = rewriter.create<TransferWriteOp>(
         transferOp.getLoc(), transferOp.getVector(), newExtractOp.getResult(),
         transferOp.getIndices(), transferOp.getPermutationMapAttr(),
-        rewriter.getBoolArrayAttr(newInBounds));
+        rewriter.getDenseBoolArrayAttr(newInBounds));
     rewriter.modifyOpInPlace(insertOp, [&]() {
       insertOp.getSourceMutable().assign(newTransferWriteOp.getResult());
     });
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
index 344cfc0cbffb9..7ecfb766f0b79 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
@@ -22,15 +22,14 @@ using namespace mlir::vector;
 
 /// Transpose a vector transfer op's `in_bounds` attribute by applying reverse
 /// permutation based on the given indices.
-static ArrayAttr
-inverseTransposeInBoundsAttr(OpBuilder &builder, ArrayAttr attr,
+static DenseBoolArrayAttr
+inverseTransposeInBoundsAttr(OpBuilder &builder, ArrayRef<bool> inBounds,
                              const SmallVector<unsigned> &permutation) {
   SmallVector<bool> newInBoundsValues(permutation.size());
   size_t index = 0;
   for (unsigned pos : permutation)
-    newInBoundsValues[pos] =
-        cast<BoolAttr>(attr.getValue()[index++]).getValue();
-  return builder.getBoolArrayAttr(newInBoundsValues);
+    newInBoundsValues[pos] = inBounds[index++];
+  return builder.getDenseBoolArrayAttr(newInBoundsValues);
 }
 
 /// Extend the rank of a vector Value by `addedRanks` by adding outer unit
@@ -132,7 +131,7 @@ struct TransferReadPermutationLowering
     }
 
     // Transpose in_bounds attribute.
-    ArrayAttr newInBoundsAttr =
+    DenseBoolArrayAttr newInBoundsAttr =
         inverseTransposeInBoundsAttr(rewriter, op.getInBounds(), permutation);
 
     // Generate new transfer_read operation.
@@ -205,7 +204,7 @@ struct TransferWritePermutationLowering
     });
 
     // Transpose in_bounds attribute.
-    ArrayAttr newInBoundsAttr =
+    DenseBoolArrayAttr newInBoundsAttr =
        inverseTransposeInBoundsAttr(rewriter, op.getInBounds(), permutation);
 
     // Generate new transfer_write operation.
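Note: `inverseTransposeInBoundsAttr` above scatters flag `i` to position `permutation[i]`; on `ArrayRef<bool>` that reduces to a two-line loop. The same indexing, extracted for clarity (a sketch, not patch code):

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

llvm::SmallVector<bool>
inverseTransposeFlags(llvm::ArrayRef<bool> inBounds,
                      llvm::ArrayRef<unsigned> permutation) {
  llvm::SmallVector<bool> result(permutation.size());
  for (auto [index, pos] : llvm::enumerate(permutation))
    result[pos] = inBounds[index]; // permutation {1, 0} swaps a 2-D pair
  return result;
}
```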
@@ -298,7 +297,8 @@ struct TransferWriteNonPermutationLowering
     for (int64_t i = 0, e = op.getVectorType().getRank(); i < e; ++i) {
       newInBoundsValues.push_back(op.isDimInBounds(i));
     }
-    ArrayAttr newInBoundsAttr = rewriter.getBoolArrayAttr(newInBoundsValues);
+    DenseBoolArrayAttr newInBoundsAttr =
+        rewriter.getDenseBoolArrayAttr(newInBoundsValues);
     auto newWrite = rewriter.create<vector::TransferWriteOp>(
         op.getLoc(), newVec, op.getSource(), op.getIndices(),
         AffineMapAttr::get(newMap), newMask, newInBoundsAttr);
@@ -386,11 +386,8 @@ struct TransferOpReduceRank
     VectorType newReadType = VectorType::get(
         newShape, originalVecType.getElementType(), newScalableDims);
 
-    ArrayAttr newInBoundsAttr =
-        op.getInBounds()
-            ? rewriter.getArrayAttr(
-                  op.getInBoundsAttr().getValue().take_back(reducedShapeRank))
-            : ArrayAttr();
+    DenseBoolArrayAttr newInBoundsAttr = rewriter.getDenseBoolArrayAttr(
+        op.getInBounds().take_back(reducedShapeRank));
     Value newRead = rewriter.create<vector::TransferReadOp>(
         op.getLoc(), newReadType, op.getSource(), op.getIndices(),
         AffineMapAttr::get(newMap), op.getPadding(), op.getMask(),
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
index 42ac717b44c4b..e08285bdf772d 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp
@@ -248,10 +248,9 @@ struct CastAwayTransferReadLeadingOneDim
         AffineMap::get(oldMap.getNumDims(), oldMap.getNumSymbols(), newResults,
                        rewriter.getContext());
 
-    ArrayAttr inBoundsAttr;
-    if (read.getInBounds())
-      inBoundsAttr = rewriter.getArrayAttr(
-          read.getInBoundsAttr().getValue().take_back(newType.getRank()));
+    DenseBoolArrayAttr inBoundsAttr = rewriter.getDenseBoolArrayAttr(
+        read.getInBoundsAttr().asArrayRef().take_back(newType.getRank()));
 
     Value mask = Value();
     if (read.getMask()) {
@@ -302,10 +301,9 @@ struct CastAwayTransferWriteLeadingOneDim
         AffineMap::get(oldMap.getNumDims(), oldMap.getNumSymbols(), newResults,
                        rewriter.getContext());
 
-    ArrayAttr inBoundsAttr;
-    if (write.getInBounds())
-      inBoundsAttr = rewriter.getArrayAttr(
-          write.getInBoundsAttr().getValue().take_back(newType.getRank()));
+    DenseBoolArrayAttr inBoundsAttr = rewriter.getDenseBoolArrayAttr(
+        write.getInBoundsAttr().asArrayRef().take_back(newType.getRank()));
 
     auto newVector = rewriter.create<vector::ExtractOp>(
         write.getLoc(), write.getVector(), splatZero(dropDim));
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
index 4c93d3841bf87..52773b2570994 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
@@ -411,7 +411,7 @@ class TransferReadDropUnitDimsPattern
     auto newTransferReadOp = rewriter.create<vector::TransferReadOp>(
         loc, reducedVectorType, reducedShapeSource, zeros, identityMap,
         transferReadOp.getPadding(), maskOp,
-        rewriter.getBoolArrayAttr(inBounds));
+        rewriter.getDenseBoolArrayAttr(inBounds));
     auto shapeCast = rewriter.createOrFold<vector::ShapeCastOp>(
         loc, vectorType, newTransferReadOp);
     rewriter.replaceOp(transferReadOp, shapeCast);
@@ -480,7 +480,7 @@ class TransferWriteDropUnitDimsPattern
         loc, reducedVectorType, vector);
     rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
         transferWriteOp, Type(), shapeCast, reducedShapeSource, zeros,
-        identityMap, maskOp, rewriter.getBoolArrayAttr(inBounds));
+        identityMap, maskOp, rewriter.getDenseBoolArrayAttr(inBounds));
 
     return success();
   }
@@ -640,7 +640,8 @@ class FlattenContiguousRowMajorTransferReadPattern
                                             vectorType.getElementType());
     vector::TransferReadOp flatRead = rewriter.create<vector::TransferReadOp>(
         loc, flatVectorType, collapsedSource, collapsedIndices, collapsedMap);
-    flatRead.setInBoundsAttr(rewriter.getBoolArrayAttr({true}));
+    SmallVector<bool> inBounds(1, true);
+    flatRead.setInBoundsAttr(rewriter.getDenseBoolArrayAttr(inBounds));
 
     // 4. Replace the old transfer_read with the new one reading from the
     // collapsed shape
@@ -735,7 +736,8 @@ class FlattenContiguousRowMajorTransferWritePattern
     vector::TransferWriteOp flatWrite =
         rewriter.create<vector::TransferWriteOp>(
             loc, flatVector, collapsedSource, collapsedIndices, collapsedMap);
-    flatWrite.setInBoundsAttr(rewriter.getBoolArrayAttr({true}));
+    SmallVector<bool> inBounds(1, true);
+    flatWrite.setInBoundsAttr(rewriter.getDenseBoolArrayAttr(inBounds));
 
     // 4. Replace the old transfer_write with the new one writing the
     // collapsed shape
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
index ee622e886f618..3c482413d761e 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
@@ -523,7 +523,7 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
     return failure();
 
   SmallVector<bool, 4> bools(xferOp.getTransferRank(), true);
-  auto inBoundsAttr = b.getBoolArrayAttr(bools);
+  auto inBoundsAttr = b.getDenseBoolArrayAttr(bools);
   if (options.vectorTransferSplit == VectorTransferSplit::ForceInBounds) {
     b.modifyOpInPlace(xferOp, [&]() {
       xferOp->setAttr(xferOp.getInBoundsAttrName(), inBoundsAttr);
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 8fcef54f12edf..8ed18f5f926b5 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1125,7 +1125,7 @@ struct MaterializeTransferMask : public OpRewritePattern<ConcreteOp> {
 
     rewriter.modifyOpInPlace(xferOp, [&]() {
       xferOp.getMaskMutable().assign(mask);
-      xferOp.setInBoundsAttr(rewriter.getBoolArrayAttr({true}));
+      xferOp.setInBoundsAttr(rewriter.getDenseBoolArrayAttr({true}));
     });
 
     return success();
@@ -1303,8 +1303,7 @@ class DropInnerMostUnitDimsTransferRead
     if (dimsToDrop == 0)
       return failure();
 
-    auto inBounds = readOp.getInBoundsValues();
-    auto droppedInBounds = ArrayRef<bool>(inBounds).take_back(dimsToDrop);
+    auto droppedInBounds = readOp.getInBounds().take_back(dimsToDrop);
     if (llvm::is_contained(droppedInBounds, false))
       return failure();
 
@@ -1324,8 +1323,8 @@ class DropInnerMostUnitDimsTransferRead
         cast<MemRefType>(memref::SubViewOp::inferRankReducedResultType(
             srcType.getShape().drop_back(dimsToDrop), srcType, offsets, sizes,
            strides));
-    ArrayAttr inBoundsAttr = rewriter.getArrayAttr(
-        readOp.getInBoundsAttr().getValue().drop_back(dimsToDrop));
+    DenseBoolArrayAttr inBoundsAttr = rewriter.getDenseBoolArrayAttr(
+        readOp.getInBounds().drop_back(dimsToDrop));
     Value rankedReducedView = rewriter.create<memref::SubViewOp>(
         loc, resultMemrefType, readOp.getSource(), offsets, sizes, strides);
     auto permMap = getTransferMinorIdentityMap(
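Note: both `DropInnerMostUnitDims` patterns (the read one above, the write one below) share the same guard before shrinking the transfer: the trailing unit dimensions being dropped must all be flagged in-bounds. Extracted as a sketch:

```cpp
#include "llvm/ADT/STLExtras.h"

// take_back views exactly the flags about to be dropped; refuse the
// rewrite if any of them is still potentially out-of-bounds.
bool canDropTrailingDims(llvm::ArrayRef<bool> inBounds, unsigned dimsToDrop) {
  return !llvm::is_contained(inBounds.take_back(dimsToDrop), false);
}
```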
@@ -1394,8 +1393,7 @@ class DropInnerMostUnitDimsTransferWrite
     if (dimsToDrop == 0)
       return failure();
 
-    auto inBounds = writeOp.getInBoundsValues();
-    auto droppedInBounds = ArrayRef<bool>(inBounds).take_back(dimsToDrop);
+    auto droppedInBounds = writeOp.getInBounds().take_back(dimsToDrop);
     if (llvm::is_contained(droppedInBounds, false))
       return failure();
 
@@ -1415,8 +1413,8 @@ class DropInnerMostUnitDimsTransferWrite
         cast<MemRefType>(memref::SubViewOp::inferRankReducedResultType(
             srcType.getShape().drop_back(dimsToDrop), srcType, offsets, sizes,
            strides));
-    ArrayAttr inBoundsAttr = rewriter.getArrayAttr(
-        writeOp.getInBoundsAttr().getValue().drop_back(dimsToDrop));
+    DenseBoolArrayAttr inBoundsAttr = rewriter.getDenseBoolArrayAttr(
+        writeOp.getInBounds().drop_back(dimsToDrop));
     Value rankedReducedView = rewriter.create<memref::SubViewOp>(
         loc, resultMemrefType, writeOp.getSource(), offsets, sizes, strides);
diff --git a/mlir/test/Conversion/GPUCommon/transfer_write.mlir b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
index cd62b7b13fa9a..af5043ce517f5 100644
--- a/mlir/test/Conversion/GPUCommon/transfer_write.mlir
+++ b/mlir/test/Conversion/GPUCommon/transfer_write.mlir
@@ -7,7 +7,7 @@
       // CHECK:%[[base:[0-9]+]] = llvm.extractvalue
       // CHECK:%[[ptr:[0-9]+]] = llvm.getelementptr %[[base]]
      // CHECK:llvm.store %[[val]], %[[ptr]]
-      vector.transfer_write %arg3, %arg1[%c0, %c0] {in_bounds = [true]} : vector<1xf32>, memref<1024x1024xf32>
+      vector.transfer_write %arg3, %arg1[%c0, %c0] {in_bounds = array<i1: true>} : vector<1xf32>, memref<1024x1024xf32>
    }
    return
  }
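Note: everything from here down is mechanical test churn tracking the assembly format — `DenseBoolArrayAttr` prints as `array<i1: ...>` where the old `BoolArrayAttr` printed a plain list. A sketch showing both spellings (assumes a loaded `MLIRContext`):

```cpp
#include "mlir/IR/Builders.h"
#include "mlir/IR/MLIRContext.h"

void printBothSpellings(mlir::MLIRContext *ctx) {
  mlir::Builder b(ctx);
  b.getBoolArrayAttr({true, true}).dump();      // [true, true]
  b.getDenseBoolArrayAttr({true, true}).dump(); // array<i1: true, true>
}
```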
diff --git a/mlir/test/Conversion/VectorToArmSME/unsupported.mlir b/mlir/test/Conversion/VectorToArmSME/unsupported.mlir
index ff7b4bcb5f65a..5be585e8e55f5 100644
--- a/mlir/test/Conversion/VectorToArmSME/unsupported.mlir
+++ b/mlir/test/Conversion/VectorToArmSME/unsupported.mlir
@@ -10,7 +10,7 @@
 func.func @transfer_read_2d__bad_type(%src : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [false, false]} : memref<?x?xf64>, vector<[4]x[4]xf64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: false, false>} : memref<?x?xf64>, vector<[4]x[4]xf64>
   "prevent.dce"(%0) : (vector<[4]x[4]xf64>) -> ()
   return
 }
@@ -23,7 +23,7 @@ func.func @transfer_read_2d__bad_type(%src : memref<?x?xf64>) {
 func.func @transfer_read_2d__non_memref_type(%src : tensor<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : tensor<?x?xf64>, vector<[2]x[2]xf64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: true, true>} : tensor<?x?xf64>, vector<[2]x[2]xf64>
   "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
   return
 }
@@ -36,7 +36,7 @@ func.func @transfer_read_2d__non_memref_type(%src : tensor<?x?xf64>) {
 func.func @transfer_read_2d__bad_transfer_rank(%src : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]} : memref<?x?xf64>, vector<[2]xf64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = array<i1: true>} : memref<?x?xf64>, vector<[2]xf64>
   "prevent.dce"(%0) : (vector<[2]xf64>) -> ()
   return
 }
@@ -49,7 +49,7 @@ func.func @transfer_read_2d__bad_transfer_rank(%src : memref<?x?xf64>) {
 func.func @transfer_read_2d__non_transpose(%src : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d0, 0)>, in_bounds = [true, true]} : memref<?x?xf64>, vector<[2]x[2]xf64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d0, 0)>, in_bounds = array<i1: true, true>} : memref<?x?xf64>, vector<[2]x[2]xf64>
   "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
   return
 }
@@ -62,7 +62,7 @@ func.func @transfer_read_2d__non_transpose(%src : memref<?x?xf64>) {
 func.func @transfer_read_2d__out_of_bounds(%src : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [false, false]} : memref<?x?xf64>, vector<[2]x[2]xf64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: false, false>} : memref<?x?xf64>, vector<[2]x[2]xf64>
   "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
   return
 }
@@ -83,7 +83,7 @@ func.func @transfer_read_2d__out_of_bounds(%src : memref<?x?xf64>) {
 func.func @transfer_write_2d_zero__bad_type(%arg0 : memref<?x?xi4>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant dense<0> : vector<[16]x[16]xi4>
-  vector.transfer_write %cst, %arg0[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[16]xi4>, memref<?x?xi4>
+  vector.transfer_write %cst, %arg0[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[16]x[16]xi4>, memref<?x?xi4>
   return
 }
@@ -95,7 +95,7 @@ func.func @transfer_write_2d_zero__bad_type(%arg0 : memref<?x?xi4>) {
 func.func @transfer_write_2d_zero__bad_shape(%arg0 : memref<?x?xi8>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant dense<0> : vector<[8]x[8]xi8>
-  vector.transfer_write %cst, %arg0[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xi8>, memref<?x?xi8>
+  vector.transfer_write %cst, %arg0[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[8]x[8]xi8>, memref<?x?xi8>
   return
 }
@@ -107,7 +107,7 @@ func.func @transfer_write_2d_zero__bad_shape(%arg0 : memref<?x?xi8>) {
 func.func @transfer_write_2d_zero__bad_rank(%arg0 : memref<?x?x?xi8>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant dense<0> : vector<[16]x[16]x[16]xi8>
-  vector.transfer_write %cst, %arg0[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<[16]x[16]x[16]xi8>, memref<?x?x?xi8>
+  vector.transfer_write %cst, %arg0[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>} : vector<[16]x[16]x[16]xi8>, memref<?x?x?xi8>
   return
 }
@@ -119,7 +119,7 @@ func.func @transfer_write_2d_zero__bad_rank(%arg0 : memref<?x?x?xi8>) {
 func.func @transfer_write_2d_zero__non_memref_type(%arg0 : tensor<?x?xi8>) -> tensor<?x?xi8> {
   %c0 = arith.constant 0 : index
   %cst = arith.constant dense<0> : vector<[16]x[16]xi8>
-  %0 = vector.transfer_write %cst, %arg0[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[16]xi8>, tensor<?x?xi8>
+  %0 = vector.transfer_write %cst, %arg0[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[16]x[16]xi8>, tensor<?x?xi8>
   return %0 : tensor<?x?xi8>
 }
@@ -130,7 +130,7 @@ func.func @transfer_write_2d_zero__non_memref_type(%arg0 : tensor<?x?xi8>) -> te
 // CHECK-NOT: arm_sme.tile_store
 func.func @transfer_write_2d__fixed(%vector : vector<16x16xi8>, %dest : memref<?x?xi8>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xi8>, memref<?x?xi8>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xi8>, memref<?x?xi8>
   return
 }
@@ -141,7 +141,7 @@ func.func @transfer_write_2d__fixed(%vector : vector<16x16xi8>, %dest : memref<
 func.func @transfer_write_2d__out_of_bounds(%vector : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [false, false]} : vector<[4]x[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: false, false>} : vector<[4]x[4]xf32>, memref<?x?xf32>
   return
 }
@@ -152,7 +152,7 @@ func.func @transfer_write_2d__out_of_bounds(%vector : vector<[4]x[4]xf32>, %dest
 func.func @transfer_write_slice_unsupported_permutation(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %slice_index: index) {
   %c0 = arith.constant 0 : index
   %slice = vector.extract %vector[%slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  vector.transfer_write %slice, %dest[%slice_index, %c0] { permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true] }: vector<[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %slice, %dest[%slice_index, %c0] { permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = array<i1: true> }: vector<[4]xf32>, memref<?x?xf32>
   return
 }
diff --git a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
index 0f973af799634..30196f54139e8 100644
--- a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
+++ b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
@@ -9,7 +9,7 @@
 func.func @transfer_read_2d_i8(%src : memref<?x?xi8>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i8
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi8>, vector<[16]x[16]xi8>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xi8>, vector<[16]x[16]xi8>
   "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> ()
   return
 }
@@ -21,7 +21,7 @@ func.func @transfer_read_2d_i8(%src : memref<?x?xi8>) {
 func.func @transfer_read_2d_i16(%src : memref<?x?xi16>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i16
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xi16>, vector<[8]x[8]xi16>
   "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
   return
 }
@@ -33,7 +33,7 @@ func.func @transfer_read_2d_i16(%src : memref<?x?xi16>) {
 func.func @transfer_read_2d_i32(%src : memref<?x?xi32>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi32>, vector<[4]x[4]xi32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xi32>, vector<[4]x[4]xi32>
   "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> ()
   return
 }
@@ -45,7 +45,7 @@ func.func @transfer_read_2d_i32(%src : memref<?x?xi32>) {
 func.func @transfer_read_2d_i64(%src : memref<?x?xi64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi64>, vector<[2]x[2]xi64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xi64>, vector<[2]x[2]xi64>
   "prevent.dce"(%0) : (vector<[2]x[2]xi64>) -> ()
   return
 }
@@ -57,7 +57,7 @@ func.func @transfer_read_2d_i64(%src : memref<?x?xi64>) {
 func.func @transfer_read_2d_i128(%src : memref<?x?xi128>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i128
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi128>, vector<[1]x[1]xi128>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xi128>, vector<[1]x[1]xi128>
   "prevent.dce"(%0) : (vector<[1]x[1]xi128>) -> ()
   return
 }
@@ -69,7 +69,7 @@ func.func @transfer_read_2d_i128(%src : memref<?x?xi128>) {
 func.func @transfer_read_2d_f16(%src : memref<?x?xf16>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f16
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf16>, vector<[8]x[8]xf16>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xf16>, vector<[8]x[8]xf16>
   "prevent.dce"(%0) : (vector<[8]x[8]xf16>) -> ()
   return
 }
@@ -81,7 +81,7 @@ func.func @transfer_read_2d_f16(%src : memref<?x?xf16>) {
 func.func @transfer_read_2d_bf16(%src : memref<?x?xbf16>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : bf16
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
   "prevent.dce"(%0) : (vector<[8]x[8]xbf16>) -> ()
   return
 }
@@ -93,7 +93,7 @@ func.func @transfer_read_2d_bf16(%src : memref<?x?xbf16>) {
 func.func @transfer_read_2d_f32(%src : memref<?x?xf32>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>
   "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
   return
 }
@@ -105,7 +105,7 @@ func.func @transfer_read_2d_f32(%src : memref<?x?xf32>) {
 func.func @transfer_read_2d_f64(%src : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f64
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf64>, vector<[2]x[2]xf64>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xf64>, vector<[2]x[2]xf64>
   "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
   return
 }
@@ -117,7 +117,7 @@ func.func @transfer_read_2d_f64(%src : memref<?x?xf64>) {
 func.func @transfer_read_2d_with_mask_i16(%src : memref<?x?xi16>, %mask : vector<[8]x[8]xi1>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i16
-  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {in_bounds = array<i1: true, true>} : memref<?x?xi16>, vector<[8]x[8]xi16>
   "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
   return
 }
@@ -131,7 +131,7 @@ func.func @transfer_read_2d_with_mask_i16(%src : memref<?x?xi16>, %mask : vector
 func.func @transfer_read_2d_transpose_i8(%src : memref<?x?xi8>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i8
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xi8>, vector<[16]x[16]xi8>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: true, true>} : memref<?x?xi8>, vector<[16]x[16]xi8>
   "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> ()
   return
 }
@@ -143,7 +143,7 @@ func.func @transfer_read_2d_transpose_i8(%src : memref<?x?xi8>) {
 func.func @transfer_read_2d_transpose_with_mask_f32(%src : memref<?x?xf32>, %mask : vector<[4]x[4]xi1>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>
   "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
   return
 }
@@ -157,7 +157,7 @@ func.func @transfer_read_2d_transpose_with_mask_f32(%src : memref<?x?xf32>, %mas
 func.func @fold_transpose_into_load(%src : memref<?x?xf32>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>
   %1 = vector.transpose %0, [1, 0] : vector<[4]x[4]xf32> to vector<[4]x[4]xf32>
   "prevent.dce"(%1) : (vector<[4]x[4]xf32>) -> ()
 }
@@ -175,7 +175,7 @@ func.func @fold_transpose_into_load(%src : memref<?x?xf32>) {
 func.func @fold_transpose_into_load_multi_use(%src : memref<?x?xf32>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>
   "test.some_use"(%0) : (vector<[4]x[4]xf32>) -> ()
   %1 = vector.transpose %0, [1, 0] : vector<[4]x[4]xf32> to vector<[4]x[4]xf32>
   "prevent.dce"(%1) : (vector<[4]x[4]xf32>) -> ()
@@ -194,7 +194,7 @@ func.func @fold_transpose_into_load_multi_use(%src : memref<?x?xf32>) {
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi8>, vector<[16]x[16]xi8>
 func.func @transfer_write_2d_i8(%vector : vector<[16]x[16]xi8>, %dest : memref<?x?xi8>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[16]xi8>, memref<?x?xi8>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[16]x[16]xi8>, memref<?x?xi8>
   return
 }
@@ -207,7 +207,7 @@ func.func @transfer_write_2d_i8(%vector : vector<[16]x[16]xi8>, %dest : memref<
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi16>, vector<[8]x[8]xi16>
 func.func @transfer_write_2d_i16(%vector : vector<[8]x[8]xi16>, %dest : memref<?x?xi16>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xi16>, memref<?x?xi16>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[8]x[8]xi16>, memref<?x?xi16>
   return
 }
@@ -220,7 +220,7 @@ func.func @transfer_write_2d_i16(%vector : vector<[8]x[8]xi16>, %dest : memref<
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi32>, vector<[4]x[4]xi32>
 func.func @transfer_write_2d_i32(%vector : vector<[4]x[4]xi32>, %dest : memref<?x?xi32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[4]x[4]xi32>, memref<?x?xi32>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[4]x[4]xi32>, memref<?x?xi32>
   return
 }
@@ -233,7 +233,7 @@ func.func @transfer_write_2d_i32(%vector : vector<[4]x[4]xi32>, %dest : memref<
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi64>, vector<[2]x[2]xi64>
 func.func @transfer_write_2d_i64(%vector : vector<[2]x[2]xi64>, %dest : memref<?x?xi64>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[2]x[2]xi64>, memref<?x?xi64>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[2]x[2]xi64>, memref<?x?xi64>
   return
 }
@@ -246,7 +246,7 @@ func.func @transfer_write_2d_i64(%vector : vector<[2]x[2]xi64>, %dest : memref<
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xf16>, vector<[8]x[8]xf16>
 func.func @transfer_write_2d_f16(%vector : vector<[8]x[8]xf16>, %dest : memref<?x?xf16>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf16>, memref<?x?xf16>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf16>, memref<?x?xf16>
   return
 }
@@ -259,7 +259,7 @@ func.func @transfer_write_2d_f16(%vector : vector<[8]x[8]xf16>, %dest : memref<
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
 func.func @transfer_write_2d_bf16(%vector : vector<[8]x[8]xbf16>, %dest : memref<?x?xbf16>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xbf16>, memref<?x?xbf16>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[8]x[8]xbf16>, memref<?x?xbf16>
   return
 }
@@ -272,7 +272,7 @@ func.func @transfer_write_2d_bf16(%vector : vector<[8]x[8]xbf16>, %dest : memref
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xf32>, vector<[4]x[4]xf32>
 func.func @transfer_write_2d_f32(%vector : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[4]x[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[4]x[4]xf32>, memref<?x?xf32>
   return
 }
@@ -285,7 +285,7 @@ func.func @transfer_write_2d_f32(%vector : vector<[4]x[4]xf32>, %dest : memref<
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xf64>, vector<[2]x[2]xf64>
 func.func @transfer_write_2d_f64(%vector : vector<[2]x[2]xf64>, %dest : memref<?x?xf64>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[2]x[2]xf64>, memref<?x?xf64>
+  vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[2]x[2]xf64>, memref<?x?xf64>
   return
 }
@@ -299,7 +299,7 @@ func.func @transfer_write_2d_f64(%vector : vector<[2]x[2]xf64>, %dest : memref<
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]], %[[MASK]] : memref<?x?xf64>, vector<[2]x[2]xf64>
 func.func @transfer_write_2d_with_mask_f64(%vector : vector<[2]x[2]xf64>, %dest : memref<?x?xf64>, %mask : vector<[2]x[2]xi1>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[2]x[2]xf64>, memref<?x?xf64>
+  vector.transfer_write %vector, %dest[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[2]x[2]xf64>, memref<?x?xf64>
   return
 }
@@ -314,7 +314,7 @@ func.func @transfer_write_2d_with_mask_f64(%vector : vector<[2]x[2]xf64>, %dest
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
 func.func @transfer_write_2d_transpose_i64(%vector : vector<[2]x[2]xi64>, %dest : memref<?x?xi64>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0] {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : vector<[2]x[2]xi64>, memref<?x?xi64>
+  vector.transfer_write %vector, %dest[%c0, %c0] {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: true, true>} : vector<[2]x[2]xi64>, memref<?x?xi64>
   return
 }
@@ -330,7 +330,7 @@ func.func @transfer_write_2d_transpose_i64(%vector : vector<[2]x[2]xi64>, %dest
 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]], %[[MASK]] layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
 func.func @transfer_write_2d_transpose_with_mask_bf16(%vector : vector<[8]x[8]xbf16>, %dest : memref<?x?xbf16>, %mask : vector<[8]x[8]xi1>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vector, %dest[%c0, %c0], %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : vector<[8]x[8]xbf16>, memref<?x?xbf16>
+  vector.transfer_write %vector, %dest[%c0, %c0], %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: true, true>} : vector<[8]x[8]xbf16>, memref<?x?xbf16>
   return
 }
@@ -346,7 +346,7 @@ func.func @transfer_write_2d_transpose_with_mask_bf16(%vector : vector<[8]x[8]xb
 func.func @transfer_write_slice(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %slice_index: index) {
   %c0 = arith.constant 0 : index
   %slice = vector.extract %vector[%slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  vector.transfer_write %slice, %dest[%slice_index, %c0] { in_bounds = [true] }: vector<[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %slice, %dest[%slice_index, %c0] { in_bounds = array<i1: true> }: vector<[4]xf32>, memref<?x?xf32>
   return
 }
@@ -362,7 +362,7 @@ func.func @transfer_write_slice(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?
 func.func @transfer_write_slice_with_mask(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %mask: vector<[4]xi1>, %slice_index: index) {
   %c0 = arith.constant 0 : index
   %slice = vector.extract %vector[%slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  vector.transfer_write %slice, %dest[%slice_index, %c0], %mask { in_bounds = [true] }: vector<[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %slice, %dest[%slice_index, %c0], %mask { in_bounds = array<i1: true> }: vector<[4]xf32>, memref<?x?xf32>
   return
 }
@@ -374,7 +374,7 @@ func.func @transfer_write_vertical_slice(%vector: vector<[4]x[4]xf32>, %dest : m
   %c0 = arith.constant 0 : index
   %slice = arm_sme.extract_tile_slice %vector[%slice_index] layout<vertical> : vector<[4]xf32> from vector<[4]x[4]xf32>
-  vector.transfer_write %slice, %dest[%slice_index, %c0] { in_bounds = [true] }: vector<[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %slice, %dest[%slice_index, %c0] { in_bounds = array<i1: true> }: vector<[4]xf32>, memref<?x?xf32>
   return
 }
diff --git a/mlir/test/Conversion/VectorToGPU/fold-arith-vector-to-mma-ops-mma-sync.mlir b/mlir/test/Conversion/VectorToGPU/fold-arith-vector-to-mma-ops-mma-sync.mlir
index 0afaa19d59d15..fcf2a28f7d767 100644
--- a/mlir/test/Conversion/VectorToGPU/fold-arith-vector-to-mma-ops-mma-sync.mlir
+++ b/mlir/test/Conversion/VectorToGPU/fold-arith-vector-to-mma-ops-mma-sync.mlir
@@ -17,13 +17,13 @@ func.func @m16n8k16_mmasync16816_f16_f16_f32_row_row_row(%arg0: memref<42x32xf16
   %cst_f32 = arith.constant 0.000000e+00 : f32
 
   // CHECK-DAG: nvgpu.ldmatrix %arg0[%{{.*}}, %{{.*}}] {numTiles = 4 : i32, transpose = false}
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst_f16 {in_bounds = [true, true]} : memref<42x32xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst_f16 {in_bounds = array<i1: true, true>} : memref<42x32xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
   %A_f32 = arith.extf %A : vector<16x16xf16> to vector<16x16xf32>
 
   // CHECK-DAG: nvgpu.ldmatrix %arg1[%{{.*}}, %{{.*}}] {numTiles = 4 : i32, transpose = true}
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst_f16 {permutation_map = #map0, in_bounds = [true, true]} : memref<32x64xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst_f32 {in_bounds = [true, true]} : memref<42x64xf32, #gpu.address_space<workgroup>>, vector<16x16xf32>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst_f16 {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<32x64xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst_f32 {in_bounds = array<i1: true, true>} : memref<42x64xf32, #gpu.address_space<workgroup>>, vector<16x16xf32>
 
   %B0 = vector.extract_strided_slice %B {offsets = [0, 0], sizes = [8, 16], strides = [1, 1]} : vector<16x16xf16> to vector<8x16xf16>
   %B0_f32 = arith.extf %B0 : vector<8x16xf16> to vector<8x16xf32>
@@ -31,7 +31,7 @@ func.func @m16n8k16_mmasync16816_f16_f16_f32_row_row_row(%arg0: memref<42x32xf16
 
   // CHECK-DAG: nvgpu.mma.sync({{.*}}) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf32>) -> vector<2x2xf32>
   %D0 = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A_f32, %B0_f32, %C0 : vector<16x16xf32>, vector<8x16xf32> into vector<16x8xf32>
-  vector.transfer_write %D0, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf32>, memref<42x64xf32, #gpu.address_space<workgroup>>
+  vector.transfer_write %D0, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x8xf32>, memref<42x64xf32, #gpu.address_space<workgroup>>
 
   %B1 = vector.extract_strided_slice %B {offsets = [8, 0], sizes = [8, 16], strides = [1, 1]} : vector<16x16xf16> to vector<8x16xf16>
@@ -40,7 +40,7 @@ func.func @m16n8k16_mmasync16816_f16_f16_f32_row_row_row(%arg0: memref<42x32xf16
   %B1_f32 = arith.extf %B1 : vector<8x16xf16> to vector<8x16xf32>
 
   // CHECK-DAG: nvgpu.mma.sync({{.*}}) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf32>) -> vector<2x2xf32>
   %D1 = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A_f32, %B1_f32, %C1 : vector<16x16xf32>, vector<8x16xf32> into vector<16x8xf32>
-  vector.transfer_write %D1, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf32>, memref<42x64xf32, #gpu.address_space<workgroup>>
+  vector.transfer_write %D1, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x8xf32>, memref<42x64xf32, #gpu.address_space<workgroup>>
 
   return
 }
diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir
index 912f7fba59e60..77fde72b1d177 100644
--- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir
+++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir
@@ -85,9 +85,9 @@ func.func @m16n8k32_int8_row_row_row(%arg0: memref<128x128xi8, #gpu.address_spac
   // CHECK: vector.load %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32>
   // CHECK-NOT: vector.load %arg2{{.*}}
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<128x128xi8, #gpu.address_space<workgroup>>, vector<16x32xi8>
-  %B = vector.transfer_read %arg1[%c39, %c40], %cst {in_bounds = [true, true], permutation_map = #map0} : memref<128x128xi8, #gpu.address_space<workgroup>>, vector<8x32xi8>
-  %C = vector.transfer_read %arg2[%c49, %c40], %cst0 {in_bounds = [true, true]} : memref<128x128xi32>, vector<16x8xi32>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<128x128xi8, #gpu.address_space<workgroup>>, vector<16x32xi8>
+  %B = vector.transfer_read %arg1[%c39, %c40], %cst {in_bounds = array<i1: true, true>, permutation_map = #map0} : memref<128x128xi8, #gpu.address_space<workgroup>>, vector<8x32xi8>
+  %C = vector.transfer_read %arg2[%c49, %c40], %cst0 {in_bounds = array<i1: true, true>} : memref<128x128xi32>, vector<16x8xi32>
   // CHECK: [[d:%.+]] = nvgpu.mma.sync({{.*}}) {mmaShape = [16, 8, 32]} : (vector<4x4xi8>, vector<2x4xi8>, vector<2x2xi32>) -> vector<2x2xi32>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x32xi8>, vector<8x32xi8> into vector<16x8xi32>
@@ -97,7 +97,7 @@ func.func @m16n8k32_int8_row_row_row(%arg0: memref<128x128xi8, #gpu.address_spac
   // CHECK: [[row:%.+]] = affine.apply [[$rowC8_map]]()[{{%.+}}]
   // CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[{{%.+}}]
   // CHECK: vector.store {{%.+}}, %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32>
-  vector.transfer_write %D, %arg2[%c49, %c40] {in_bounds = [true, true]} : vector<16x8xi32>, memref<128x128xi32>
+  vector.transfer_write %D, %arg2[%c49, %c40] {in_bounds = array<i1: true, true>} : vector<16x8xi32>, memref<128x128xi32>
   return
 }
 
@@ -150,16 +150,16 @@ func.func @m8n8k4_f64_row_row_row(%arg0: memref<128x128xf64>, %arg1: memref<128x
   // CHECK-DAG: [[col:%.+]] = affine.apply [[$colC0_map]]
   // CHECK: vector.load %arg2[[[row]], [[col]]] : memref<128x128xf64>, vector<2xf64>
 
-  %A = vector.transfer_read %arg0[%c1, %c1], %cst {in_bounds = [true, true]} : memref<128x128xf64>, vector<8x4xf64>
-  %B = vector.transfer_read %arg1[%c39, %c40], %cst {in_bounds = [true, true], permutation_map = #map0} : memref<128x128xf64>, vector<8x4xf64>
-  %C = vector.transfer_read %arg2[%c49, %c40], %cst0 {in_bounds = [true, true]} : memref<128x128xf64>, vector<8x8xf64>
+  %A = vector.transfer_read %arg0[%c1, %c1], %cst {in_bounds = array<i1: true, true>} : memref<128x128xf64>, vector<8x4xf64>
+  %B = vector.transfer_read %arg1[%c39, %c40], %cst {in_bounds = array<i1: true, true>, permutation_map = #map0} : memref<128x128xf64>, vector<8x4xf64>
+  %C = vector.transfer_read %arg2[%c49, %c40], %cst0 {in_bounds = array<i1: true, true>} : memref<128x128xf64>, vector<8x8xf64>
   // CHECK: [[d:%.+]] = nvgpu.mma.sync({{.*}}) {mmaShape = [8, 8, 4]} : (vector<1x1xf64>, vector<1x1xf64>, vector<1x2xf64>) -> vector<1x2xf64>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<8x4xf64>, vector<8x4xf64> into vector<8x8xf64>
 
   // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowC0_map]]
   // CHECK-DAG: [[col:%.+]] = affine.apply [[$colC0_map]]
   // CHECK: vector.store {{%.+}}, %arg2[[[row]], [[col]]] : memref<128x128xf64>, vector<2xf64>
-  vector.transfer_write %D, %arg2[%c49, %c40] {in_bounds = [true, true]} : vector<8x8xf64>, memref<128x128xf64>
+  vector.transfer_write %D, %arg2[%c49, %c40] {in_bounds = array<i1: true, true>} : vector<8x8xf64>, memref<128x128xf64>
   return
 }
@@ -189,11 +189,11 @@ func.func @m16n8k16_fp16_row_row_row(%arg0: memref<20x20xf16, #gpu.address_space
   // CHECK-DAG: [[n_coord:%.+]] = affine.apply [[$contiguous_map]]
   // CHECK-DAG: [[k_coord:%.+]] = affine.apply [[$strided_map]]
   // CHECK: nvgpu.ldmatrix %arg1[[[k_coord]], [[n_coord]]] {numTiles = 2 : i32, transpose = true}
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<20x20xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<20x20xf16, #gpu.address_space<workgroup>>, vector<8x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<20x20xf16, #gpu.address_space<workgroup>>, vector<16x8xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<20x20xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<20x20xf16, #gpu.address_space<workgroup>>, vector<8x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<20x20xf16, #gpu.address_space<workgroup>>, vector<16x8xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, memref<20x20xf16, #gpu.address_space<workgroup>>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x8xf16>, memref<20x20xf16, #gpu.address_space<workgroup>>
   return
 }
 
@@ -220,17 +220,17 @@ func.func @m16n16k16_mmasync16816_fp16_f16_row_row_row(%arg0: memref<42x32xf16,
   // CHECK-DAG: [[m_coord:%.+]] = affine.apply [[$strided_map]]
   // CHECK-DAG: [[k_coord:%.+]] = affine.apply [[$contiguous_map]]
   // CHECK: [[fragmentA:%.+]] = nvgpu.ldmatrix %arg0[[[m_coord]], [[k_coord]]] {numTiles = 4 : i32, transpose = false}
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<42x32xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<42x32xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
 
   // CHECK-DAG: [[n_coord:%.+]] = affine.apply [[$contiguous_map]]
   // CHECK-DAG: [[k_coord:%.+]] = affine.apply [[$strided_map]]
   // CHECK-DAG: [[fragmentB:%.+]] = nvgpu.ldmatrix %arg1[[[k_coord]], [[n_coord]]] {numTiles = 4 : i32, transpose = true}
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<32x64xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<32x64xf16, #gpu.address_space<workgroup>>, vector<16x16xf16>
[[$strided_map]] // CHECK-DAG: [[n_coord:%.+]] = affine.apply [[$contiguous_map]] // CHECK-DAG: [[fragmentC:%.*]] = nvgpu.ldmatrix %arg2[[[m_coord]], [[n_coord]]] {numTiles = 4 : i32, transpose = false} - %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<42x64xf16, #gpu.address_space>, vector<16x16xf16> + %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array} : memref<42x64xf16, #gpu.address_space>, vector<16x16xf16> // CHECK-DAG: [[fragmentB0:%.+]] = vector.extract_strided_slice [[fragmentB]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf16> to vector<2x2xf16> // CHECK-DAG: [[fragmentC0:%.+]] = vector.extract_strided_slice [[fragmentC]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf16> to vector<2x2xf16> @@ -238,7 +238,7 @@ func.func @m16n16k16_mmasync16816_fp16_f16_row_row_row(%arg0: memref<42x32xf16, %B0 = vector.extract_strided_slice %B {offsets = [0, 0], sizes = [8, 16], strides = [1, 1]} : vector<16x16xf16> to vector<8x16xf16> %C0 = vector.extract_strided_slice %C {offsets = [0, 0], sizes = [16, 8], strides = [1, 1]} : vector<16x16xf16> to vector<16x8xf16> %D0 = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B0, %C0 : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> - vector.transfer_write %D0, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, memref<42x64xf16, #gpu.address_space> + vector.transfer_write %D0, %arg2[%c0, %c0] {in_bounds = array} : vector<16x8xf16>, memref<42x64xf16, #gpu.address_space> // CHECK-DAG: [[fragmentB1:%.+]] = vector.extract_strided_slice [[fragmentB]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf16> to vector<2x2xf16> // CHECK-DAG: [[fragmentC1:%.+]] = vector.extract_strided_slice [[fragmentC]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf16> to vector<2x2xf16> @@ -246,7 +246,7 @@ func.func @m16n16k16_mmasync16816_fp16_f16_row_row_row(%arg0: memref<42x32xf16, %B1 = vector.extract_strided_slice %B {offsets = [8, 0], sizes = [8, 16], strides = [1, 1]} : vector<16x16xf16> to vector<8x16xf16> %C1 = vector.extract_strided_slice %C {offsets = [0, 8], sizes = [16, 8], strides = [1, 1]} : vector<16x16xf16> to vector<16x8xf16> %D1 = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B1, %C1 : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> - vector.transfer_write %D1, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, memref<42x64xf16, #gpu.address_space> + vector.transfer_write %D1, %arg2[%c0, %c0] {in_bounds = array} : vector<16x8xf16>, memref<42x64xf16, #gpu.address_space> return } @@ -276,17 +276,17 @@ func.func @multi_dim_m16n8k16_fp16_row_row_row(%arg0: memref<4x32x1x32xf16, #gpu // CHECK-DAG: [[m_coord:%.+]] = affine.apply [[$strided_map]] // CHECK-DAG: [[k_coord:%.+]] = affine.apply [[$contiguous_map]] // CHECK: [[fragmentA:%.+]] = nvgpu.ldmatrix %arg0[[[c0]], [[m_coord]], [[c0]], [[k_coord]]] {numTiles = 4 : i32, transpose = false} - %A = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true], permutation_map = #map_a} : memref<4x32x1x32xf16, #gpu.address_space>, vector<16x16xf16> + %A = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %cst {in_bounds = array, permutation_map = #map_a} : memref<4x32x1x32xf16, #gpu.address_space>, vector<16x16xf16> // CHECK-DAG: [[n_coord:%.+]] = 
affine.apply [[$contiguous_map]] // CHECK-DAG: [[k_coord:%.+]] = affine.apply [[$strided_map]] // CHECK-DAG: [[fragmentB:%.+]] = nvgpu.ldmatrix %arg1[[[c0]], [[c0]], [[k_coord]], [[n_coord]]] {numTiles = 4 : i32, transpose = true} - %B = vector.transfer_read %arg1[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true], permutation_map = #map_b} : memref<4x1x32x32xf16, #gpu.address_space>, vector<16x16xf16> + %B = vector.transfer_read %arg1[%c0, %c0, %c0, %c0], %cst {in_bounds = array, permutation_map = #map_b} : memref<4x1x32x32xf16, #gpu.address_space>, vector<16x16xf16> // CHECK-DAG: [[m_coord:%.+]] = affine.apply [[$strided_map]] // CHECK-DAG: [[n_coord:%.+]] = affine.apply [[$contiguous_map]] // CHECK-DAG: [[fragmentC:%.*]] = nvgpu.ldmatrix %arg2[[[c0]], [[m_coord]], [[n_coord]]] {numTiles = 4 : i32, transpose = false} - %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = [true, true]} : memref<1x32x40xf16, #gpu.address_space>, vector<16x16xf16> + %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = array} : memref<1x32x40xf16, #gpu.address_space>, vector<16x16xf16> // CHECK-DAG: [[fragmentB0:%.+]] = vector.extract_strided_slice [[fragmentB]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf16> to vector<2x2xf16> // CHECK-DAG: [[fragmentC0:%.+]] = vector.extract_strided_slice [[fragmentC]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf16> to vector<2x2xf16> @@ -294,7 +294,7 @@ func.func @multi_dim_m16n8k16_fp16_row_row_row(%arg0: memref<4x32x1x32xf16, #gpu %B0 = vector.extract_strided_slice %B {offsets = [0, 0], sizes = [8, 16], strides = [1, 1]} : vector<16x16xf16> to vector<8x16xf16> %C0 = vector.extract_strided_slice %C {offsets = [0, 0], sizes = [16, 8], strides = [1, 1]} : vector<16x16xf16> to vector<16x8xf16> %D0 = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B0, %C0 : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> - vector.transfer_write %D0, %arg2[%c0, %c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, memref<1x32x40xf16, #gpu.address_space> + vector.transfer_write %D0, %arg2[%c0, %c0, %c0] {in_bounds = array} : vector<16x8xf16>, memref<1x32x40xf16, #gpu.address_space> return } @@ -319,19 +319,19 @@ func.func @batch_m16n8k16_fp16_row_row_row(%arg0: memref<2x20x20xf16, #gpu.addre // CHECK-DAG: [[m_coord:%.+]] = affine.apply [[$strided_map]] // CHECK-DAG: [[k_coord:%.+]] = affine.apply [[$contiguous_map]] // CHECK: nvgpu.ldmatrix %arg0[[[C0]], [[m_coord]], [[k_coord]]] {numTiles = 4 : i32, transpose = false} : memref<2x20x20xf16, #gpu.address_space> -> vector<4x2xf16> - %A = vector.transfer_read %arg0[%c0, %c0, %c0], %cst {in_bounds = [true, true]} : memref<2x20x20xf16, #gpu.address_space>, vector<16x16xf16> + %A = vector.transfer_read %arg0[%c0, %c0, %c0], %cst {in_bounds = array} : memref<2x20x20xf16, #gpu.address_space>, vector<16x16xf16> // CHECK-DAG: [[n_coord:%.+]] = affine.apply [[$contiguous_map]] // CHECK-DAG: [[k_coord:%.+]] = affine.apply [[$strided_map]] // CHECK: nvgpu.ldmatrix %arg1[[[C0]], [[k_coord]], [[n_coord]]] {numTiles = 2 : i32, transpose = true} : memref<2x20x20xf16, #gpu.address_space> -> vector<2x2xf16> - %B = vector.transfer_read %arg1[%c0, %c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<2x20x20xf16, #gpu.address_space>, vector<8x16xf16> + %B = vector.transfer_read %arg1[%c0, %c0, %c0], %cst {permutation_map = #map0, in_bounds = array} : 
memref<2x20x20xf16, #gpu.address_space>, vector<8x16xf16> // CHECK-DAG: [[m_coord:%.+]] = affine.apply [[$strided_map]] // CHECK-DAG: [[n_coord:%.+]] = affine.apply [[$contiguous_map]] // CHECK: nvgpu.ldmatrix %arg2[[[C0]], [[m_coord]], [[n_coord]]] {numTiles = 2 : i32, transpose = false} : memref<2x20x20xf16, #gpu.address_space> -> vector<2x2xf16> - %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = [true, true]} : memref<2x20x20xf16, #gpu.address_space>, vector<16x8xf16> + %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = array} : memref<2x20x20xf16, #gpu.address_space>, vector<16x8xf16> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> - vector.transfer_write %D, %arg2[%c0, %c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, memref<2x20x20xf16, #gpu.address_space> + vector.transfer_write %D, %arg2[%c0, %c0, %c0] {in_bounds = array} : vector<16x8xf16>, memref<2x20x20xf16, #gpu.address_space> return } @@ -372,11 +372,11 @@ func.func @m16n8k16_fp16_row_col_row(%arg0: memref<20x20xf16, #gpu.address_space // CHECK-DAG: [[n_coord:%.+]] = affine.apply [[$contiguous_ldmatrix_x4_map]] // CHECK: nvgpu.ldmatrix %arg2[[[m_coord]], [[n_coord]]] {numTiles = 2 : i32 // CHECK-SAME: transpose = false - %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<20x20xf16, #gpu.address_space>, vector<16x16xf16> - %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = [true, true]} : memref<20x20xf16, #gpu.address_space>, vector<8x16xf16> - %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<20x20xf16, #gpu.address_space>, vector<16x8xf16> + %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array} : memref<20x20xf16, #gpu.address_space>, vector<16x16xf16> + %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = array} : memref<20x20xf16, #gpu.address_space>, vector<8x16xf16> + %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array} : memref<20x20xf16, #gpu.address_space>, vector<16x8xf16> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> - vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, memref<20x20xf16, #gpu.address_space> + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array} : vector<16x8xf16>, memref<20x20xf16, #gpu.address_space> return } @@ -426,8 +426,8 @@ func.func @m16n8k4_tf32_f32_row_row_row(%arg0: memref<20x20xf32, #gpu.address_sp // CHECK: [[d_frag:%.+]] = nvgpu.mma.sync([[a_frag]], [[b_frag]], [[c_frag]]) // CHECK-SAME: mmaShape = [16, 8, 4] // CHECK-SAME: -> vector<2x2xf32> - %A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = [true, true]} : memref<20x20xf32, #gpu.address_space>, vector<16x4xf32> - %B = vector.transfer_read %arg1[%c3, %c3], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<20x20xf32, #gpu.address_space>, vector<8x4xf32> + %A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = array} : memref<20x20xf32, #gpu.address_space>, vector<16x4xf32> + %B = vector.transfer_read %arg1[%c3, %c3], %cst {permutation_map = #map0, in_bounds = array} : memref<20x20xf32, #gpu.address_space>, vector<8x4xf32> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], 
iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %cst_0 : vector<16x4xf32>, vector<8x4xf32> into vector<16x8xf32> // CHECK: vector.extract [[d_frag]][0] : vector<2xf32> from vector<2x2xf32> @@ -438,7 +438,7 @@ func.func @m16n8k4_tf32_f32_row_row_row(%arg0: memref<20x20xf32, #gpu.address_sp // CHECK: affine.apply [[$rowC8_map]] // CHECK: affine.apply [[$colC_map]] // CHECK: vector.store - vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf32>, memref<20x20xf32> + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array} : vector<16x8xf32>, memref<20x20xf32> return } @@ -489,8 +489,8 @@ func.func @m16n8k8_tf32_f32_row_row_row(%arg0: memref<20x20xf32, #gpu.address_sp // CHECK: [[d_frag:%.+]] = nvgpu.mma.sync([[a_frag]], [[b_frag1]], [[c_frag]]) // CHECK-SAME: mmaShape = [16, 8, 8] // CHECK-SAME: -> vector<2x2xf32> - %A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = [true, true]} : memref<20x20xf32, #gpu.address_space>, vector<16x8xf32> - %B = vector.transfer_read %arg1[%c3, %c3], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<20x20xf32, #gpu.address_space>, vector<8x8xf32> + %A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = array} : memref<20x20xf32, #gpu.address_space>, vector<16x8xf32> + %B = vector.transfer_read %arg1[%c3, %c3], %cst {permutation_map = #map0, in_bounds = array} : memref<20x20xf32, #gpu.address_space>, vector<8x8xf32> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %cst_0 : vector<16x8xf32>, vector<8x8xf32> into vector<16x8xf32> // CHECK: vector.extract [[d_frag]][0] : vector<2xf32> from vector<2x2xf32> @@ -501,7 +501,7 @@ func.func @m16n8k8_tf32_f32_row_row_row(%arg0: memref<20x20xf32, #gpu.address_sp // CHECK: affine.apply [[$rowC8_map]] // CHECK: affine.apply [[$colC_map]] // CHECK: vector.store - vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf32>, memref<20x20xf32> + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array} : vector<16x8xf32>, memref<20x20xf32> return } @@ -559,8 +559,8 @@ func.func @m16n8k8_tf32_f32_col_col_row(%arg0: memref<20x20xf32, #gpu.address_sp // CHECK: [[d_frag:%.+]] = nvgpu.mma.sync([[a_frag]], [[b_frag]], [[c_frag]]) // CHECK-SAME: mmaShape = [16, 8, 8] // CHECK-SAME: -> vector<2x2xf32> - %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true], permutation_map = #map0} : memref<20x20xf32, #gpu.address_space>, vector<16x8xf32> - %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = [true, true]} : memref<20x20xf32, #gpu.address_space>, vector<8x8xf32> + %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array, permutation_map = #map0} : memref<20x20xf32, #gpu.address_space>, vector<16x8xf32> + %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = array} : memref<20x20xf32, #gpu.address_space>, vector<8x8xf32> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %cst_0 : vector<16x8xf32>, vector<8x8xf32> into vector<16x8xf32> @@ -572,7 +572,7 @@ func.func @m16n8k8_tf32_f32_col_col_row(%arg0: memref<20x20xf32, #gpu.address_sp // CHECK: affine.apply [[$rowC8_map]] // CHECK: affine.apply [[$colC_map]] // CHECK: vector.store - vector.transfer_write %D, %arg2[%c16, %c8] {in_bounds = [true, true]} : vector<16x8xf32>, memref<20x20xf32> + vector.transfer_write %D, 
%arg2[%c16, %c8] {in_bounds = array} : vector<16x8xf32>, memref<20x20xf32> return } @@ -625,9 +625,9 @@ func.func @m16n8k64_int4_row_col_row(%arg0: memref<128x128xi4, #gpu.address_spac // CHECK: vector.load %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32> // CHECK-NOT: vector.load - %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<128x128xi4, #gpu.address_space>, vector<16x64xi4> - %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = [true, true]} : memref<128x128xi4, #gpu.address_space>, vector<8x64xi4> - %C = vector.transfer_read %arg2[%c0, %c0], %cst0 {in_bounds = [true, true]} : memref<128x128xi32>, vector<16x8xi32> + %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array} : memref<128x128xi4, #gpu.address_space>, vector<16x64xi4> + %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = array} : memref<128x128xi4, #gpu.address_space>, vector<8x64xi4> + %C = vector.transfer_read %arg2[%c0, %c0], %cst0 {in_bounds = array} : memref<128x128xi32>, vector<16x8xi32> // CHECK: [[d:%.+]] = nvgpu.mma.sync({{.*}}) {mmaShape = [16, 8, 64]} : (vector<4x8xi4>, vector<2x8xi4>, vector<2x2xi32>) -> vector<2x2xi32> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x64xi4>, vector<8x64xi4> into vector<16x8xi32> @@ -641,7 +641,7 @@ func.func @m16n8k64_int4_row_col_row(%arg0: memref<128x128xi4, #gpu.address_spac // CHECK: [[row:%.+]] = affine.apply [[$rowC8_map]]()[[[lane]]] // CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[[[lane]]] // CHECK: vector.store [[v]], %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32> - vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xi32>, memref<128x128xi32> + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array} : vector<16x8xi32>, memref<128x128xi32> return } @@ -695,9 +695,9 @@ func.func @m16n8k32_int8_row_col_row(%arg0: memref<128x128xi8, #gpu.address_spac // CHECK: vector.load %arg2[[[m_coord]], [[n_coord]]] : memref<128x128xi32>, vector<2xi32> // CHECK-NOT: vector.load %arg2{{.*}} - %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<128x128xi8, #gpu.address_space>, vector<16x32xi8> - %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = [true, true]} : memref<128x128xi8, #gpu.address_space>, vector<8x32xi8> - %C = vector.transfer_read %arg2[%c0, %c0], %cst0 {in_bounds = [true, true]} : memref<128x128xi32>, vector<16x8xi32> + %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array} : memref<128x128xi8, #gpu.address_space>, vector<16x32xi8> + %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = array} : memref<128x128xi8, #gpu.address_space>, vector<8x32xi8> + %C = vector.transfer_read %arg2[%c0, %c0], %cst0 {in_bounds = array} : memref<128x128xi32>, vector<16x8xi32> // CHECK: [[d:%.+]] = nvgpu.mma.sync({{.*}}) {mmaShape = [16, 8, 32]} : (vector<4x4xi8>, vector<2x4xi8>, vector<2x2xi32>) -> vector<2x2xi32> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x32xi8>, vector<8x32xi8> into vector<16x8xi32> @@ -710,7 +710,7 @@ func.func @m16n8k32_int8_row_col_row(%arg0: memref<128x128xi8, #gpu.address_spac // CHECK: [[row:%.+]] = affine.apply [[$rowC8_map]]()[[[lane]]] // CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[[[lane]]] // CHECK: vector.store [[v]], %arg2[[[row]], 
[[col]]] : memref<128x128xi32>, vector<2xi32> - vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xi32>, memref<128x128xi32> + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array} : vector<16x8xi32>, memref<128x128xi32> return } @@ -742,12 +742,12 @@ func.func @strided_memref_read_write(%arg0: !smem_type, // CHECK-DAG: [[n_coord:%.+]] = affine.apply [[$contiguous_map]] // CHECK-DAG: [[k_coord:%.+]] = affine.apply [[$strided_map]] // CHECK: nvgpu.ldmatrix %arg1[[[k_coord]], [[n_coord]]] {numTiles = 2 : i32, transpose = true} - %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : !smem_type, vector<16x16xf16> - %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : !smem_type, vector<8x16xf16> - %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : !smem_type, vector<16x8xf16> + %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array} : !smem_type, vector<16x16xf16> + %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array} : !smem_type, vector<8x16xf16> + %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array} : !smem_type, vector<16x8xf16> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16> - vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, !smem_type + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array} : vector<16x8xf16>, !smem_type return } @@ -770,12 +770,12 @@ func.func @unsupported_non_2d_load_store(%arg0: !smem_type, // CHECK-NOT: nvgpu.ldmatrix // CHECK-NOT: nvgpu.mma - %A = vector.transfer_read %arg0[%c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : !smem_type, vector<1x16x16xf16> - %B = vector.transfer_read %arg1[%c0, %c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true, true]} : !smem_type, vector<8x1x16xf16> - %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : !smem_type, vector<1x16x8xf16> + %A = vector.transfer_read %arg0[%c0, %c0, %c0], %cst {in_bounds = array} : !smem_type, vector<1x16x16xf16> + %B = vector.transfer_read %arg1[%c0, %c0, %c0], %cst {permutation_map = #map0, in_bounds = array} : !smem_type, vector<8x1x16xf16> + %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = array} : !smem_type, vector<1x16x8xf16> %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<1x16x16xf16>, vector<8x1x16xf16> into vector<1x16x8xf16> - vector.transfer_write %D, %arg2[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x16x8xf16>, !smem_type + vector.transfer_write %D, %arg2[%c0, %c0, %c0] {in_bounds = array} : vector<1x16x8xf16>, !smem_type return } @@ -798,12 +798,12 @@ func.func @unsupported_fully_dynamic_strides(%arg0: !smem_type, // CHECK-NOT: nvgpu.ldmatrix // CHECK-NOT: nvgpu.mma - %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : !smem_type, vector<16x16xf16> - %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : !smem_type, vector<8x16xf16> - %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : !smem_type, vector<16x8xf16> + %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array} : 
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : !smem_type, vector<8x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : !smem_type, vector<16x8xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x8xf16>, !smem_type
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x8xf16>, !smem_type
   return
 }
@@ -827,11 +827,11 @@ func.func @unsupported_transposed_store(%arg0: !smem_type,
   // CHECK-NOT: nvgpu.ldmatrix
   // CHECK-NOT: nvgpu.mma
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : !smem_type, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : !smem_type, vector<8x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : !smem_type, vector<16x8xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : !smem_type, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : !smem_type, vector<8x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : !smem_type, vector<16x8xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<8x16xf16> into vector<16x8xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<16x8xf16>, !smem_type
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<16x8xf16>, !smem_type
   return
 }
diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
index 8526ff1392599..69d7a5626f683 100644
--- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
+++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
@@ -17,11 +17,11 @@ func.func @matmul(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: mem
   %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<16x16xf16>
   return
 }
@@ -45,10 +45,10 @@ func.func @matmul_cst(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2:
   %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %cst_0 : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<16x16xf16>
   return
 }
@@ -72,10 +72,10 @@ func.func @matmul_broadcast(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>,
   %C = vector.broadcast %f : f16 to vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<16x16xf16>
   return
 }
@@ -102,14 +102,14 @@ func.func @matmul_loop(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %
   %c128 = arith.constant 128 : index
   %c32 = arith.constant 32 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<128x128xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<128x128xf16>, vector<16x16xf16>
   %14 = scf.for %arg17 = %c0 to %c128 step %c32 iter_args(%arg18 = %C) -> (vector<16x16xf16>) {
-    %17 = vector.transfer_read %arg0[%c0, %arg17], %cst {in_bounds = [true, true]} : memref<128x128xf16>, vector<16x16xf16>
-    %18 = vector.transfer_read %arg1[%arg17, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<128x128xf16>, vector<16x16xf16>
+    %17 = vector.transfer_read %arg0[%c0, %arg17], %cst {in_bounds = array<i1: true, true>} : memref<128x128xf16>, vector<16x16xf16>
+    %18 = vector.transfer_read %arg1[%arg17, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<128x128xf16>, vector<16x16xf16>
     %19 = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %17, %18, %arg18 : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
     scf.yield %19 : vector<16x16xf16>
   }
-  vector.transfer_write %14, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<128x128xf16>
+  vector.transfer_write %14, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<128x128xf16>
   return
 }
@@ -137,11 +137,11 @@ func.func @matmul_fused_elementwise(%arg0: memref<16x16xf16>, %arg1: memref<16x1
   %cst_1 = arith.constant dense<1.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %cst_0 : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
   %E = arith.addf %D, %cst_1 : vector<16x16xf16>
-  vector.transfer_write %E, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16>
+  vector.transfer_write %E, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<16x16xf16>
   return
 }
@@ -168,14 +168,14 @@ func.func @matmul_fused_broadcast(%arg0: memref<16x16xf16>, %arg1: memref<16x16x
   %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %cst_0 : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
   %E = vector.transfer_read %arg3[%c0, %c0, %c0, %c0], %cst
-    {in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3)->(0, d3)>}
+    {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0, d1, d2, d3)->(0, d3)>}
     : memref<16x16x16x16xf16>, vector<16x16xf16>
   %F = arith.divf %D, %E : vector<16x16xf16>
-  vector.transfer_write %F, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16>
+  vector.transfer_write %F, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<16x16xf16>
   return
 }
@@ -199,11 +199,11 @@ func.func @matmul_3Dmemref(%arg0: memref<2x16x16xf16>, %arg1: memref<16xf16>, %a
   %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0, %c0], %cst {in_bounds = [true, true]} : memref<2x16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0], %cst {permutation_map = #map4, in_bounds = [true, true]} : memref<16xf16>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = [true, true]} : memref<2x16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<2x16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0], %cst {permutation_map = #map4, in_bounds = array<i1: true, true>} : memref<16xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<2x16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<2x16x16xf16>
+  vector.transfer_write %D, %arg2[%c0, %c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<2x16x16xf16>
   return
 }
@@ -227,11 +227,11 @@ func.func @matmul_memref_strided(%arg0: memref<2x16x16xf16, affine_map<(d0, d1,
   %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0, %c0], %cst {in_bounds = [true, true]} : memref<2x16x16xf16, affine_map<(d0, d1, d2) -> (d0 * 512 + d1 * 32 + d2)>>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0], %cst {permutation_map = #map4, in_bounds = [true, true]} : memref<16xf16>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = [true, true]} : memref<2x16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<2x16x16xf16, affine_map<(d0, d1, d2) -> (d0 * 512 + d1 * 32 + d2)>>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0], %cst {permutation_map = #map4, in_bounds = array<i1: true, true>} : memref<16xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<2x16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<2x16x16xf16>
+  vector.transfer_write %D, %arg2[%c0, %c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<2x16x16xf16>
   return
 }
@@ -254,11 +254,11 @@ func.func @matmul_transposed(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>,
   %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map5, in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map5, in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<16x16xf16>
   return
 }
@@ -281,11 +281,11 @@ func.func @matmul_transposed_broadcasted_1d(%arg0: memref<16xf16>, %arg1: memref
   %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0], %cst {in_bounds = [true, true], permutation_map = affine_map<(d0) -> (d0, 0)>} : memref<16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0], %cst {in_bounds = [true, true], permutation_map = affine_map<(d0) -> (d0, 0)>} : memref<16xf16>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0], %cst {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0) -> (d0, 0)>} : memref<16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0], %cst {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0) -> (d0, 0)>} : memref<16xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<16x16xf16>
   return
 }
@@ -308,11 +308,11 @@ func.func @matmul_transposed_broadcasted_2d(%arg0: memref<32x32xf16>, %arg1: mem
   %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, 0)>} : memref<32x32xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, 0)>} : memref<32x32xf16>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0, d1) -> (d1, 0)>} : memref<32x32xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0, d1) -> (d1, 0)>} : memref<32x32xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf16>, memref<16x16xf16>
   return
 }
@@ -331,21 +331,21 @@ func.func @matmul_transposed_broadcasted_2d(%arg0: memref<32x32xf16>, %arg1: mem
 // Do not convert to subgroup_mma ops with integer types if signedness cannot be inferred.
 // CHECK-LABEL: func @matmul_no_extend_int8
-// CHECK-DAG: %[[A:.+]] = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : memref<16x16xi8>, vector<16x16xi8>
-// CHECK-DAG: %[[B:.+]] = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : memref<16x16xi8>, vector<16x16xi8>
-// CHECK-DAG: %[[C:.+]] = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : memref<16x16xi32>, vector<16x16xi32>
+// CHECK-DAG: %[[A:.+]] = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true>} : memref<16x16xi8>, vector<16x16xi8>
+// CHECK-DAG: %[[B:.+]] = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true>} : memref<16x16xi8>, vector<16x16xi8>
+// CHECK-DAG: %[[C:.+]] = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true>} : memref<16x16xi32>, vector<16x16xi32>
 // CHECK: %[[D:.+]] = vector.contract {indexing_maps = [#[[$map]], #[[$map1]], #[[$map2]]], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[A]], %[[B]], %[[C]] : vector<16x16xi8>, vector<16x16xi8> into vector<16x16xi32>
-// CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] {in_bounds = [true, true]} : vector<16x16xi32>, memref<16x16xi32>
+// CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] {in_bounds = array<i1: true, true>} : vector<16x16xi32>, memref<16x16xi32>
 func.func @matmul_no_extend_int8(%arg0: memref<16x16xi8>, %arg1: memref<16x16xi8>, %arg2: memref<16x16xi32>) {
   %cst_0 = arith.constant dense<0> : vector<16x16xi8>
   %c0 = arith.constant 0 : index
   %cst_i8 = arith.constant 0 : i8
   %cst_i32 = arith.constant 0 : i32
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst_i8 {in_bounds = [true, true]} : memref<16x16xi8>, vector<16x16xi8>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst_i8 {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xi8>, vector<16x16xi8>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst_i32 {in_bounds = [true, true]} : memref<16x16xi32>, vector<16x16xi32>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst_i8 {in_bounds = array<i1: true, true>} : memref<16x16xi8>, vector<16x16xi8>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst_i8 {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x16xi8>, vector<16x16xi8>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst_i32 {in_bounds = array<i1: true, true>} : memref<16x16xi32>, vector<16x16xi32>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xi8>, vector<16x16xi8> into vector<16x16xi32>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xi32>, memref<16x16xi32>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xi32>, memref<16x16xi32>
   return
 }
@@ -369,13 +369,13 @@ func.func @matmul_int8(%arg0: memref<16x16xi8>, %arg1: memref<16x16xi8>, %arg2:
   %c0 = arith.constant 0 : index
   %cst_i8 = arith.constant 0 : i8
   %cst_i32 = arith.constant 0 : i32
-  %Ar = vector.transfer_read %arg0[%c0, %c0], %cst_i8 {in_bounds = [true, true]} : memref<16x16xi8>, vector<16x16xi8>
-  %Br = vector.transfer_read %arg1[%c0, %c0], %cst_i8 {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xi8>, vector<16x16xi8>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst_i32 {in_bounds = [true, true]} : memref<16x16xi32>, vector<16x16xi32>
+  %Ar = vector.transfer_read %arg0[%c0, %c0], %cst_i8 {in_bounds = array<i1: true, true>} : memref<16x16xi8>, vector<16x16xi8>
+  %Br = vector.transfer_read %arg1[%c0, %c0], %cst_i8 {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x16xi8>, vector<16x16xi8>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst_i32 {in_bounds = array<i1: true, true>} : memref<16x16xi32>, vector<16x16xi32>
   %Ae = arith.extsi %Ar : vector<16x16xi8> to vector<16x16xi32>
   %Be = arith.extsi %Br : vector<16x16xi8> to vector<16x16xi32>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %Ae, %Be, %C : vector<16x16xi32>, vector<16x16xi32> into vector<16x16xi32>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xi32>, memref<16x16xi32>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xi32>, memref<16x16xi32>
   return
 }
@@ -399,13 +399,13 @@ func.func @matmul_mixed_signedness_int8(%arg0: memref<16x16xi8>, %arg1: memref<1
   %c0 = arith.constant 0 : index
   %cst_i8 = arith.constant 0 : i8
   %cst_i32 = arith.constant 0 : i32
-  %Ar = vector.transfer_read %arg0[%c0, %c0], %cst_i8 {in_bounds = [true, true]} : memref<16x16xi8>, vector<16x16xi8>
-  %Br = vector.transfer_read %arg1[%c0, %c0], %cst_i8 {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xi8>, vector<16x16xi8>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst_i32 {in_bounds = [true, true]} : memref<16x16xi32>, vector<16x16xi32>
+  %Ar = vector.transfer_read %arg0[%c0, %c0], %cst_i8 {in_bounds = array<i1: true, true>} : memref<16x16xi8>, vector<16x16xi8>
+  %Br = vector.transfer_read %arg1[%c0, %c0], %cst_i8 {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x16xi8>, vector<16x16xi8>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst_i32 {in_bounds = array<i1: true, true>} : memref<16x16xi32>, vector<16x16xi32>
   %Ae = arith.extui %Ar : vector<16x16xi8> to vector<16x16xi32>
   %Be = arith.extsi %Br : vector<16x16xi8> to vector<16x16xi32>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %Ae, %Be, %C : vector<16x16xi32>, vector<16x16xi32> into vector<16x16xi32>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xi32>, memref<16x16xi32>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xi32>, memref<16x16xi32>
   return
 }
@@ -429,13 +429,13 @@ func.func @matmul_mixed_signedness_int8(%arg0: memref<16x32xi8>, %arg1: memref<1
   %c0 = arith.constant 0 : index
   %cst_i8 = arith.constant 0 : i8
   %cst_i32 = arith.constant 0 : i32
-  %Ar = vector.transfer_read %arg0[%c0, %c0], %cst_i8 {in_bounds = [true, true]} : memref<16x32xi8>, vector<16x32xi8>
-  %Br = vector.transfer_read %arg1[%c0, %c0], %cst_i8 {permutation_map = #map0, in_bounds = [true, true]} : memref<16x32xi8>, vector<16x32xi8>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst_i32 {in_bounds = [true, true]} : memref<16x16xi32>, vector<16x16xi32>
+  %Ar = vector.transfer_read %arg0[%c0, %c0], %cst_i8 {in_bounds = array<i1: true, true>} : memref<16x32xi8>, vector<16x32xi8>
+  %Br = vector.transfer_read %arg1[%c0, %c0], %cst_i8 {permutation_map = #map0, in_bounds = array<i1: true, true>} : memref<16x32xi8>, vector<16x32xi8>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst_i32 {in_bounds = array<i1: true, true>} : memref<16x16xi32>, vector<16x16xi32>
   %Ae = arith.extui %Ar : vector<16x32xi8> to vector<16x32xi32>
   %Be = arith.extsi %Br : vector<16x32xi8> to vector<16x32xi32>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %Ae, %Be, %C : vector<16x32xi32>, vector<16x32xi32> into vector<16x16xi32>
-  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xi32>, memref<16x16xi32>
+  vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xi32>, memref<16x16xi32>
   return
 }
@@ -452,12 +452,12 @@ func.func @matmul_mixed_signedness_int8(%arg0: memref<16x32xi8>, %arg1: memref<1
 func.func @cast_f16_to_f32_write(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<16x16xf16>, %arg3: memref<16x16xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16>
   %cast = arith.extf %D : vector<16x16xf16> to vector<16x16xf32>
-  vector.transfer_write %cast, %arg3[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf32>, memref<16x16xf32>
+  vector.transfer_write %cast, %arg3[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf32>, memref<16x16xf32>
   return
 }
@@ -472,7 +472,7 @@ func.func @cast_f16_to_f32_write(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf
 // CHECK-SAME: %[[ALLOC:.+]]: memref<64x128xf16>
 // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
 // CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : f16
-// CHECK: %[[READ:.+]] = vector.transfer_read %[[ALLOC]][%[[C0]], %[[C0]]], %[[CST]] {in_bounds = [true, true], permutation_map = #[[$MAP]]}
+// CHECK: %[[READ:.+]] = vector.transfer_read %[[ALLOC]][%[[C0]], %[[C0]]], %[[CST]] {in_bounds = array<i1: true, true>, permutation_map = #[[$MAP]]}
 // CHECK: %[[EXTF1:.+]] = arith.extf %[[READ]]
 // CHECK-NOT: vector.transpose
 // CHECK: %[[RESULT:.+]] = vector.contract
@@ -480,12 +480,12 @@ func.func @fold_transpose_into_transfer_read(%alloc: memref<64x128xf16>, %vector
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
   %init = arith.constant dense<0.000000e+00> : vector<32x64xf32>
-  %0 = vector.transfer_read %alloc[%c0, %c0], %cst {in_bounds = [true, true]} : memref<64x128xf16>, vector<64x128xf16>
+  %0 = vector.transfer_read %alloc[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<64x128xf16>, vector<64x128xf16>
   %1 = arith.extf %0 : vector<64x128xf16> to vector<64x128xf32>
   %2 = arith.extf %vector : vector<32x128xf16> to vector<32x128xf32>
   %3 = vector.transpose %1, [1, 0] : vector<64x128xf32> to vector<128x64xf32>
   %4 = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %2, %3, %init : vector<32x128xf32>, vector<128x64xf32> into vector<32x64xf32>
-  vector.transfer_write %4, %alloc2[%c0, %c0] {in_bounds = [true, true]} : vector<32x64xf32>, memref<32x64xf32>
+  vector.transfer_write %4, %alloc2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<32x64xf32>, memref<32x64xf32>
   return
 }
@@ -506,14 +506,14 @@ func.func @fold_transpose_into_transfer_read(%alloc: memref<64x128xf16>, %vector
 func.func @cast_f16_to_f32_read(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<16x16xf16>, %arg3: memref<16x16xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f16
-  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
-  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16>
+  %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
+  %C = vector.transfer_read %arg2[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref<16x16xf16>, vector<16x16xf16>
   %Aext = arith.extf %A : vector<16x16xf16> to vector<16x16xf32>
   %Bext = arith.extf %B : vector<16x16xf16> to vector<16x16xf32>
   %Cext = arith.extf %C : vector<16x16xf16> to vector<16x16xf32>
   %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %Aext, %Bext, %Cext : vector<16x16xf32>, vector<16x16xf32> into vector<16x16xf32>
-  vector.transfer_write %D, %arg3[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf32>, memref<16x16xf32>
+  vector.transfer_write %D, %arg3[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<16x16xf32>, memref<16x16xf32>
   return
 }
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 2fe9ba8fead17..32431175d4565 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -2492,7 +2492,7 @@ func.func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: ind
 func.func @transfer_read_1d_inbounds(%A : memref<?xf32>, %base: index) -> vector<17xf32> {
   %f7 = arith.constant 7.0: f32
-  %f = vector.transfer_read %A[%base], %f7 {in_bounds = [true]} :
+  %f = vector.transfer_read %A[%base], %f7 {in_bounds = array<i1: true>} :
     memref<?xf32>, vector<17xf32>
   return %f: vector<17xf32>
 }
@@ -2530,7 +2530,7 @@ func.func @transfer_read_1d_mask(%A : memref<?xf32>, %base : index) -> vector<5x
 func.func @transfer_read_1d_scalable_mask(%arg0: memref<1x?xf32>, %mask: vector<[4]xi1>) -> vector<[4]xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %vec = vector.transfer_read %arg0[%c0, %c0], %pad, %mask {in_bounds = [true]} : memref<1x?xf32>, vector<[4]xf32>
+  %vec = vector.transfer_read %arg0[%c0, %c0], %pad, %mask {in_bounds = array<i1: true>} : memref<1x?xf32>, vector<[4]xf32>
   return %vec : vector<[4]xf32>
 }
@@ -2539,7 +2539,7 @@ func.func @transfer_read_1d_scalable_mask(%arg0: memref<1x?xf32>, %mask: vector<
 // CHECK: llvm.intr.masked.store %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : vector<[4]xf32>, vector<[4]xi1> into !llvm.ptr
 func.func @transfer_write_1d_scalable_mask(%arg0: memref<1x?xf32>, %vec: vector<[4]xf32>, %mask: vector<[4]xi1>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %arg0[%c0, %c0], %mask {in_bounds = [true]} : vector<[4]xf32>, memref<1x?xf32>
+  vector.transfer_write %vec, %arg0[%c0, %c0], %mask {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<1x?xf32>
   return
 }
diff --git a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir
a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir index 6ec74f6b32db9..f2fc13bc8e37f 100644 --- a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir +++ b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir @@ -4,7 +4,7 @@ // CHECK: %[[ALLOC:.*]] = memref.alloca() : memref> // CHECK: %[[CASTED:.*]] = vector.type_cast %[[ALLOC]] : memref> to memref<4xvector<9xf32>> // CHECK: scf.for {{.*}} { -// CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %cst {in_bounds = [true]} : tensor, vector<9xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %cst {in_bounds = array} : tensor, vector<9xf32> // CHECK: memref.store %[[READ]], %[[CASTED]][%{{.*}}] : memref<4xvector<9xf32>> // CHECK: } // CHECK: %[[LOADED:.*]] = memref.load %[[ALLOC]][] : memref> @@ -12,7 +12,7 @@ func.func @transfer_read_2d(%A : tensor, %base1 : index, %base2 : index) -> (vector<4x9xf32>){ %p = arith.constant -42.0: f32 - %f = vector.transfer_read %A[%base1, %base2], %p {in_bounds = [true, true]} + %f = vector.transfer_read %A[%base1, %base2], %p {in_bounds = array} : tensor, vector<4x9xf32> return %f : vector<4x9xf32> } @@ -25,13 +25,13 @@ func.func @transfer_read_2d(%A : tensor, %base1 : index, %base2 : index // CHECK: %[[CASTED:.*]] = vector.type_cast %[[ALLOC]] : memref> to memref<2xvector<3xf32>> // CHECK: %[[RESULT:.*]] = scf.for {{.*}} iter_args(%[[STATE:.*]] = %{{.*}}) -> (tensor) { // CHECK: %[[LOADED:.*]] = memref.load %[[CASTED]][%{{.*}}] : memref<2xvector<3xf32>> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[LOADED]], %[[STATE]][{{.*}}] {in_bounds = [true]} : vector<3xf32>, tensor +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[LOADED]], %[[STATE]][{{.*}}] {in_bounds = array} : vector<3xf32>, tensor // CHECK: scf.yield %[[WRITE]] : tensor // CHECK: } // CHECK: return %[[RESULT]] : tensor func.func @transfer_write_2d(%A : tensor, %vec : vector<2x3xf32>, %base1 : index, %base2 : index) -> (tensor) { - %t = vector.transfer_write %vec, %A[%base1, %base2] {in_bounds = [true, true]} + %t = vector.transfer_write %vec, %A[%base1, %base2] {in_bounds = array} : vector<2x3xf32>, tensor return %t : tensor } @@ -46,6 +46,6 @@ func.func @transfer_write_2d(%A : tensor, %vec : vector<2x3xf32>, // CHECK: return %[[RESULT]] func.func @scalable_transpose_store(%vec: vector<4x[4]xf32>, %A: tensor, %base1: index, %base2: index) -> tensor { %transpose = vector.transpose %vec, [1, 0] : vector<4x[4]xf32> to vector<[4]x4xf32> - %result = vector.transfer_write %transpose, %A[%base1, %base2] {in_bounds = [true, true]} : vector<[4]x4xf32>, tensor + %result = vector.transfer_write %transpose, %A[%base1, %base2] {in_bounds = array} : vector<[4]x4xf32>, tensor return %result : tensor } diff --git a/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir index f8def658e947f..f04276ed26ad9 100644 --- a/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir +++ b/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir @@ -2,19 +2,19 @@ // CHECK-LABEL: func @transfer_read_2d( // CHECK: %[[V_INIT:.*]] = arith.constant dense<-4.200000e+01> : vector<4x9xf32> -// CHECK: %[[V0:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {in_bounds = [true]} : tensor, vector<9xf32> +// CHECK: %[[V0:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {in_bounds = array} : tensor, vector<9xf32> // CHECK: %[[I0:.*]] = vector.insert %[[V0]], 
%[[V_INIT]] [0] : vector<9xf32> into vector<4x9xf32> -// CHECK: %[[V1:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {in_bounds = [true]} : tensor, vector<9xf32> +// CHECK: %[[V1:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {in_bounds = array} : tensor, vector<9xf32> // CHECK: %[[I1:.*]] = vector.insert %[[V1]], %[[I0]] [1] : vector<9xf32> into vector<4x9xf32> -// CHECK: %[[V2:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {in_bounds = [true]} : tensor, vector<9xf32> +// CHECK: %[[V2:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {in_bounds = array} : tensor, vector<9xf32> // CHECK: %[[I2:.*]] = vector.insert %[[V2]], %[[I1]] [2] : vector<9xf32> into vector<4x9xf32> -// CHECK: %[[V3:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {in_bounds = [true]} : tensor, vector<9xf32> +// CHECK: %[[V3:.*]] = vector.transfer_read %{{.*}}[{{.*}}], %{{.*}} {in_bounds = array} : tensor, vector<9xf32> // CHECK: %[[I3:.*]] = vector.insert %[[V3]], %[[I2]] [3] : vector<9xf32> into vector<4x9xf32> // CHECK: return %[[I3]] : vector<4x9xf32> func.func @transfer_read_2d(%A : tensor, %base1 : index, %base2 : index) -> (vector<4x9xf32>){ %p = arith.constant -42.0: f32 - %f = vector.transfer_read %A[%base1, %base2], %p {in_bounds = [true, true]} + %f = vector.transfer_read %A[%base1, %base2], %p {in_bounds = array} : tensor, vector<4x9xf32> return %f : vector<4x9xf32> } @@ -23,13 +23,13 @@ func.func @transfer_read_2d(%A : tensor, %base1 : index, %base2 : index // CHECK-LABEL: func @transfer_write_2d( // CHECK: %[[V0:.*]] = vector.extract %{{.*}}[0] : vector<3xf32> from vector<2x3xf32> -// CHECK: %[[T0:.*]] = vector.transfer_write %[[V0]], %{{.*}}[{{.*}}] {in_bounds = [true]} : vector<3xf32>, tensor +// CHECK: %[[T0:.*]] = vector.transfer_write %[[V0]], %{{.*}}[{{.*}}] {in_bounds = array} : vector<3xf32>, tensor // CHECK: %[[V1:.*]] = vector.extract %{{.*}}[1] : vector<3xf32> from vector<2x3xf32> -// CHECK: %[[T1:.*]] = vector.transfer_write %[[V1]], %[[T0]][{{.*}}] {in_bounds = [true]} : vector<3xf32>, tensor +// CHECK: %[[T1:.*]] = vector.transfer_write %[[V1]], %[[T0]][{{.*}}] {in_bounds = array} : vector<3xf32>, tensor // CHECK: return %[[T1]] : tensor func.func @transfer_write_2d(%A : tensor, %vec : vector<2x3xf32>, %base1 : index, %base2 : index) -> (tensor) { - %t = vector.transfer_write %vec, %A[%base1, %base2] {in_bounds = [true, true]} + %t = vector.transfer_write %vec, %A[%base1, %base2] {in_bounds = array} : vector<2x3xf32>, tensor return %t : tensor } diff --git a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir index 7d97829c06599..0e759c67b3cbd 100644 --- a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir @@ -19,7 +19,7 @@ func.func @transfer_read_inbounds(%A : memref) -> (vector<2x3x4xf32>) // CHECK-NEXT: vector.insert {{.*}} [1, 2] : vector<4xf32> into vector<2x3x4xf32> // CHECK-NOT: scf.if // CHECK-NOT: scf.for - %vec = vector.transfer_read %A[%c0, %c0, %c0], %f0 {in_bounds = [true, true, true]} : memref, vector<2x3x4xf32> + %vec = vector.transfer_read %A[%c0, %c0, %c0], %f0 {in_bounds = array} : memref, vector<2x3x4xf32> return %vec : vector<2x3x4xf32> } @@ -81,6 +81,6 @@ func.func @transfer_read_mask(%A : memref, %mask : vector<2x3x4xi1>) // CHECK-NEXT: vector.insert {{.*}} [1, 2] : vector<4xf32> into vector<2x3x4xf32> // CHECK-NOT: scf.if // CHECK-NOT: scf.for - %vec = vector.transfer_read %A[%c0, %c0, 
diff --git a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
index 7d97829c06599..0e759c67b3cbd 100644
--- a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
@@ -19,7 +19,7 @@ func.func @transfer_read_inbounds(%A : memref<?x?x?xf32>) -> (vector<2x3x4xf32>)
 // CHECK-NEXT: vector.insert {{.*}} [1, 2] : vector<4xf32> into vector<2x3x4xf32>
 // CHECK-NOT: scf.if
 // CHECK-NOT: scf.for
-  %vec = vector.transfer_read %A[%c0, %c0, %c0], %f0 {in_bounds = [true, true, true]} : memref<?x?x?xf32>, vector<2x3x4xf32>
+  %vec = vector.transfer_read %A[%c0, %c0, %c0], %f0 {in_bounds = array<i1: true, true, true>} : memref<?x?x?xf32>, vector<2x3x4xf32>
   return %vec : vector<2x3x4xf32>
 }
@@ -81,6 +81,6 @@ func.func @transfer_read_mask(%A : memref<?x?x?xf32>, %mask : vector<2x3x4xi1>)
 // CHECK-NEXT: vector.insert {{.*}} [1, 2] : vector<4xf32> into vector<2x3x4xf32>
 // CHECK-NOT: scf.if
 // CHECK-NOT: scf.for
-  %vec = vector.transfer_read %A[%c0, %c0, %c0], %f0, %mask {in_bounds = [true, true, true]}: memref<?x?x?xf32>, vector<2x3x4xf32>
+  %vec = vector.transfer_read %A[%c0, %c0, %c0], %f0, %mask {in_bounds = array<i1: true, true, true>}: memref<?x?x?xf32>, vector<2x3x4xf32>
   return %vec : vector<2x3x4xf32>
 }
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
index 30df419822994..9fb35ff3a5faf 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -133,7 +133,7 @@ func.func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
     affine.for %i1 = 0 to %N {
       affine.for %i2 = 0 to %O {
         affine.for %i3 = 0 to %P step 5 {
-          %f = vector.transfer_read %A[%i0, %i1, %i2, %i3], %f0 {in_bounds = [false, true, false], permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, 0, d0)>} : memref<?x?x?x?xf32>, vector<5x4x3xf32>
+          %f = vector.transfer_read %A[%i0, %i1, %i2, %i3], %f0 {in_bounds = array<i1: false, true, false>, permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, 0, d0)>} : memref<?x?x?x?xf32>, vector<5x4x3xf32>
           // Add a dummy use to prevent dead code elimination from removing
           // transfer read ops.
           "dummy_use"(%f) : (vector<5x4x3xf32>) -> ()
@@ -361,17 +361,17 @@ func.func @transfer_write_progressive_inbounds(%A : memref<?x?xf32>, %base: inde
   // CHECK-NEXT: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
   // CHECK-NEXT: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
   // CHECK-NEXT: %[[vec_1d:.*]] = memref.load %[[vmemref]][%[[I]]] : memref<3xvector<15xf32>>
-  // CHECK-NEXT: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
+  // CHECK-NEXT: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {in_bounds = array<i1: true>} : vector<15xf32>, memref<?x?xf32>
   // FULL-UNROLL: %[[VEC0:.*]] = vector.extract %[[vec]][0] : vector<15xf32> from vector<3x15xf32>
-  // FULL-UNROLL: vector.transfer_write %[[VEC0]], %[[A]][%[[base]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
+  // FULL-UNROLL: vector.transfer_write %[[VEC0]], %[[A]][%[[base]], %[[base]]] {in_bounds = array<i1: true>} : vector<15xf32>, memref<?x?xf32>
   // FULL-UNROLL: %[[I1:.*]] = affine.apply #[[$MAP1]]()[%[[base]]]
   // FULL-UNROLL: %[[VEC1:.*]] = vector.extract %[[vec]][1] : vector<15xf32> from vector<3x15xf32>
-  // FULL-UNROLL: vector.transfer_write %2, %[[A]][%[[I1]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
+  // FULL-UNROLL: vector.transfer_write %2, %[[A]][%[[I1]], %[[base]]] {in_bounds = array<i1: true>} : vector<15xf32>, memref<?x?xf32>
   // FULL-UNROLL: %[[I2:.*]] = affine.apply #[[$MAP2]]()[%[[base]]]
   // FULL-UNROLL: %[[VEC2:.*]] = vector.extract %[[vec]][2] : vector<15xf32> from vector<3x15xf32>
-  // FULL-UNROLL: vector.transfer_write %[[VEC2:.*]], %[[A]][%[[I2]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
-  vector.transfer_write %vec, %A[%base, %base] {in_bounds = [true, true]} :
+  // FULL-UNROLL: vector.transfer_write %[[VEC2:.*]], %[[A]][%[[I2]], %[[base]]] {in_bounds = array<i1: true>} : vector<15xf32>, memref<?x?xf32>
+  vector.transfer_write %vec, %A[%base, %base] {in_bounds = array<i1: true, true>} :
     vector<3x15xf32>, memref<?x?xf32>
   return
 }
@@ -507,7 +507,7 @@ func.func @transfer_read_with_tensor(%arg: tensor<f32>) -> vector<1xf32> {
   // CHECK-NEXT: %[[RESULT:.*]] = vector.broadcast %[[EXTRACTED]] : f32 to vector<1xf32>
   // CHECK-NEXT: return %[[RESULT]] : vector<1xf32>
   %f0 = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %arg[], %f0 {in_bounds = [true], permutation_map = affine_map<()->(0)>} :
+  %0 = vector.transfer_read %arg[], %f0 {in_bounds = array<i1: true>, permutation_map = affine_map<()->(0)>} :
     tensor<f32>, vector<1xf32>
   return %0: vector<1xf32>
 }
@@ -528,7 +528,7 @@ func.func @transfer_write_scalable(%arg0: memref>
   %7 = llvm.mlir.undef : vector<[16]xf32>
   %8 = llvm.insertelement %arg1, %7[%0 : i32] : vector<[16]xf32>
   %9 = llvm.shufflevector %8, %7 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<[16]xf32>
-  vector.transfer_write %9, %arg0[%c0], %6 {in_bounds = [true]} : vector<[16]xf32>, memref>
+  vector.transfer_write %9, %arg0[%c0], %6 {in_bounds = array<i1: true>} : vector<[16]xf32>, memref>
   return
 }
@@ -645,7 +645,7 @@ func.func @transfer_read_array_of_scalable(%arg0: memref<3x?xf32>) -> vector<3x[4]xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %dim = memref.dim %arg0, %c1 : memref<3x?xf32>
   %mask = vector.create_mask %c1, %dim : vector<3x[4]xi1>
-  %read = vector.transfer_read %arg0[%c0, %c0], %cst, %mask {in_bounds = [true, true]} : memref<3x?xf32>, vector<3x[4]xf32>
+  %read = vector.transfer_read %arg0[%c0, %c0], %cst, %mask {in_bounds = array<i1: true, true>} : memref<3x?xf32>, vector<3x[4]xf32>
   return %read : vector<3x[4]xf32>
 }
 // CHECK-LABEL: func.func @transfer_read_array_of_scalable(
@@ -663,7 +663,7 @@ func.func @transfer_read_array_of_scalable(%arg0: memref<3x?xf32>) -> vector<3x[4]xf32> {
 // CHECK: %[[UNPACK_MASK:.*]] = vector.type_cast %[[ALLOCA_MASK]] : memref<vector<3x[4]xi1>> to memref<3xvector<[4]xi1>>
 // CHECK: scf.for %[[VAL_11:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
 // CHECK: %[[MASK_SLICE:.*]] = memref.load %[[UNPACK_MASK]]{{\[}}%[[VAL_11]]] : memref<3xvector<[4]xi1>>
-// CHECK: %[[READ_SLICE:.*]] = vector.transfer_read %[[ARG]]{{\[}}%[[VAL_11]], %[[C0]]], %[[PADDING]], %[[MASK_SLICE]] {in_bounds = [true]} : memref<3x?xf32>, vector<[4]xf32>
+// CHECK: %[[READ_SLICE:.*]] = vector.transfer_read %[[ARG]]{{\[}}%[[VAL_11]], %[[C0]]], %[[PADDING]], %[[MASK_SLICE]] {in_bounds = array<i1: true>} : memref<3x?xf32>, vector<[4]xf32>
 // CHECK: memref.store %[[READ_SLICE]], %[[UNPACK_VECTOR]]{{\[}}%[[VAL_11]]] : memref<3xvector<[4]xf32>>
 // CHECK: }
 // CHECK: %[[RESULT:.*]] = memref.load %[[ALLOCA_VEC]][] : memref<vector<3x[4]xf32>>
@@ -678,7 +678,7 @@ func.func @transfer_write_array_of_scalable(%vec: vector<3x[4]xf32>, %arg0: memref<3x?xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   %dim = memref.dim %arg0, %c1 : memref<3x?xf32>
   %mask = vector.create_mask %c1, %dim : vector<3x[4]xi1>
-  vector.transfer_write %vec, %arg0[%c0, %c0], %mask {in_bounds = [true, true]} : vector<3x[4]xf32>, memref<3x?xf32>
+  vector.transfer_write %vec, %arg0[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<3x[4]xf32>, memref<3x?xf32>
   return
 }
 // CHECK-LABEL: func.func @transfer_write_array_of_scalable(
@@ -698,7 +698,7 @@ func.func @transfer_write_array_of_scalable(%vec: vector<3x[4]xf32>, %arg0: memref<3x?xf32>) {
 // CHECK: scf.for %[[VAL_11:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
 // CHECK: %[[MASK_SLICE:.*]] = memref.load %[[UNPACK_VECTOR]]{{\[}}%[[VAL_11]]] : memref<3xvector<[4]xf32>>
 // CHECK: %[[VECTOR_SLICE:.*]] = memref.load %[[UNPACK_MASK]]{{\[}}%[[VAL_11]]] : memref<3xvector<[4]xi1>>
-// CHECK: vector.transfer_write %[[MASK_SLICE]], %[[MEMREF]]{{\[}}%[[VAL_11]], %[[C0]]], %[[VECTOR_SLICE]] {in_bounds = [true]} : vector<[4]xf32>, memref<3x?xf32>
+// CHECK: vector.transfer_write %[[MASK_SLICE]], %[[MEMREF]]{{\[}}%[[VAL_11]], %[[C0]]], %[[VECTOR_SLICE]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<3x?xf32>
 // CHECK: }
 // CHECK: return
 // CHECK: }
@@ -714,13 +714,13 @@ func.func @cannot_lower_transfer_write_with_leading_scalable(%vec: vector<[4]x4xf32>, %arg0: memref<?x4xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   %dim = memref.dim %arg0, %c0 : memref<?x4xf32>
   %mask = vector.create_mask %dim, %c4 : vector<[4]x4xi1>
-  vector.transfer_write %vec, %arg0[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[4]x4xf32>, memref<?x4xf32>
+  vector.transfer_write %vec, %arg0[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[4]x4xf32>, memref<?x4xf32>
   return
 }
 // CHECK-LABEL: func.func @cannot_lower_transfer_write_with_leading_scalable(
 // CHECK-SAME: %[[VEC:.*]]: vector<[4]x4xf32>,
 // CHECK-SAME: %[[MEMREF:.*]]: memref<?x4xf32>)
-// CHECK: vector.transfer_write %[[VEC]], %[[MEMREF]][%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : vector<[4]x4xf32>, memref<?x4xf32>
+// CHECK: vector.transfer_write %[[VEC]], %[[MEMREF]][%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true>} : vector<[4]x4xf32>, memref<?x4xf32>

 // -----

@@ -731,12 +731,12 @@ func.func @cannot_lower_transfer_read_with_leading_scalable(%arg0: memref<?x4xf32>) -> vector<[4]x4xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %dim = memref.dim %arg0, %c0 : memref<?x4xf32>
   %mask = vector.create_mask %dim, %c4 : vector<[4]x4xi1>
-  %read = vector.transfer_read %arg0[%c0, %c0], %cst, %mask {in_bounds = [true, true]} : memref<?x4xf32>, vector<[4]x4xf32>
+  %read = vector.transfer_read %arg0[%c0, %c0], %cst, %mask {in_bounds = array<i1: true, true>} : memref<?x4xf32>, vector<[4]x4xf32>
   return %read : vector<[4]x4xf32>
 }
 // CHECK-LABEL: func.func @cannot_lower_transfer_read_with_leading_scalable(
 // CHECK-SAME: %[[MEMREF:.*]]: memref<?x4xf32>)
-// CHECK: %{{.*}} = vector.transfer_read %[[MEMREF]][%{{.*}}, %{{.*}}], %{{.*}}, %{{.*}} {in_bounds = [true, true]} : memref<?x4xf32>, vector<[4]x4xf32>
+// CHECK: %{{.*}} = vector.transfer_read %[[MEMREF]][%{{.*}}, %{{.*}}], %{{.*}}, %{{.*}} {in_bounds = array<i1: true, true>} : memref<?x4xf32>, vector<[4]x4xf32>

 // -----

@@ -746,7 +746,7 @@ func.func @cannot_lower_transfer_read_with_leading_scalable(%arg0: memref, %mask: vector<1x1xi1>) -> vector<1x1x1x1xi32> {
   %c0 = arith.constant 0 : index
   %c0_i32 = arith.constant 0 : i32
-  %3 = vector.transfer_read %subview[%c0, %c0, %c0, %c0], %c0_i32, %mask {in_bounds = [false, true, true, false], permutation_map = #map1}
+  %3 = vector.transfer_read %subview[%c0, %c0, %c0, %c0], %c0_i32, %mask {in_bounds = array<i1: false, true, true, false>, permutation_map = #map1}
           : memref<1x1x1x1xi32>, vector<1x1x1x1xi32>
   return %3 : vector<1x1x1x1xi32>
 }
@@ -767,7 +767,7 @@ func.func @add_arrays_of_scalable_vectors(%a: memref<1x2x?xf32>, %b: memref<1x2x?xf32>) -> vector<1x2x[4]xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %dim_a = memref.dim %a, %c2 : memref<1x2x?xf32>
   %mask_a = vector.create_mask %c2, %c3, %dim_a : vector<1x2x[4]xi1>
-  %vector_a = vector.transfer_read %a[%c0, %c0, %c0], %cst, %mask_a {in_bounds = [true, true, true]} : memref<1x2x?xf32>, vector<1x2x[4]xf32>
+  %vector_a = vector.transfer_read %a[%c0, %c0, %c0], %cst, %mask_a {in_bounds = array<i1: true, true, true>} : memref<1x2x?xf32>, vector<1x2x[4]xf32>
   return %vector_a : vector<1x2x[4]xf32>
 }
 // CHECK-LABEL: func.func @add_arrays_of_scalable_vectors
@@ -783,7 +783,7 @@ func.func @cannot_fully_unroll_transfer_write_of_nd_scalable_vector(%vec: vector<[4]x[4]xf32>, %memref: memref<?x?xf32>) {
 // FULL-UNROLL: vector.transfer_write {{.*}} : vector<[4]x[4]xf32>, memref<?x?xf32>
 // FULL-UNROLL-NOT: vector.extract
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %memref[%c0, %c0] {in_bounds = [true, true]} : vector<[4]x[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %vec, %memref[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[4]x[4]xf32>, memref<?x?xf32>
   return
 }
@@ -808,7 +808,7 @@ func.func @unroll_transfer_write_target_rank_zero(%vec : vector<2xi32>) {
 func.func @scalable_transpose_store_unmasked(%vec: vector<4x[4]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
   %transpose = vector.transpose %vec, [1, 0] : vector<4x[4]xf32> to vector<[4]x4xf32>
-  vector.transfer_write %transpose, %dest[%i, %j] {in_bounds = [true, true]} : vector<[4]x4xf32>, memref<?x?xf32>
+  vector.transfer_write %transpose, %dest[%i, %j] {in_bounds = array<i1: true, true>} : vector<[4]x4xf32>, memref<?x?xf32>
   return
 }
// FULL-UNROLL: #[[$SLICE_MAP:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
@@ -833,14 +833,14 @@ func.func @scalable_transpose_store_unmasked(%vec: vector<4x[4]xf32>, %dest: mem
 // FULL-UNROLL: %[[ELEM_2:.*]] = vector.extract %[[SLICE_2]]{{\[}}%[[VAL_13]]] : f32 from vector<[4]xf32>
 // FULL-UNROLL: %[[ELEM_3:.*]] = vector.extract %[[SLICE_3]]{{\[}}%[[VAL_13]]] : f32 from vector<[4]xf32>
 // FULL-UNROLL: %[[TRANSPOSE_SLICE:.*]] = vector.from_elements %[[ELEM_0]], %[[ELEM_1]], %[[ELEM_2]], %[[ELEM_3]] : vector<4xf32>
-// FULL-UNROLL: vector.transfer_write %[[TRANSPOSE_SLICE]], %[[DEST]]{{\[}}%[[SLICE_I]], %[[J]]] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
+// FULL-UNROLL: vector.transfer_write %[[TRANSPOSE_SLICE]], %[[DEST]]{{\[}}%[[SLICE_I]], %[[J]]] {in_bounds = array<i1: true>} : vector<4xf32>, memref<?x?xf32>

 // -----

 func.func @scalable_transpose_store_dynamic_mask(%vec: vector<4x[4]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index, %a: index, %b: index) {
   %transpose = vector.transpose %vec, [1, 0] : vector<4x[4]xf32> to vector<[4]x4xf32>
   %mask = vector.create_mask %a, %b : vector<[4]x4xi1>
-  vector.transfer_write %transpose, %dest[%i, %j], %mask {in_bounds = [true, true]} : vector<[4]x4xf32>, memref<?x?xf32>
+  vector.transfer_write %transpose, %dest[%i, %j], %mask {in_bounds = array<i1: true, true>} : vector<[4]x4xf32>, memref<?x?xf32>
   return
 }
 // FULL-UNROLL-LABEL: func.func @scalable_transpose_store_dynamic_mask(
@@ -854,7 +854,7 @@ func.func @scalable_transpose_store_dynamic_mask(%vec: vector<4x[4]xf32>, %dest:
 func.func @scalable_transpose_store_constant_mask(%vec: vector<4x[4]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
   %transpose = vector.transpose %vec, [1, 0] : vector<4x[4]xf32> to vector<[4]x4xf32>
   %mask = vector.constant_mask [4, 3] : vector<[4]x4xi1>
-  vector.transfer_write %transpose, %dest[%i, %j], %mask {in_bounds = [true, true]} : vector<[4]x4xf32>, memref<?x?xf32>
+  vector.transfer_write %transpose, %dest[%i, %j], %mask {in_bounds = array<i1: true, true>} : vector<[4]x4xf32>, memref<?x?xf32>
   return
 }
 // FULL-UNROLL-LABEL: func.func @scalable_transpose_store_constant_mask
@@ -871,7 +871,7 @@ func.func @scalable_transpose_store_constant_mask(%vec: vector<4x[4]xf32>, %dest
 /// Unsupported transpose.
 func.func @negative_scalable_transpose_store_0(%vec: vector<[4]x4xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
   %transpose = vector.transpose %vec, [1, 0] : vector<[4]x4xf32> to vector<4x[4]xf32>
-  vector.transfer_write %transpose, %dest[%i, %j] {in_bounds = [true, true]} : vector<4x[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %transpose, %dest[%i, %j] {in_bounds = array<i1: true, true>} : vector<4x[4]xf32>, memref<?x?xf32>
   return
 }
 // FULL-UNROLL-LABEL: @negative_scalable_transpose_store_0
@@ -882,7 +882,7 @@ func.func @negative_scalable_transpose_store_0(%vec: vector<[4]x4xf32>, %dest: m
 /// Non-identity permutation map (should be lowered first).
 func.func @negative_scalable_transpose_store_1(%vec: vector<4x[4]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
   %transpose = vector.transpose %vec, [1, 0] : vector<4x[4]xf32> to vector<[4]x4xf32>
-  vector.transfer_write %transpose, %dest[%i, %j] {in_bounds = [true, true], permutation_map = affine_map<(d0,d1) -> (d1, d0)> } : vector<[4]x4xf32>, memref<?x?xf32>
+  vector.transfer_write %transpose, %dest[%i, %j] {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0,d1) -> (d1, d0)> } : vector<[4]x4xf32>, memref<?x?xf32>
   return
 }
 // FULL-UNROLL-LABEL: @negative_scalable_transpose_store_1
@@ -894,7 +894,7 @@ func.func @negative_scalable_transpose_store_1(%vec: vector<4x[4]xf32>, %dest: m
 /// Out-of-bounds dim.
 func.func @negative_scalable_transpose_store_2(%vec: vector<4x[4]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
   %transpose = vector.transpose %vec, [1, 0] : vector<4x[4]xf32> to vector<[4]x4xf32>
-  vector.transfer_write %transpose, %dest[%i, %j] {in_bounds = [false, true]} : vector<[4]x4xf32>, memref<?x?xf32>
+  vector.transfer_write %transpose, %dest[%i, %j] {in_bounds = array<i1: false, true>} : vector<[4]x4xf32>, memref<?x?xf32>
   return
 }
 // FULL-UNROLL-LABEL: @negative_scalable_transpose_store_2
@@ -904,7 +904,7 @@ func.func @negative_scalable_transpose_store_2(%vec: vector<4x[4]xf32>, %dest: m
 /// Source not a vector.transpose.
 func.func @negative_scalable_transpose_store_3(%vec: vector<[4]x4xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
-  vector.transfer_write %vec, %dest[%i, %j] {in_bounds = [true, true]} : vector<[4]x4xf32>, memref<?x?xf32>
+  vector.transfer_write %vec, %dest[%i, %j] {in_bounds = array<i1: true, true>} : vector<[4]x4xf32>, memref<?x?xf32>
   return
 }
 // FULL-UNROLL-LABEL: @negative_scalable_transpose_store_3
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index 4841ecbb62e80..ede3ca0d64d98 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -3,7 +3,7 @@
 func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vector<8xf32> {
   %c0 = arith.constant 0.0 : f32
   %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
-    {in_bounds = [true]} : memref<8x16x32xf32>, vector<8xf32>
+    {in_bounds = array<i1: true>} : memref<8x16x32xf32>, vector<8xf32>
   return %0 : vector<8xf32>
 }
@@ -23,7 +23,7 @@ func.func @load_2D_vector(%source: memref<8x16x32xf32>,
     %offset: index) -> vector<8x16xf32> {
   %c0 = arith.constant 0.0 : f32
   %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
-    {in_bounds = [true, true]} : memref<8x16x32xf32>, vector<8x16xf32>
+    {in_bounds = array<i1: true, true>} : memref<8x16x32xf32>, vector<8x16xf32>
   return %0 : vector<8x16xf32>
 }
@@ -43,7 +43,7 @@ func.func @load_zero_pad_out_of_bounds(%source: memref<32x64xf32>,
     %offset: index) -> vector<8x16xf32> {
   %c0 = arith.constant 0.0 : f32
   %0 = vector.transfer_read %source[%offset, %offset], %c0
-    {in_bounds = [false, true]} : memref<32x64xf32>, vector<8x16xf32>
+    {in_bounds = array<i1: false, true>} : memref<32x64xf32>, vector<8x16xf32>
   return %0 : vector<8x16xf32>
 }
@@ -63,7 +63,7 @@ func.func @load_transposed(%source: memref<32x64xf32>,
   %c0 = arith.constant 0.0 : f32
   %0 = vector.transfer_read %source[%offset, %offset], %c0
     {permutation_map = affine_map<(d0, d1) -> (d1, d0)>,
-    in_bounds = [true, true]} : memref<32x64xf32>, vector<8x16xf32>
+    in_bounds = array<i1: true, true>} : memref<32x64xf32>, vector<8x16xf32>
   return %0 : vector<8x16xf32>
 }
@@ -82,7 +82,7 @@ func.func @load_dynamic_source(%source: memref<?x?x?xf32>,
     %offset: index) -> vector<8x16xf32> {
   %c0 = arith.constant 0.0 : f32
   %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
-    {in_bounds = [true, true]} : memref<?x?x?xf32>, vector<8x16xf32>
+    {in_bounds = array<i1: true, true>} : memref<?x?x?xf32>, vector<8x16xf32>
   return %0 : vector<8x16xf32>
 }
@@ -108,9 +108,9 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
     %offset: index, %arg2: index, %pad: f32) -> (vector<8x16xf32>, vector<8x16xf32>) {
   %c1 = arith.constant 1.0 : f32
   %0 = vector.transfer_read %source[%offset, %arg2], %c1
-    {in_bounds = [true, false]} : memref<32x64xf32>, vector<8x16xf32>
+    {in_bounds = array<i1: true, false>} : memref<32x64xf32>, vector<8x16xf32>
   %1 = vector.transfer_read %source[%arg2, %offset], %pad
-    {in_bounds = [false, true]} : memref<32x64xf32>, vector<8x16xf32>
+    {in_bounds = array<i1: false, true>} : memref<32x64xf32>, vector<8x16xf32>
   return %0, %1 : vector<8x16xf32>, vector<8x16xf32>
 }
@@ -124,7 +124,7 @@ func.func @no_load_masked(%source : memref<4xf32>, %offset : index) -> vector<4xf32> {
   %c0 = arith.constant 0.0 : f32
   %mask = arith.constant dense<[0, 1, 0, 1]> : vector<4xi1>
   %0 = vector.transfer_read %source[%offset], %c0, %mask
-    {in_bounds = [true]} : memref<4xf32>, vector<4xf32>
+    {in_bounds = array<i1: true>} : memref<4xf32>, vector<4xf32>
   return %0 : vector<4xf32>
 }
@@ -137,7 +137,7 @@ func.func @no_load_tensor(%source: tensor<32x64xf32>,
     %offset: index, %arg2: index) -> vector<8x16xf32> {
   %c0 = arith.constant 0.0 : f32
   %0 = vector.transfer_read %source[%offset, %arg2], %c0
-    {in_bounds = [true, true]} : tensor<32x64xf32>, vector<8x16xf32>
+    {in_bounds = array<i1: true, true>} : tensor<32x64xf32>, vector<8x16xf32>
   return %0 : vector<8x16xf32>
 }
@@ -150,7 +150,7 @@ func.func @no_load_high_dim_vector(%source: memref<16x32x64xf32>,
     %offset: index, %arg2: index) -> vector<8x16x32xf32> {
   %c0 = arith.constant 0.0 : f32
   %0 = vector.transfer_read %source[%offset, %arg2, %offset], %c0
-    {in_bounds = [true, true, true]} : memref<16x32x64xf32>, vector<8x16x32xf32>
+    {in_bounds = array<i1: true, true, true>} : memref<16x32x64xf32>, vector<8x16x32xf32>
   return %0 : vector<8x16x32xf32>
 }
@@ -163,7 +163,7 @@ func.func @no_load_non_unit_inner_stride(
     %source: memref<32xf32, strided<[?], offset: ?>>, %offset: index) -> vector<8xf32> {
   %c0 = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %source[%offset], %c0 {in_bounds = [true]}
+  %0 = vector.transfer_read %source[%offset], %c0 {in_bounds = array<i1: true>}
     : memref<32xf32, strided<[?], offset: ?>>, vector<8xf32>
   return %0 : vector<8xf32>
 }
@@ -178,7 +178,7 @@ func.func @no_load_unsupported_map(%source: memref<16x32x64xf32>,
   %c0 = arith.constant 0.0 : f32
   %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
     {permutation_map = affine_map<(d0, d1, d2) -> (d0, d2)>,
-    in_bounds = [true, true]} : memref<16x32x64xf32>, vector<8x16xf32>
+    in_bounds = array<i1: true, true>} : memref<16x32x64xf32>, vector<8x16xf32>
   return %0 : vector<8x16xf32>
 }
@@ -192,7 +192,7 @@ func.func @no_load_transpose_unsupported_data_type(%source: memref<32x64xf16>,
   %c0 = arith.constant 0.0 : f16
   %0 = vector.transfer_read %source[%offset, %offset], %c0
     {permutation_map = affine_map<(d0, d1) -> (d1, d0)>,
-    in_bounds = [true, true]} : memref<32x64xf16>, vector<8x16xf16>
+    in_bounds = array<i1: true, true>} : memref<32x64xf16>, vector<8x16xf16>
   return %0 : vector<8x16xf16>
 }
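The tests above rely on one `in_bounds` flag per result-vector dimension: a `false` entry marks a dimension that may run past the source bounds, with reads falling back to the padding value. A small sketch of the mixed case in the new syntax (hypothetical names; assumes the usual `vector.transfer_read` semantics):

    func.func @mixed_bounds_sketch(%src: memref<32x64xf32>, %i: index) -> vector<8x16xf32> {
      %pad = arith.constant 0.0 : f32
      // Dim 0 may go out of bounds (out-of-bounds lanes read %pad); dim 1 may not.
      %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = array<i1: false, true>}
          : memref<32x64xf32>, vector<8x16xf32>
      return %v : vector<8x16xf32>
    }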
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 361919c47b097..9cf131c18da6b 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -3,7 +3,7 @@
 func.func @store_1D_vector(%vec: vector<8xf32>,
     %source: memref<8x16x32xf32>, %offset: index) {
   vector.transfer_write %vec, %source[%offset, %offset, %offset]
-    {in_bounds = [true]}
+    {in_bounds = array<i1: true>}
     : vector<8xf32>, memref<8x16x32xf32>
   return
 }
@@ -23,7 +23,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
 func.func @store_2D_vector(%vec: vector<8x16xf32>,
     %source: memref<8x16x32xf32>, %offset: index) {
   vector.transfer_write %vec, %source[%offset, %offset, %offset]
-    {in_bounds = [true, true]}
+    {in_bounds = array<i1: true, true>}
     : vector<8x16xf32>, memref<8x16x32xf32>
   return
 }
@@ -43,7 +43,7 @@ func.func @store_2D_vector(%vec: vector<8x16xf32>,
 func.func @store_dynamic_source(%vec: vector<8x16xf32>,
     %source: memref<?x?x?xf32>, %offset: index) {
   vector.transfer_write %vec, %source[%offset, %offset, %offset]
-    {in_bounds = [true, true]}
+    {in_bounds = array<i1: true, true>}
     : vector<8x16xf32>, memref<?x?x?xf32>
   return
 }
@@ -70,7 +70,7 @@ func.func @no_store_transposed(%vec: vector<8x16xf32>,
     %source: memref<32x64xf32>, %offset: index) {
   vector.transfer_write %vec, %source[%offset, %offset]
     {permutation_map = affine_map<(d0, d1) -> (d1, d0)>,
-    in_bounds = [true, true]}
+    in_bounds = array<i1: true, true>}
     : vector<8x16xf32>, memref<32x64xf32>
   return
 }
@@ -83,7 +83,7 @@ func.func @no_store_transposed(%vec: vector<8x16xf32>,
 func.func @no_store_out_of_bounds(%vec: vector<8x16xf32>,
     %source: memref<32x64xf32>, %offset: index) {
   vector.transfer_write %vec, %source[%offset, %offset]
-    {in_bounds = [false, true]}
+    {in_bounds = array<i1: false, true>}
     : vector<8x16xf32>, memref<32x64xf32>
   return
 }
@@ -97,7 +97,7 @@ func.func @no_store_masked(%vec: vector<4xf32>,
     %source: memref<4xf32>, %offset: index) {
   %mask = arith.constant dense<[0, 1, 0, 1]> : vector<4xi1>
   vector.transfer_write %vec, %source[%offset], %mask
-    {in_bounds = [true]}
+    {in_bounds = array<i1: true>}
     : vector<4xf32>, memref<4xf32>
   return
 }
@@ -110,7 +110,7 @@ func.func @no_store_masked(%vec: vector<4xf32>,
 func.func @no_store_tensor(%vec: vector<8x16xf32>,
     %source: tensor<32x64xf32>, %offset: index) -> tensor<32x64xf32> {
   %0 = vector.transfer_write %vec, %source[%offset, %offset]
-    {in_bounds = [true, true]}
+    {in_bounds = array<i1: true, true>}
     : vector<8x16xf32>, tensor<32x64xf32>
   return %0 : tensor<32x64xf32>
 }
@@ -123,7 +123,7 @@ func.func @no_store_tensor(%vec: vector<8x16xf32>,
 func.func @no_store_high_dim_vector(%vec: vector<8x16x32xf32>,
     %source: memref<16x32x64xf32>, %offset: index) {
   vector.transfer_write %vec, %source[%offset, %offset, %offset]
-    {in_bounds = [true, true, true]}
+    {in_bounds = array<i1: true, true, true>}
     : vector<8x16x32xf32>, memref<16x32x64xf32>
   return
 }
@@ -136,7 +136,7 @@ func.func @no_store_high_dim_vector(%vec: vector<8x16x32xf32>,
 func.func @no_store_non_unit_inner_stride(%vec: vector<8xf32>,
     %source: memref<32xf32, strided<[?], offset: ?>>, %offset: index) {
   vector.transfer_write %vec, %source[%offset]
-    {in_bounds = [true]}
+    {in_bounds = array<i1: true>}
     : vector<8xf32>, memref<32xf32, strided<[?], offset: ?>>
   return
 }
@@ -150,7 +150,7 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
     %source: memref<16x32x64xf32>, %offset: index) {
   vector.transfer_write %vec, %source[%offset, %offset, %offset]
     {permutation_map = affine_map<(d0, d1, d2) -> (d0, d2)>,
-    in_bounds = [true, true]}
+    in_bounds = array<i1: true, true>}
     : vector<8x16xf32>, memref<16x32x64xf32>
   return
 }
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
index 0a077624d18f8..03815e03a84b9 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
@@ -22,7 +22,7 @@ func.func @vec1d_1(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK-NEXT: %{{.*}} = affine.apply #[[$map_id1]](%[[C0]])
 // CHECK-NEXT: %{{.*}} = affine.apply #[[$map_id1]](%[[C0]])
 // CHECK-NEXT: %{{.*}} = arith.constant 0.0{{.*}}: f32
-// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true], permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true>, permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
    %a0 = affine.load %A[%c0, %c0] : memref<?x?xf32>
  }
@@ -425,7 +425,7 @@ func.func @vec_rejected_8(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK: %{{.*}} = affine.apply #[[$map_id1]](%{{.*}})
 // CHECK: %{{.*}} = affine.apply #[[$map_id1]](%{{.*}})
 // CHECK: %{{.*}} = arith.constant 0.0{{.*}}: f32
-// CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true], permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true>, permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %{{.*}} in DFS post-order prevents vectorizing %{{.*}}
     affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
       %a18 = affine.load %A[%c0, %c0] : memref<?x?xf32>
@@ -459,7 +459,7 @@ func.func @vec_rejected_9(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK: %{{.*}} = affine.apply #[[$map_id1]](%{{.*}})
 // CHECK-NEXT: %{{.*}} = affine.apply #[[$map_id1]](%{{.*}})
 // CHECK-NEXT: %{{.*}} = arith.constant 0.0{{.*}}: f32
-// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true], permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
+// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true>, permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
   affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %{{.*}}
     affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
       %a18 = affine.load %A[%c0, %c0] : memref<?x?xf32>
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
index eb5120a49e3d4..4f66419770476 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
@@ -123,8 +123,8 @@ func.func @vectorize_matmul(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
   // VECT: affine.for %[[I2:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[M]]) step 4 {
   // VECT-NEXT:   affine.for %[[I3:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[N]]) step 8 {
   // VECT-NEXT:     affine.for %[[I4:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[K]]) {
-  // VECT:            %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {in_bounds = [true, false], permutation_map = #[[$map_proj_d0d1_zerod1]]} : memref<?x?xf32>, vector<4x8xf32>
-  // VECT:            %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {in_bounds = [false, true], permutation_map = #[[$map_proj_d0d1_d0zero]]} : memref<?x?xf32>, vector<4x8xf32>
+  // VECT:            %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {in_bounds = array<i1: true, false>, permutation_map = #[[$map_proj_d0d1_zerod1]]} : memref<?x?xf32>, vector<4x8xf32>
+  // VECT:            %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {in_bounds = array<i1: false, true>, permutation_map = #[[$map_proj_d0d1_d0zero]]} : memref<?x?xf32>, vector<4x8xf32>
   // VECT-NEXT:       %[[C:.*]] = arith.mulf %[[B]], %[[A]] : vector<4x8xf32>
   // VECT:            %[[D:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I3]]], %{{.*}} : memref<?x?xf32>, vector<4x8xf32>
   // VECT-NEXT:       %[[E:.*]] = arith.addf %[[D]], %[[C]] : vector<4x8xf32>
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_affine_apply.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_affine_apply.mlir
index 16ade6455d697..26b89fa571e60 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_affine_apply.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_affine_apply.mlir
@@ -141,7 +141,7 @@ func.func @affine_map_with_expr_2(%arg0: memref<8x12x16xf32>, %arg1: memref<8x24x48xf32>) {
 // CHECK-NEXT:   %[[S1:.*]] = affine.apply #[[$MAP_ID4]](%[[ARG3]], %[[ARG4]], %[[I0]])
 // CHECK-NEXT:   %[[S2:.*]] = affine.apply #[[$MAP_ID5]](%[[ARG3]], %[[ARG4]], %[[I0]])
 // CHECK-NEXT:   %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-NEXT:   %[[S3:.*]] = vector.transfer_read %[[ARG0]][%[[S0]], %[[S1]], %[[S2]]], %[[CST]] {in_bounds = [true], permutation_map = #[[$MAP_ID6]]} : memref<8x12x16xf32>, vector<8xf32>
+// CHECK-NEXT:   %[[S3:.*]] = vector.transfer_read %[[ARG0]][%[[S0]], %[[S1]], %[[S2]]], %[[CST]] {in_bounds = array<i1: true>, permutation_map = #[[$MAP_ID6]]} : memref<8x12x16xf32>, vector<8xf32>
 // CHECK-NEXT:   vector.transfer_write %[[S3]], %[[ARG1]][%[[ARG3]], %[[ARG4]], %[[ARG5]]] : vector<8xf32>, memref<8x24x48xf32>
 // CHECK-NEXT:  }
 // CHECK-NEXT: }
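In the supervectorizer output above, `in_bounds` coexists with a `permutation_map`, and the flag count follows the number of result-vector dimensions rather than the source rank. A sketch of a broadcast-style read in the new syntax (hypothetical names, illustrative only):

    #bcast = affine_map<(d0, d1) -> (0)>
    func.func @broadcast_read_sketch(%src: memref<16x16xf32>, %i: index) -> vector<128xf32> {
      %pad = arith.constant 0.0 : f32
      // One in_bounds entry for the single (broadcast) result dimension.
      %v = vector.transfer_read %src[%i, %i], %pad
          {in_bounds = array<i1: true>, permutation_map = #bcast}
          : memref<16x16xf32>, vector<128xf32>
      return %v : vector<128xf32>
    }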
diff --git a/mlir/test/Dialect/ArmSME/vector-legalization.mlir b/mlir/test/Dialect/ArmSME/vector-legalization.mlir
index 458906a187982..a4919be9ce907 100644
--- a/mlir/test/Dialect/ArmSME/vector-legalization.mlir
+++ b/mlir/test/Dialect/ArmSME/vector-legalization.mlir
@@ -125,14 +125,14 @@ func.func @transfer_read_f32_scalable_8x8(%src: memref<?x?xi32>) -> vector<[8]x[8]xi32> {
   // CHECK-DAG: %[[C0_I32:.*]] = arith.constant 0 : i32
   // CHECK-DAG: %[[VSCALE:.*]] = vector.vscale
   // CHECK-DAG: %[[C4_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
-  // CHECK-DAG: %[[TOP_LEFT:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]]], %[[C0_I32]] {in_bounds = [true, true]} : memref<?x?xi32>, vector<[4]x[4]xi32>
-  // CHECK-DAG: %[[TOP_RIGHT:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C4_VSCALE]]], %[[C0_I32]] {in_bounds = [true, true]} : memref<?x?xi32>, vector<[4]x[4]xi32>
-  // CHECK-DAG: %[[BOTTOM_LEFT:.*]] = vector.transfer_read %[[SRC]][%[[C4_VSCALE]], %[[C0]]], %[[C0_I32]] {in_bounds = [true, true]} : memref<?x?xi32>, vector<[4]x[4]xi32>
-  // CHECK-DAG: %[[BOTTOM_RIGHT:.*]] = vector.transfer_read %[[SRC]][%[[C4_VSCALE]], %[[C4_VSCALE]]], %[[C0_I32]] {in_bounds = [true, true]} : memref<?x?xi32>, vector<[4]x[4]xi32>
+  // CHECK-DAG: %[[TOP_LEFT:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]]], %[[C0_I32]] {in_bounds = array<i1: true, true>} : memref<?x?xi32>, vector<[4]x[4]xi32>
+  // CHECK-DAG: %[[TOP_RIGHT:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C4_VSCALE]]], %[[C0_I32]] {in_bounds = array<i1: true, true>} : memref<?x?xi32>, vector<[4]x[4]xi32>
+  // CHECK-DAG: %[[BOTTOM_LEFT:.*]] = vector.transfer_read %[[SRC]][%[[C4_VSCALE]], %[[C0]]], %[[C0_I32]] {in_bounds = array<i1: true, true>} : memref<?x?xi32>, vector<[4]x[4]xi32>
+  // CHECK-DAG: %[[BOTTOM_RIGHT:.*]] = vector.transfer_read %[[SRC]][%[[C4_VSCALE]], %[[C4_VSCALE]]], %[[C0_I32]] {in_bounds = array<i1: true, true>} : memref<?x?xi32>, vector<[4]x[4]xi32>
   // CHECK-NEXT: return %[[TOP_LEFT]], %[[TOP_RIGHT]], %[[BOTTOM_LEFT]], %[[BOTTOM_RIGHT]] : vector<[4]x[4]xi32>, vector<[4]x[4]xi32>, vector<[4]x[4]xi32>, vector<[4]x[4]xi32>
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi32>, vector<[8]x[8]xi32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xi32>, vector<[8]x[8]xi32>
   return %0 : vector<[8]x[8]xi32>
 }
@@ -155,13 +155,13 @@ func.func @transfer_read_i16_scalable_8x16_masked(%src: memref<?x?xi16>, %dim0: index, %dim1: index) -> vector<[8]x[16]xi16> {
   // CHECK-DAG: %[[RIGHT_DIM_1:.*]] = arith.addi %[[DIM1]], %[[MINUS_8_VSCALE]] : index
   // CHECK-DAG: %[[LEFT_MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[8]x[8]xi1>
   // CHECK-DAG: %[[RIGHT_MASK:.*]] = vector.create_mask %[[DIM0]], %[[RIGHT_DIM_1]] : vector<[8]x[8]xi1>
-  // CHECK-DAG: %[[LEFT:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]]], %[[C0_I16]], %[[LEFT_MASK]] {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
-  // CHECK-DAG: %[[RIGHT:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C8_VSCALE]]], %[[C0_I16]], %[[RIGHT_MASK]] {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
+  // CHECK-DAG: %[[LEFT:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]]], %[[C0_I16]], %[[LEFT_MASK]] {in_bounds = array<i1: true, true>} : memref<?x?xi16>, vector<[8]x[8]xi16>
+  // CHECK-DAG: %[[RIGHT:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C8_VSCALE]]], %[[C0_I16]], %[[RIGHT_MASK]] {in_bounds = array<i1: true, true>} : memref<?x?xi16>, vector<[8]x[8]xi16>
   // CHECK-NEXT: return %[[LEFT]], %[[RIGHT]] : vector<[8]x[8]xi16>, vector<[8]x[8]xi16>
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i16
   %mask = vector.create_mask %dim0, %dim1 : vector<[8]x[16]xi1>
-  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[16]xi16>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {in_bounds = array<i1: true, true>} : memref<?x?xi16>, vector<[8]x[16]xi16>
   return %0 : vector<[8]x[16]xi16>
 }
@@ -180,14 +180,14 @@ func.func @transfer_write_f16_scalable_16x8(%dest: memref<?x?xf16>, %vec: vector<[16]x[8]xf16>) {
   // CHECK-DAG: %[[C8_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C8]] : index
   // CHECK-NEXT: scf.for %[[I:.*]] = %[[C0]] to %[[C8_VSCALE]] step %[[C1]] {
   // CHECK-NEXT:   %[[TOP_SLICE:.*]] = vector.extract %[[TOP]][%[[I]]] : vector<[8]xf16> from vector<[8]x[8]xf16>
-  // CHECK-NEXT:   vector.transfer_write %[[TOP_SLICE]], %[[DEST]][%[[I]], %[[C0]]] {in_bounds = [true]} : vector<[8]xf16>, memref<?x?xf16>
+  // CHECK-NEXT:   vector.transfer_write %[[TOP_SLICE]], %[[DEST]][%[[I]], %[[C0]]] {in_bounds = array<i1: true>} : vector<[8]xf16>, memref<?x?xf16>
   // CHECK-NEXT:   %[[BOTTOM_I:.*]] = arith.addi %[[C8_VSCALE]], %[[I]] : index
   // CHECK-NEXT:   %[[BOTTOM_SLICE:.*]] = vector.extract %[[BOTTOM]][%[[I]]] : vector<[8]xf16> from vector<[8]x[8]xf16>
-  // CHECK-NEXT:   vector.transfer_write %[[BOTTOM_SLICE]], %[[DEST]][%[[BOTTOM_I]], %[[C0]]] {in_bounds = [true]} : vector<[8]xf16>, memref<?x?xf16>
+  // CHECK-NEXT:   vector.transfer_write %[[BOTTOM_SLICE]], %[[DEST]][%[[BOTTOM_I]], %[[C0]]] {in_bounds = array<i1: true>} : vector<[8]xf16>, memref<?x?xf16>
   // CHECK-NEXT: }
   // CHECK-NEXT: return
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[8]xf16>, memref<?x?xf16>
+  vector.transfer_write %vec, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[16]x[8]xf16>, memref<?x?xf16>
   return
 }
@@ -201,7 +201,7 @@ func.func @transfer_write_i8_scalable_16x16_masked(%dest: memref<?x?xi8>, %vec: vector<[16]x[16]xi8>, %dim0: index) {
   // CHECK: vector.transfer_write {{.*}} : vector<[16]x[16]xi8>, memref<?x?xi8>
   %c0 = arith.constant 0 : index
   %mask = vector.create_mask %dim0, %dim0 : vector<[16]x[16]xi1>
-  vector.transfer_write %vec, %dest[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[16]x[16]xi8>, memref<?x?xi8>
+  vector.transfer_write %vec, %dest[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[16]x[16]xi8>, memref<?x?xi8>
   return
 }
@@ -227,22 +227,22 @@ func.func @transfer_write_f32_scalable_8x8_masked(%dest: memref<?x?xf32>, %dim0: index, %dim1: index, %vec: vector<[8]x[8]xf32>) {
   // CHECK-NEXT:   %[[UPPER_SLICE_MASK:.*]] = vector.extract %[[MASK]][%[[I]]] : vector<[8]xi1> from vector<[8]x[8]xi1>
   // CHECK-NEXT:   %[[TILE_0_SLICE_MASK:.*]] = vector.scalable.extract %[[UPPER_SLICE_MASK]][0] : vector<[4]xi1> from vector<[8]xi1>
   // CHECK-NEXT:   %[[TILE_0_SLICE:.*]] = vector.extract %[[TILE_0]][%[[I]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  // CHECK-NEXT:   vector.transfer_write %[[TILE_0_SLICE]], %[[DEST]][%[[I]], %[[C0]]], %[[TILE_0_SLICE_MASK]] {in_bounds = [true]} : vector<[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT:   vector.transfer_write %[[TILE_0_SLICE]], %[[DEST]][%[[I]], %[[C0]]], %[[TILE_0_SLICE_MASK]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT:   %[[TILE_1_SLICE_MASK:.*]] = vector.scalable.extract %[[UPPER_SLICE_MASK]][4] : vector<[4]xi1> from vector<[8]xi1>
   // CHECK-NEXT:   %[[TILE_1_SLICE:.*]] = vector.extract %[[TILE_1]][%[[I]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  // CHECK-NEXT:   vector.transfer_write %[[TILE_1_SLICE]], %[[DEST]][%[[I]], %[[C4_VSCALE]]], %[[TILE_1_SLICE_MASK]] {in_bounds = [true]} : vector<[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT:   vector.transfer_write %[[TILE_1_SLICE]], %[[DEST]][%[[I]], %[[C4_VSCALE]]], %[[TILE_1_SLICE_MASK]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT:   %[[LOWER_SLICE_I:.*]] = arith.addi %[[C4_VSCALE]], %[[I]] : index
   // CHECK-NEXT:   %[[LOWER_SLICE_MASK:.*]] = vector.extract %[[MASK]][%[[LOWER_SLICE_I]]] : vector<[8]xi1> from vector<[8]x[8]xi1>
   // CHECK-NEXT:   %[[TILE_2_SLICE_MASK:.*]] = vector.scalable.extract %[[LOWER_SLICE_MASK]][0] : vector<[4]xi1> from vector<[8]xi1>
   // CHECK-NEXT:   %[[TILE_2_SLICE:.*]] = vector.extract %[[TILE_2]][%[[I]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  // CHECK-NEXT:   vector.transfer_write %[[TILE_2_SLICE]], %[[DEST]][%[[LOWER_SLICE_I]], %[[C0]]], %[[TILE_2_SLICE_MASK]] {in_bounds = [true]} : vector<[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT:   vector.transfer_write %[[TILE_2_SLICE]], %[[DEST]][%[[LOWER_SLICE_I]], %[[C0]]], %[[TILE_2_SLICE_MASK]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT:   %[[TILE_3_SLICE_MASK:.*]] = vector.scalable.extract %[[LOWER_SLICE_MASK]][4] : vector<[4]xi1> from vector<[8]xi1>
   // CHECK-NEXT:   %[[TILE_3_SLICE:.*]] = vector.extract %[[TILE_3]][%[[I]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  // CHECK-NEXT:   vector.transfer_write %[[TILE_3_SLICE:.*]], %[[DEST]][%[[LOWER_SLICE_I]], %[[C4_VSCALE]]], %[[TILE_3_SLICE_MASK]] {in_bounds = [true]} : vector<[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT:   vector.transfer_write %[[TILE_3_SLICE:.*]], %[[DEST]][%[[LOWER_SLICE_I]], %[[C4_VSCALE]]], %[[TILE_3_SLICE_MASK]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT: }
   %c0 = arith.constant 0 : index
   %mask = vector.create_mask %dim0, %dim1 : vector<[8]x[8]xi1>
-  vector.transfer_write %vec, %dest[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
+  vector.transfer_write %vec, %dest[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref<?x?xf32>
   return
 }
@@ -255,7 +255,7 @@ func.func @transfer_write_f32_scalable_8x8_masked(%dest: memref<?x?xf32>, %dim0:
 func.func @negative_transfer_write_f32_scalable_8x8_tensor(%dest: tensor<?x?xf32>, %vec: vector<[8]x[8]xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, tensor<?x?xf32>
+  vector.transfer_write %vec, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, tensor<?x?xf32>
   return
 }
@@ -271,7 +271,7 @@ func.func @negative_transfer_write_f32_scalable_8x8_tensor(%dest: tensor<?x?xf32>,
-  vector.transfer_write %vec, %dest[%c0, %c0], %mask {permutation_map = #transpose, in_bounds = [true, true]} : vector<[8]x[8]xf32>, tensor<?x?xf32>
+  vector.transfer_write %vec, %dest[%c0, %c0], %mask {permutation_map = #transpose, in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, tensor<?x?xf32>
   return
 }
@@ -285,7 +285,7 @@ func.func @negative_transfer_write_f32_scalable_32x32(%dest: memref<?x?xf32>, %dim0: index, %dim1: index, %vec: vector<[32]x[32]xf32>)
 {
   %c0 = arith.constant 0 : index
   %mask = vector.create_mask %dim0, %dim1 : vector<[32]x[32]xi1>
-  vector.transfer_write %vec, %dest[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[32]x[32]xf32>, memref<?x?xf32>
+  vector.transfer_write %vec, %dest[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[32]x[32]xf32>, memref<?x?xf32>
   return
 }
@@ -308,28 +308,28 @@ func.func @transpose_f32_scalable_4x16_via_read(%src: memref<?x?xf32>, %dest: memref<?x?xf32>) {
   // CHECK-DAG: %[[C4_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
   // CHECK-DAG: %[[C8_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C8]] : index
   // CHECK-DAG: %[[C12_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C12]] : index
-  // CHECK-DAG: %[[TILE_0:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<[4]x[4]xf32>
-  // CHECK-DAG: %[[TILE_1:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C4_VSCALE]]], %[[PAD]] {in_bounds = [true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<[4]x[4]xf32>
-  // CHECK-DAG: %[[TILE_2:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C8_VSCALE]]], %[[PAD]] {in_bounds = [true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<[4]x[4]xf32>
-  // CHECK-DAG: %[[TILE_3:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C12_VSCALE]]], %[[PAD]] {in_bounds = [true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  // CHECK-DAG: %[[TILE_0:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  // CHECK-DAG: %[[TILE_1:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C4_VSCALE]]], %[[PAD]] {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  // CHECK-DAG: %[[TILE_2:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C8_VSCALE]]], %[[PAD]] {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  // CHECK-DAG: %[[TILE_3:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C12_VSCALE]]], %[[PAD]] {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : memref<?x?xf32>, vector<[4]x[4]xf32>
   // CHECK-NEXT: scf.for %[[I:.*]] = %[[C0]] to %[[C4_VSCALE]] step %[[C1]] {
   // CHECK-NEXT:   %[[TILE_0_SLICE:.*]] = vector.extract %[[TILE_0]][%[[I]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  // CHECK-NEXT:   vector.transfer_write %[[TILE_0_SLICE]], %[[DEST]][%[[I]], %[[C0]]] {in_bounds = [true]} : vector<[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT:   vector.transfer_write %[[TILE_0_SLICE]], %[[DEST]][%[[I]], %[[C0]]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT:   %[[TILE_1_I:.*]] = arith.addi %[[C4_VSCALE]], %[[I]] : index
   // CHECK-NEXT:   %[[TILE_1_SLICE:.*]] = vector.extract %[[TILE_1]][%[[I]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  // CHECK-NEXT:   vector.transfer_write %[[TILE_1_SLICE]], %[[DEST]][%[[TILE_1_I]], %[[C0]]] {in_bounds = [true]} : vector<[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT:   vector.transfer_write %[[TILE_1_SLICE]], %[[DEST]][%[[TILE_1_I]], %[[C0]]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT:   %[[TILE_2_I:.*]] = arith.addi %[[C8_VSCALE]], %[[I]] : index
   // CHECK-NEXT:   %[[TILE_2_SLICE:.*]] = vector.extract %[[TILE_2]][%[[I]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  // CHECK-NEXT:   vector.transfer_write %[[TILE_2_SLICE]], %[[DEST]][%[[TILE_2_I]], %[[C0]]] {in_bounds = [true]} : vector<[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT:   vector.transfer_write %[[TILE_2_SLICE]], %[[DEST]][%[[TILE_2_I]], %[[C0]]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT:   %[[TILE_3_I:.*]] = arith.addi %[[C12_VSCALE]], %[[I]] : index
   // CHECK-NEXT:   %[[TILE_3_SLICE:.*]] = vector.extract %[[TILE_3]][%[[I]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
-  // CHECK-NEXT:   vector.transfer_write %[[TILE_3_SLICE]], %[[DEST]][%[[TILE_3_I]], %[[C0]]] {in_bounds = [true]} : vector<[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT:   vector.transfer_write %[[TILE_3_SLICE]], %[[DEST]][%[[TILE_3_I]], %[[C0]]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT: }
   // CHECK-NEXT: return
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = #transpose, in_bounds = [true, true]} : memref<?x?xf32>, vector<[16]x[4]xf32>
-  vector.transfer_write %0, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[4]xf32>, memref<?x?xf32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = #transpose, in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[16]x[4]xf32>
+  vector.transfer_write %0, %dest[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[16]x[4]xf32>, memref<?x?xf32>
   return
 }
@@ -351,19 +351,19 @@ func.func @transpose_f32_scalable_4x16_via_write(%src: memref<?x?xf32>, %dest: memref<?x?xf32>) {
   // CHECK-DAG: %[[C4_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
   // CHECK-DAG: %[[C8_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C8]] : index
   // CHECK-DAG: %[[C12_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C12]] : index
-  // CHECK-DAG: %[[TILE_0:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
-  // CHECK-DAG: %[[TILE_1:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C4_VSCALE]]], %[[PAD]] {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
-  // CHECK-DAG: %[[TILE_2:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C8_VSCALE]]], %[[PAD]] {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
-  // CHECK-DAG: %[[TILE_3:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C12_VSCALE]]], %[[PAD]] {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
-  // CHECK-DAG: vector.transfer_write %[[TILE_0]], %[[DEST]][%[[C0]], %[[C0]]] {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<[4]x[4]xf32>, memref<?x?xf32>
-  // CHECK-DAG: vector.transfer_write %[[TILE_1]], %[[DEST]][%[[C4_VSCALE]], %[[C0]]] {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<[4]x[4]xf32>, memref<?x?xf32>
-  // CHECK-DAG: vector.transfer_write %[[TILE_2]], %[[DEST]][%[[C8_VSCALE]], %[[C0]]] {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<[4]x[4]xf32>, memref<?x?xf32>
-  // CHECK-DAG: vector.transfer_write %[[TILE_3]], %[[DEST]][%[[C12_VSCALE]], %[[C0]]] {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<[4]x[4]xf32>, memref<?x?xf32>
+  // CHECK-DAG: %[[TILE_0:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  // CHECK-DAG: %[[TILE_1:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C4_VSCALE]]], %[[PAD]] {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  // CHECK-DAG: %[[TILE_2:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C8_VSCALE]]], %[[PAD]] {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  // CHECK-DAG: %[[TILE_3:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C12_VSCALE]]], %[[PAD]] {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>
+  // CHECK-DAG: vector.transfer_write %[[TILE_0]], %[[DEST]][%[[C0]], %[[C0]]] {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : vector<[4]x[4]xf32>, memref<?x?xf32>
+  // CHECK-DAG: vector.transfer_write %[[TILE_1]], %[[DEST]][%[[C4_VSCALE]], %[[C0]]] {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : vector<[4]x[4]xf32>, memref<?x?xf32>
+  // CHECK-DAG: vector.transfer_write %[[TILE_2]], %[[DEST]][%[[C8_VSCALE]], %[[C0]]] {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : vector<[4]x[4]xf32>, memref<?x?xf32>
+  // CHECK-DAG: vector.transfer_write %[[TILE_3]],
 %[[DEST]][%[[C12_VSCALE]], %[[C0]]] {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : vector<[4]x[4]xf32>, memref<?x?xf32>
   // CHECK-NEXT: return
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[16]xf32>
-  vector.transfer_write %0, %dest[%c0, %c0] {permutation_map = #transpose, in_bounds = [true, true]} : vector<[4]x[16]xf32>, memref<?x?xf32>
+  %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[16]xf32>
+  vector.transfer_write %0, %dest[%c0, %c0] {permutation_map = #transpose, in_bounds = array<i1: true, true>} : vector<[4]x[16]xf32>, memref<?x?xf32>
   return
 }
@@ -470,10 +470,10 @@ func.func @lift_illegal_transpose_to_memory_with_arith_extop(%a: index, %b: inde
 // CHECK-LABEL: @lift_illegal_transpose_to_memory_with_in_bounds_attr
 func.func @lift_illegal_transpose_to_memory_with_in_bounds_attr(%a: index, %b: index, %memref: memref<?x?xf32>) -> vector<4x[8]xf32> {
   // CHECK: vector.transfer_read
-  // CHECK-SAME: in_bounds = [true, false]
-  // CHECK-NOT: in_bounds = [false, true]
+  // CHECK-SAME: in_bounds = array<i1: true, false>
+  // CHECK-NOT: in_bounds = array<i1: false, true>
   %pad = arith.constant 0.0 : f32
-  %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref<?x?xf32>, vector<[8]x4xf32>
+  %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = array<i1: false, true>}: memref<?x?xf32>, vector<[8]x4xf32>
   %legalType = vector.transpose %illegalRead, [1, 0] : vector<[8]x4xf32> to vector<4x[8]xf32>
   return %legalType : vector<4x[8]xf32>
 }
@@ -517,7 +517,7 @@ func.func @lift_illegal_2d_shape_cast_to_memory(%a: index, %b: index, %memref: memref<?x?xf32>) -> vector<1x[4]xf32> {
   // CHECK: vector.transfer_read {{.*}} : memref<?x?xf32>, vector<1x[4]xf32>
   // CHECK-NOT: vector.shape_cast
   %pad = arith.constant 0.0 : f32
-  %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref<?x?xf32>, vector<[4]x1xf32>
+  %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = array<i1: false, true>}: memref<?x?xf32>, vector<[4]x1xf32>
   %cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<1x[4]xf32>
   return %cast : vector<1x[4]xf32>
 }
@@ -529,7 +529,7 @@ func.func @lift_illegal_1d_shape_cast_to_memory(%a: index, %b: index, %memref: memref<?x?xf32>) -> vector<[4]xf32> {
   // CHECK: vector.transfer_read {{.*}} : memref<?x?xf32>, vector<1x[4]xf32>
   // CHECK-NOT: vector.shape_cast {{.*}} : vector<[4]x1xf32> to vector<[4]xf32>
   %pad = arith.constant 0.0 : f32
-  %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref<?x?xf32>, vector<[4]x1xf32>
+  %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = array<i1: false, true>}: memref<?x?xf32>, vector<[4]x1xf32>
   %cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<[4]xf32>
   return %cast : vector<[4]xf32>
 }
@@ -565,9 +565,9 @@ func.func @transpose_store_scalable_via_za(%vec: vector<2x[4]xf32>, %dest: memre
   // CHECK-NEXT: %[[VSCALE:.*]] = vector.vscale
   // CHECK-NEXT: %[[C4_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
   // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C4_VSCALE]], %[[C2]] : vector<[4]x[4]xi1>
-  // CHECK-NEXT: vector.transfer_write %[[RES]], %[[DEST]][%[[I]], %[[J]]], %[[MASK]] {in_bounds = [true, true], permutation_map = #[[$TRANSPOSE_MAP_0]]} : vector<[4]x[4]xf32>, memref<?x?xf32>
+  // CHECK-NEXT: vector.transfer_write %[[RES]], %[[DEST]][%[[I]], %[[J]]], %[[MASK]] {in_bounds = array<i1: true, true>, permutation_map = #[[$TRANSPOSE_MAP_0]]} : vector<[4]x[4]xf32>, memref<?x?xf32>
   %tr = vector.transpose %vec, [1, 0] : vector<2x[4]xf32> to vector<[4]x2xf32>
-  vector.transfer_write %tr, %dest[%i, %j] {in_bounds = [true, true]} : vector<[4]x2xf32>, memref<?x?xf32>
+  vector.transfer_write %tr, %dest[%i, %j] {in_bounds = array<i1: true, true>} : vector<[4]x2xf32>, memref<?x?xf32>
   return
 }
@@ -584,7 +584,7 @@ func.func @transpose_store_scalable_via_za_masked(%vec: vector<2x[4]xf32>, %dest: memref<?x?xf32>, %a: index, %b: index) {
   %c0 = arith.constant 0 : index
   %mask = vector.create_mask %a, %b : vector<[4]x2xi1>
   %tr = vector.transpose %vec, [1, 0] : vector<2x[4]xf32> to vector<[4]x2xf32>
-  vector.transfer_write %tr, %dest[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[4]x2xf32>, memref<?x?xf32>
+  vector.transfer_write %tr, %dest[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[4]x2xf32>, memref<?x?xf32>
   return
 }
@@ -612,7 +612,7 @@ func.func @transpose_store_scalable_via_za_multi_tile(%vec: vector<8x[4]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
   // CHECK: %[[J_OFFSET:.*]] = arith.addi %[[J]], %[[C4]] : index
   // CHECK: vector.transfer_write %[[TILE_1]], %[[DEST]][%[[I]], %[[J_OFFSET]]], %[[MASK]] {{.*}} : vector<[4]x[4]xf32>, memref<?x?xf32>
   %tr = vector.transpose %vec, [1, 0] : vector<8x[4]xf32> to vector<[4]x8xf32>
-  vector.transfer_write %tr, %dest[%i, %j] {in_bounds = [true, true]} : vector<[4]x8xf32>, memref<?x?xf32>
+  vector.transfer_write %tr, %dest[%i, %j] {in_bounds = array<i1: true, true>} : vector<[4]x8xf32>, memref<?x?xf32>
   return
 }
@@ -633,7 +633,7 @@ func.func @transpose_store_scalable_via_za_multi_tile_wide(%vec: vector<2x[8]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
   // CHECK: %[[I_OFFSET:.*]] = arith.addi %c4_vscale, %[[I]] : index
   // CHECK: vector.transfer_write %[[TILE_0]], %{{.*}}[%[[I_OFFSET]], %[[J]]]
   %tr = vector.transpose %vec, [1, 0] : vector<2x[8]xf32> to vector<[8]x2xf32>
-  vector.transfer_write %tr, %dest[%i, %j] {in_bounds = [true, true]} : vector<[8]x2xf32>, memref<?x?xf32>
+  vector.transfer_write %tr, %dest[%i, %j] {in_bounds = array<i1: true, true>} : vector<[8]x2xf32>, memref<?x?xf32>
   return
 }
@@ -643,6 +643,6 @@ func.func @transpose_store_scalable_via_za_multi_tile_wide(%vec: vector<2x[8]xf3
 // CHECK-NOT: arm_sme.get_tile
 func.func @negative_transpose_store_scalable_via_za__bad_source_shape(%vec: vector<2x[7]xf32>, %dest: memref<?x?xf32>, %i: index, %j: index) {
   %tr = vector.transpose %vec, [1, 0] : vector<2x[7]xf32> to vector<[7]x2xf32>
-  vector.transfer_write %tr, %dest[%i, %j] {in_bounds = [true, true]} : vector<[7]x2xf32>, memref<?x?xf32>
+  vector.transfer_write %tr, %dest[%i, %j] {in_bounds = array<i1: true, true>} : vector<[7]x2xf32>, memref<?x?xf32>
   return
 }
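The ArmSME cases above also show that scalability does not change the attribute's shape: a transfer of a `vector<[4]x[4]xf32>` still carries exactly one boolean per dimension. A minimal sketch in the new syntax (hypothetical names, illustrative only):

    func.func @scalable_bounds_sketch(%dest: memref<?x?xf32>, %vec: vector<[4]x[4]xf32>, %i: index) {
      // Two flags for two dimensions, scalable or not.
      vector.transfer_write %vec, %dest[%i, %i] {in_bounds = array<i1: true, true>}
          : vector<[4]x[4]xf32>, memref<?x?xf32>
      return
    }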
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
index 42d9cc00d3ff5..b43b2ae61e24f 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
@@ -732,16 +732,16 @@ func.func @matmul_on_tensors(
   // CHECK: vector.transfer_write
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
-  %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
-  %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
+  %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = array<i1: false, true>} : tensor<518x518xf32>, vector<256x256xf32>
+  %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<256x256xf32>, tensor<256x256xf32>
   // CHECK: linalg.fill
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   // CHECK: vector.transfer_write
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
-  %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
-  %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
+  %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = array<i1: false, true>} : tensor<518x518xf32>, vector<256x256xf32>
+  %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<256x256xf32>, tensor<256x256xf32>
   // CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
@@ -791,7 +791,7 @@ func.func @insert_slice_chain(
   %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // CHECK: vector.transfer_write
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
-  %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
+  %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<32x90xf32>, tensor<32x90xf32>
   // CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
@@ -801,7 +801,7 @@ func.func @insert_slice_chain(
   %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
   // CHECK: vector.transfer_write
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
-  %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
+  %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<30x90xf32>, tensor<30x90xf32>
   // CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
@@ -930,7 +930,7 @@ func.func @double_insert_slice_into_alias(
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // CHECK: vector.transfer_write
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
-  %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
+  %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<32x90xf32>, tensor<32x90xf32>
   // CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
@@ -940,7 +940,7 @@ func.func @double_insert_slice_into_alias(
   %10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor to tensor<30x90xf32>
   // CHECK: vector.transfer_write
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
-  %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
+  %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<30x90xf32>, tensor<30x90xf32>
   // CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor
diff --git a/mlir/test/Dialect/GPU/subgroup-mma-vector-unroll.mlir b/mlir/test/Dialect/GPU/subgroup-mma-vector-unroll.mlir
index 03aba89c11afc..c56a40f52de76 100644
--- a/mlir/test/Dialect/GPU/subgroup-mma-vector-unroll.mlir
+++ b/mlir/test/Dialect/GPU/subgroup-mma-vector-unroll.mlir
@@ -20,16 +20,16 @@ func.func @matmul(%lhs: memref<32x32xf32>, %rhs: memref<32x32xf32>, %out: memref
   %7 = scf.for %arg0 = %c0 to %c32 step %c16 iter_args(%arg1 = %cst) -> (vector<16x16xf32>) {
     %10 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%5]
     %11 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%arg0]
-    %12 = vector.transfer_read %lhs[%10, %11], %cst_0 {in_bounds = [true, true]} : memref<32x32xf32>, vector<16x16xf32>
+    %12 = vector.transfer_read %lhs[%10, %11], %cst_0 {in_bounds = array<i1: true, true>} : memref<32x32xf32>, vector<16x16xf32>
     %16 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%6]
     %17 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%arg0]
-    %18 = vector.transfer_read %rhs[%17, %16], %cst_0 {in_bounds = [true, true]} : memref<32x32xf32>, vector<16x16xf32>
+    %18 = vector.transfer_read %rhs[%17, %16], %cst_0 {in_bounds = array<i1: true, true>} : memref<32x32xf32>, vector<16x16xf32>
     %22 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %12, %18, %arg1 : vector<16x16xf32>, vector<16x16xf32> into vector<16x16xf32>
     scf.yield %22 : vector<16x16xf32>
   }
   %8 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%5]
   %9 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%6]
-  vector.transfer_write %7, %out[%8, %9] {in_bounds = [true, true]} : vector<16x16xf32>, memref<32x32xf32>
+  vector.transfer_write %7, %out[%8, %9] {in_bounds = array<i1: true, true>} : vector<16x16xf32>, memref<32x32xf32>
   return
 }
@@ -76,20 +76,20 @@ func.func @gathered_matmul(%lhs: memref<32x32xf32>, %rhs: memref<32x32xf32>, %ou
     %12 = vector.broadcast %11 : vector<4xindex> to vector<4x4xindex>
     %13 = arith.addi %12, %cst_2 : vector<4x4xindex>
     %14 = vector.gather %lhs[%c0, %c0] [%13], %cst_mask, %cst_pt : memref<32x32xf32>, vector<4x4xindex>, vector<4x4xi1>, vector<4x4xf32> into vector<4x4xf32>
-    vector.transfer_write %14, %alloc[%c0, %c0] {in_bounds = [true, true]} : vector<4x4xf32>, memref<32x32xf32>
+    vector.transfer_write %14, %alloc[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<4x4xf32>, memref<32x32xf32>
     gpu.barrier
     %15 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%5]
     %16 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%arg0]
-    %17 = vector.transfer_read %alloc[%15, %16], %cst_0 {in_bounds = [true, true]} : memref<32x32xf32>, vector<16x16xf32>
+    %17 = vector.transfer_read %alloc[%15, %16], %cst_0 {in_bounds = array<i1: true, true>} : memref<32x32xf32>, vector<16x16xf32>
     %18 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%6]
    %19 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%arg0]
-    %20 = vector.transfer_read %rhs[%19, %18], %cst_0 {in_bounds = [true, true]} : memref<32x32xf32>, vector<16x16xf32>
+    %20 = vector.transfer_read %rhs[%19, %18], %cst_0 {in_bounds = array<i1: true, true>} : memref<32x32xf32>, vector<16x16xf32>
     %21 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %17, %20, %arg1 : vector<16x16xf32>, vector<16x16xf32> into vector<16x16xf32>
     scf.yield %21 : vector<16x16xf32>
   }
   %8 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%5]
   %9 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)>(%c0)[%6]
-  vector.transfer_write %7, %out[%8, %9] {in_bounds = [true, true]} : vector<16x16xf32>, memref<32x32xf32>
+  vector.transfer_write
%7, %out[%8, %9] {in_bounds = array} : vector<16x16xf32>, memref<32x32xf32> return } diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir index 72572c6a38de1..1bd47bd61ecfa 100644 --- a/mlir/test/Dialect/GPU/transform-gpu.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu.mlir @@ -672,7 +672,7 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> { // CHECK: memref.subview %{{.*}}[%[[THX]]] %1 = affine.apply #map1(%arg2) %subview_0 = memref.subview %subview[%1] [32] [1] : memref<128xf32, strided<[1], offset: ?>> to memref<32xf32, strided<[1], offset: ?>> - vector.transfer_write %cst, %subview_0[%c0] {in_bounds = [true]} : vector<32xf32>, memref<32xf32, strided<[1], offset: ?>> + vector.transfer_write %cst, %subview_0[%c0] {in_bounds = array} : vector<32xf32>, memref<32xf32, strided<[1], offset: ?>> memref.copy %subview_0, %subview_0 : memref<32xf32, strided<[1], offset: ?>> to memref<32xf32, strided<[1], offset: ?>> } {mapping = [#gpu.warp]} memref.copy %subview, %subview : memref<128xf32, strided<[1], offset: ?>> to memref<128xf32, strided<[1], offset: ?>> diff --git a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir index 3530770580782..918a2f4b3e8ae 100644 --- a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir +++ b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir @@ -13,7 +13,7 @@ func.func @testAllocRead(%in: memref) -> vector<32 x f32> { %alloc = memref.alloc() : memref<32 x f32> %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> - %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> + %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = array} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<32 x f32> return %0: vector<32 x f32> } @@ -32,7 +32,7 @@ func.func @testAllocFillRead(%in: memref) -> vector<32 x f32> { linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> - %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> + %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = array} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<32 x f32> return %0: vector<32 x f32> } @@ -51,7 +51,7 @@ func.func @testViewRead(%in: memref) -> vector<32 x f32> { %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> - %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> + %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = array} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<128 x i8> return %0: vector<32 x f32> } @@ -71,7 +71,7 @@ func.func @testViewFillRead(%in: memref) -> vector<32 x f32> { %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> linalg.fill ins(%f0 : f32) outs(%view : memref<32 x f32>) memref.copy %in, %subview : memref to memref<16 x f32> - %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> + %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = array} : memref<32 x f32>, vector<32 x f32> 
memref.dealloc %alloc : memref<128 x i8>
return %0: vector<32 x f32>
}
@@ -88,7 +88,7 @@ func.func @testAllocWrite(%vec: vector<32 x f32>, %out: memref) {
%f0 = arith.constant 0.0: f32
%alloc = memref.alloc() : memref<32 x f32>
%subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32>
- vector.transfer_write %vec, %alloc[%c0] {in_bounds = [true]} : vector<32 x f32>, memref<32 x f32>
+ vector.transfer_write %vec, %alloc[%c0] {in_bounds = array<i1: true>} : vector<32 x f32>, memref<32 x f32>
memref.copy %subview, %out : memref<16 x f32> to memref
memref.dealloc %alloc : memref<32 x f32>
return
@@ -107,7 +107,7 @@ func.func @testViewWrite(%vec: vector<32 x f32>, %out: memref) {
%c0 = arith.constant 0 : index
%alloc = memref.alloc() : memref<128 x i8>
%view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32>
%subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32>
- vector.transfer_write %vec, %view[%c0] {in_bounds = [true]} : vector<32 x f32>, memref<32 x f32>
+ vector.transfer_write %vec, %view[%c0] {in_bounds = array<i1: true>} : vector<32 x f32>, memref<32 x f32>
memref.copy %subview, %out : memref<16 x f32> to memref
memref.dealloc %alloc : memref<128 x i8>
return
diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir
index 44c15c272bb3e..325cb329b3e7f 100644
--- a/mlir/test/Dialect/Linalg/hoisting.mlir
+++ b/mlir/test/Dialect/Linalg/hoisting.mlir
@@ -200,7 +200,7 @@ func.func @hoist_vector_transfer_pairs_in_affine_loops(%memref0: memref<64x64xi32>
affine.for %arg3 = 0 to 64 {
affine.for %arg4 = 0 to 64 step 16 {
affine.for %arg5 = 0 to 64 {
- %0 = vector.transfer_read %memref0[%arg3, %arg5], %c0_i32 {in_bounds = [true], permutation_map = affine_map<(d0, d1) -> (0)>} : memref<64x64xi32>, vector<16xi32>
+ %0 = vector.transfer_read %memref0[%arg3, %arg5], %c0_i32 {in_bounds = array<i1: true>, permutation_map = affine_map<(d0, d1) -> (0)>} : memref<64x64xi32>, vector<16xi32>
%1 = vector.transfer_read %memref1[%arg5, %arg4], %c0_i32 : memref<64x64xi32>, vector<16xi32>
%2 = vector.transfer_read %memref2[%arg3, %arg4], %c0_i32 : memref<64x64xi32>, vector<16xi32>
%3 = arith.muli %0, %1 : vector<16xi32>
@@ -233,10 +233,10 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[ALLOC_0:.+]] = memref.alloc() : memref<32x128xf32>
// CHECK: %[[CAST:.+]] = memref.cast %[[ALLOC_0]] : memref<32x128xf32> to memref<32x128xf32, strided<[128, 1],
// CHECK-SAME: offset: ?>>
-// CHECK: %[[D0:.+]] = vector.transfer_read %[[ALLOC]][%[[C0]], %[[C0]]], %[[CST]] {in_bounds = [true, true]} :
+// CHECK: %[[D0:.+]] = vector.transfer_read %[[ALLOC]][%[[C0]], %[[C0]]], %[[CST]] {in_bounds = array<i1: true, true>} :
// CHECK-SAME: memref<32x64xf32>, vector<32x64xf32>
// CHECK: scf.for %[[ARG0:.+]] = %[[C0]] to %[[C1024]] step %[[C128]] {
-// CHECK: %[[D1:.+]] = vector.transfer_read %[[ALLOC_0]][%[[C0]], %[[C0]]], %[[CST]] {in_bounds = [true, true]}
+// CHECK: %[[D1:.+]] = vector.transfer_read %[[ALLOC_0]][%[[C0]], %[[C0]]], %[[CST]] {in_bounds = array<i1: true, true>}
// CHECK-SAME: : memref<32x128xf32>, vector<32x128xf32>
// CHECK: "some_use"(%[[D0]], %[[D1]], %[[CAST]]) : (vector<32x64xf32>, vector<32x128xf32>, memref<32x128xf32,
// CHECK-SAME: strided<[128, 1], offset: ?>>) -> ()
@@ -252,8 +252,8 @@ func.func @hoist_vector_transfer_read() {
%memref2 = memref.alloc() : memref<32x128xf32>
%subview2 = memref.subview %memref2[%c0, %c0] [32, 128] [1, 1]: memref<32x128xf32> to memref<32x128xf32, strided<[128, 1], offset: ?>>
scf.for %arg0 = %c0 to %c1024 step %c128 {
- %2 = vector.transfer_read %memref2[%c0, %c0], %cst_2 {in_bounds = [true, true]} : memref<32x128xf32>, vector<32x128xf32>
- %3 = vector.transfer_read %memref0[%c0, %c0], %cst_2 {in_bounds = [true, true]} : memref<32x64xf32>, vector<32x64xf32>
+ %2 = vector.transfer_read %memref2[%c0, %c0], %cst_2 {in_bounds = array<i1: true, true>} : memref<32x128xf32>, vector<32x128xf32>
+ %3 = vector.transfer_read %memref0[%c0, %c0], %cst_2 {in_bounds = array<i1: true, true>} : memref<32x64xf32>, vector<32x64xf32>
"some_use"(%3, %2, %subview2) : (vector<32x64xf32>, vector<32x128xf32>, memref<32x128xf32, strided<[128, 1], offset: ?>>) -> ()
}
memref.dealloc %memref0 : memref<32x64xf32>
@@ -287,11 +287,11 @@ func.func @non_matching_transfers(%m: memref<6x1x7x32xf32>) {
%cst = arith.constant dense<5.5> : vector<6x7x32xf32>
%cst_0 = arith.constant 0.0 : f32
scf.for %iv = %c0 to %c1024 step %c128 {
- %read = vector.transfer_read %m[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = [true, true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>} : memref<6x1x7x32xf32>, vector<6x7x32xf32>
+ %read = vector.transfer_read %m[%c0, %c0, %c0, %c0], %cst_0 {in_bounds = array<i1: true, true, true>, permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>} : memref<6x1x7x32xf32>, vector<6x7x32xf32>
%added = arith.addf %read, %cst : vector<6x7x32xf32>
%bc = vector.broadcast %added : vector<6x7x32xf32> to vector<1x6x7x32xf32>
%tr = vector.transpose %bc, [1, 0, 2, 3] : vector<1x6x7x32xf32> to vector<6x1x7x32xf32>
- vector.transfer_write %tr, %m[%c0, %c0, %c0, %c0] {in_bounds = [true, true, true, true]} : vector<6x1x7x32xf32>, memref<6x1x7x32xf32>
+ vector.transfer_write %tr, %m[%c0, %c0, %c0, %c0] {in_bounds = array<i1: true, true, true, true>} : vector<6x1x7x32xf32>, memref<6x1x7x32xf32>
}
return
}
@@ -328,9 +328,9 @@ func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1x
scf.for %arg0 = %c0 to %c20 step %c4 {
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
- vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
- %read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
- vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+ vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = array<i1: true>} : vector<4xi32>, memref<4xi32>
+ %read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = array<i1: true, true, true>} : memref<1x4x1xi32>, vector<1x4x1xi32>
+ vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
}
return
}
@@ -364,8 +364,8 @@ func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) {
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x12x1xi32>
scf.for %arg0 = %c0 to %c20 step %c4 {
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
- vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
- %read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
+ vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>} : vector<1x12x1xi32>, memref<1x12x1xi32>
+ %read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = array<i1: true>} : memref<12xi32>, vector<12xi32>
"prevent.dce"(%read) : (vector<12xi32>) ->()
}
return
@@ -410,10 +410,10 @@ func.func @no_hoisting_write_to_memref(%rhs: i32, %arg1: vector<1xi32>) {
%collapsed_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
scf.for %_ = %c0 to %c20 step %c4 {
%collapsed_2 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
- %lhs = vector.transfer_read %collapsed_1[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
- %acc = vector.transfer_read %collapsed_2[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
+ %lhs = vector.transfer_read %collapsed_1[%c0], %c0_i32 {in_bounds = array<i1: true>} : memref<2xi32>, vector<1xi32>
+ %acc = vector.transfer_read %collapsed_2[%c0], %c0_i32 {in_bounds = array<i1: true>} : memref<2xi32>, vector<1xi32>
%op = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<1xi32>, i32
- vector.transfer_write %op, %collapsed_1[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32>
+ vector.transfer_write %op, %collapsed_1[%c0] {in_bounds = array<i1: true>} : vector<1xi32>, memref<2xi32>
}
return
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-compose-masked-vectorize-and-cleanups.mlir b/mlir/test/Dialect/Linalg/transform-op-compose-masked-vectorize-and-cleanups.mlir
index 61fe3da34e1d5..440904cc01302 100644
--- a/mlir/test/Dialect/Linalg/transform-op-compose-masked-vectorize-and-cleanups.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-compose-masked-vectorize-and-cleanups.mlir
@@ -4,16 +4,16 @@ func.func @masked_matmul(%module: memref, %arg1: memref, %arg2: memref) {
// CHECK: %[[MLHS:.*]] = vector.create_mask {{.*}} : vector<8x8xi1>
- // CHECK: %[[LHS:.*]] = vector.transfer_read %{{.*}}, %[[MLHS]] {in_bounds = [true, true]} : memref>, vector<8x8xf32>
+ // CHECK: %[[LHS:.*]] = vector.transfer_read %{{.*}}, %[[MLHS]] {in_bounds = array<i1: true, true>} : memref>, vector<8x8xf32>
// CHECK: %[[MRHS:.*]] = vector.create_mask {{.*}} : vector<8x8xi1>
- // CHECK: %[[RHS:.*]] = vector.transfer_read %{{.*}}, %[[MRHS]] {in_bounds = [true, true]} : memref>, vector<8x8xf32>
+ // CHECK: %[[RHS:.*]] = vector.transfer_read %{{.*}}, %[[MRHS]] {in_bounds = array<i1: true, true>} : memref>, vector<8x8xf32>
// CHECK: %[[MACC:.*]] = vector.create_mask {{.*}} : vector<8x8xi1>
- // CHECK: %[[ACC:.*]] = vector.transfer_read {{.*}}, %[[MACC]] {in_bounds = [true, true]} : memref>, vector<8x8xf32>
+ // CHECK: %[[ACC:.*]] = vector.transfer_read {{.*}}, %[[MACC]] {in_bounds = array<i1: true, true>} : memref>, vector<8x8xf32>
// CHECK: %[[MRES:.*]] = vector.create_mask {{.*}} : vector<8x8x8xi1>
// CHECK: %[[RES:.*]] = vector.mask %[[MRES]] { vector.contract
// CHECK-SAME: : vector<8x8xf32>, vector<8x8xf32> into vector<8x8xf32>
// CHECK-SAME: : vector<8x8x8xi1> -> vector<8x8xf32>
- // CHECK: vector.transfer_write %[[RES]], %{{.*}}, %[[MACC]] {in_bounds = [true, true]} : vector<8x8xf32>, memref>
+ // CHECK: vector.transfer_write %[[RES]], %{{.*}}, %[[MACC]] {in_bounds = array<i1: true, true>} : vector<8x8xf32>, memref>
linalg.matmul ins(%module, %arg1 : memref, memref) outs(%arg2 : memref)
return
}
diff --git a/mlir/test/Dialect/Linalg/vectorization-scalable.mlir b/mlir/test/Dialect/Linalg/vectorization-scalable.mlir
index c3a30e3ee209e..35e76ad3fd0d0 100644
--- a/mlir/test/Dialect/Linalg/vectorization-scalable.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-scalable.mlir
@@ -20,11 +20,11 @@ func.func @vectorize_dynamic_identity(%arg0: tensor,
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor
// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<[4]xi1>
-// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
-// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
-// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
+// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array<i1: true>} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
+// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array<i1: true>} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
+// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array<i1: true>} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<[4]xf32>
-// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<[4]xf32>, tensor } : vector<[4]xi1> -> tensor
+// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = array<i1: true>} : vector<[4]xf32>, tensor } : vector<[4]xi1> -> tensor
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -60,14 +60,14 @@ func.func @vectorize_partial_dynamic_identity(%arg0: tensor<8x?xf32>,
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 8 : index
// CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_4]] : vector<8x[32]xi1>
-// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_6]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
+// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_6]] {in_bounds = array<i1: true, true>} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_1]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_10]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
+// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_1]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_10]] {in_bounds = array<i1: true, true>} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
// CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_2]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_12]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
+// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_2]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_12]] {in_bounds = array<i1: true, true>} : tensor<8x?xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
// CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] : vector<8x[32]xf32>
// CHECK: %[[VAL_15:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_16:.*]] = vector.mask %[[VAL_8]] { vector.transfer_write %[[VAL_14]], %[[VAL_2]][%[[VAL_15]], %[[VAL_15]]] {in_bounds = [true, true]} : vector<8x[32]xf32>, tensor<8x?xf32> } : vector<8x[32]xi1> -> tensor<8x?xf32>
+// CHECK: %[[VAL_16:.*]] = vector.mask %[[VAL_8]] { vector.transfer_write %[[VAL_14]], %[[VAL_2]][%[[VAL_15]], %[[VAL_15]]] {in_bounds = array<i1: true, true>} : vector<8x[32]xf32>, tensor<8x?xf32> } : vector<8x[32]xi1> -> tensor<8x?xf32>
module attributes {transform.with_named_sequence} {
@@ -103,14 +103,14 @@ func.func @vectorize_static_shape_with_mask(%arg0: tensor<8x30xf32>,
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 30 : index
// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_6]] : vector<8x[32]xi1>
-// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_0]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_4]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
+// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_0]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_4]] {in_bounds = array<i1: true, true>} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_1]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
+// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_1]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_9]] {in_bounds = array<i1: true, true>} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
// CHECK: %[[VAL_11:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_2]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_11]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
+// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_2]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_11]] {in_bounds = array<i1: true, true>} : tensor<8x30xf32>, vector<8x[32]xf32> } : vector<8x[32]xi1> -> vector<8x[32]xf32>
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<8x[32]xf32>
// CHECK: %[[VAL_14:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %[[VAL_13]], %[[VAL_2]][%[[VAL_14]], %[[VAL_14]]] {in_bounds = [true, true]} : vector<8x[32]xf32>, tensor<8x30xf32> } : vector<8x[32]xi1> -> tensor<8x30xf32>
+// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %[[VAL_13]], %[[VAL_2]][%[[VAL_14]], %[[VAL_14]]] {in_bounds = array<i1: true, true>} : vector<8x[32]xf32>, tensor<8x30xf32> } : vector<8x[32]xi1> -> tensor<8x30xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -132,7 +132,7 @@ func.func @vectorize_dynamic_fill(%A : tensor, %arg0 : f32) -> tensor
// CHECK: %[[BCAST:.*]] = vector.broadcast %{{.*}} : f32 to vector<8x[16]xf32>
-// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = [true, true]} : vector<8x[16]xf32>, tensor } : vector<8x[16]xi1>
+// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = array<i1: true, true>} : vector<8x[16]xf32>, tensor } : vector<8x[16]xi1>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -168,8 +168,8 @@ func.func @vectorize_linalg_index(%arg0: tensor<3x3x?xf32>, %arg1: tensor<1x1x?xf32>
// CHECK: %[[DST_DIM2:.*]] = tensor.dim %[[DST]], %[[C2]] : tensor<1x1x?xf32>
// CHECK: %[[MASK:.*]] = vector.create_mask %[[C1]], %[[C1]], %[[DST_DIM2]] : vector<1x1x[4]xi1>
// CHECK: %[[INDEX_VEC:.*]] = vector.step : vector<[4]xindex>
-// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]][%c0, %c0, %2], %cst {in_bounds = [true, true, true]} : tensor<3x3x?xf32>, vector<1x1x[4]xf32> } : vector<1x1x[4]xi1> -> vector<1x1x[4]xf32>
-// CHECK: %[[OUT:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[DST]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x[4]xf32>, tensor<1x1x?xf32> } : vector<1x1x[4]xi1> -> tensor<1x1x?xf32>
+// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]][%c0, %c0, %2], %cst {in_bounds = array<i1: true, true, true>} : tensor<3x3x?xf32>, vector<1x1x[4]xf32> } : vector<1x1x[4]xi1> -> vector<1x1x[4]xf32>
+// CHECK: %[[OUT:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[DST]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: true, true, true>} : vector<1x1x[4]xf32>, tensor<1x1x?xf32> } : vector<1x1x[4]xi1> -> tensor<1x1x?xf32>
// CHECK: return %[[OUT]] : tensor<1x1x?xf32>
module attributes {transform.with_named_sequence} {
@@ -207,7 +207,7 @@ func.func @vectorize_dynamic_reduction_scalable_1d(%arg0: tensor,
// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<[4]xi1>
-// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
+// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true>} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
// CHECK: %[[C0_F32:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VEC_RD_1:.*]] = vector.transfer_read %[[ARG_1]][], %[[C0_F32]] : tensor<f32>, vector<f32>
// CHECK: %[[ACC_f32:.*]] = vector.extractelement %[[VEC_RD_1]][] : vector<f32>
@@ -249,13 +249,13 @@ func.func @vectorize_dynamic_reduction_scalable_2d(%arg0: tensor,
// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[MASK_2d:.*]] = vector.create_mask %[[DIM_A0_0]], %[[DIM_A0_1]] : vector<4x[8]xi1>
-// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true]} : tensor, vector<4x[8]xf32> } : vector<4x[8]xi1> -> vector<4x[8]xf32>
+// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true, true>} : tensor, vector<4x[8]xf32> } : vector<4x[8]xi1> -> vector<4x[8]xf32>
// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[MASK_1d:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<4xi1>
-// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_1d]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_1d]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true>} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK_2d]] { vector.multi_reduction <add>, %[[VEC_RD_0]], %[[VEC_RD_1]] [1] : vector<4x[8]xf32> to vector<4xf32> } : vector<4x[8]xi1> -> vector<4xf32>
// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %{{.*}} = vector.mask %[[MASK_1d]] { vector.transfer_write %[[REDUCE]], %[[ARG_1]][%[[C0_idx]]] {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor
+// CHECK: %{{.*}} = vector.mask %[[MASK_1d]] { vector.transfer_write %[[REDUCE]], %[[ARG_1]][%[[C0_idx]]] {in_bounds = array<i1: true>} : vector<4xf32>, tensor } : vector<4xi1> -> tensor
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -284,17 +284,17 @@ func.func @vectorize_dynamic_matvec_trailing_reduction_dim(%arg0: tensor
-// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true]} : tensor, vector<4x[4]xf32> } : vector<4x[4]xi1> -> vector<4x[4]xf32>
+// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true, true>} : tensor, vector<4x[4]xf32> } : vector<4x[4]xi1> -> vector<4x[4]xf32>
// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[MASK_d1:.*]] = vector.create_mask %[[DIM_A0_1]] : vector<[4]xi1>
-// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true], permutation_map = #map} : tensor, vector<4x[4]xf32> } : vector<[4]xi1> -> vector<4x[4]xf32>
+// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true, true>, permutation_map = #map} : tensor, vector<4x[4]xf32> } : vector<[4]xi1> -> vector<4x[4]xf32>
// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[MASK_d2:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<4xi1>
-// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true>} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[MUL:.*]] = arith.mulf %[[VEC_RD_0:.*]], %[[VEC_RD_1:.*]] : vector<4x[4]xf32>
// CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK_2d]] { vector.multi_reduction <add>, %[[MUL]], %[[VEC_RD_2]] [1] : vector<4x[4]xf32> to vector<4xf32> } : vector<4x[4]xi1> -> vector<4xf32>
// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %{{.*}} = vector.mask %[[MASK_d2]] { vector.transfer_write %[[REDUCE]], %[[ARG_2]][%[[C0_idx]]] {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor
+// CHECK: %{{.*}} = vector.mask %[[MASK_d2]] { vector.transfer_write %[[REDUCE]], %[[ARG_2]][%[[C0_idx]]] {in_bounds = array<i1: true>} : vector<4xf32>, tensor } : vector<4xi1> -> tensor
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -332,17 +332,17 @@ func.func @vectorize_dynamic_generic_matvec_leading_parallel_dim(%arg0: tensor
-// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true]} : tensor, vector<[4]x4xf32> } : vector<[4]x4xi1> -> vector<[4]x4xf32>
+// CHECK: %[[VEC_RD_0:.*]] = vector.mask %[[MASK_2d]] { vector.transfer_read %[[ARG_0]][%[[C0_idx]], %[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true, true>} : tensor, vector<[4]x4xf32> } : vector<[4]x4xi1> -> vector<[4]x4xf32>
// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[MASK_d1:.*]] = vector.create_mask %[[DIM_A0_1]] : vector<4xi1>
-// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true, true], permutation_map = #map} : tensor, vector<[4]x4xf32> } : vector<4xi1> -> vector<[4]x4xf32>
+// CHECK: %[[VEC_RD_1:.*]] = vector.mask %[[MASK_d1]] { vector.transfer_read %[[ARG_1]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true, true>, permutation_map = #map} : tensor, vector<[4]x4xf32> } : vector<4xi1> -> vector<[4]x4xf32>
// CHECK: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[MASK_d2:.*]] = vector.create_mask %[[DIM_A0_0]] : vector<[4]xi1>
-// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = [true]} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
+// CHECK: %[[VEC_RD_2:.*]] = vector.mask %[[MASK_d2]] { vector.transfer_read %[[ARG_2]][%[[C0_idx]]], %[[C0_f32]] {in_bounds = array<i1: true>} : tensor, vector<[4]xf32> } : vector<[4]xi1> -> vector<[4]xf32>
// CHECK: %[[MUL:.*]] = arith.mulf %[[VEC_RD_0:.*]], %[[VEC_RD_1:.*]] : vector<[4]x4xf32>
// CHECK: %[[REDUCE:.*]] = vector.mask %[[MASK_2d]] { vector.multi_reduction <add>, %[[MUL]], %[[VEC_RD_2]] [1] : vector<[4]x4xf32> to vector<[4]xf32> } : vector<[4]x4xi1> -> vector<[4]xf32>
// CHECK: %[[C0_idx:.*]] = arith.constant 0 : index
-// CHECK: %{{.*}} = vector.mask %[[MASK_d2]] { vector.transfer_write %[[REDUCE]], %[[ARG_2]][%[[C0_idx]]] {in_bounds = [true]} : vector<[4]xf32>, tensor } : vector<[4]xi1> -> tensor
+// CHECK: %{{.*}} = vector.mask %[[MASK_d2]] { vector.transfer_write %[[REDUCE]], %[[ARG_2]][%[[C0_idx]]] {in_bounds = array<i1: true>} : vector<[4]xf32>, tensor } : vector<[4]xi1> -> tensor
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
index 9a43d43cd9460..19aa76caf173f 100644
--- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
@@ -345,7 +345,7 @@ func.func @vectorize_affine_apply(%arg0: tensor<5xf32>, %arg3: index) -> tensor<
// CHECK: %[[ADDI_3:.*]] = arith.addi %[[ADDI_1]], %[[ADDI_2]] : vector<5xindex>
// CHECK: %[[ADDI_4:.*]] = arith.addi %[[ADDI_3]], %[[CST]] : vector<5xindex>
// CHECK: %[[CAST:.*]] = arith.index_cast %[[ADDI_4]] : vector<5xindex> to vector<5xi32>
-// CHECK: vector.transfer_write %[[CAST]], %[[EMPTY]][%[[C0:.*]]] {in_bounds = [true]} : vector<5xi32>, tensor<5xi32>
+// CHECK: vector.transfer_write %[[CAST]], %[[EMPTY]][%[[C0:.*]]] {in_bounds = array<i1: true>} : vector<5xi32>, tensor<5xi32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -704,10 +704,10 @@ module attributes {transform.with_named_sequence} {
// CHECK: func @generic_vectorize_broadcast_transpose
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[CF:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP0]]} : memref<4x4xf32>, vector<4x4x4x4xf32>
-// CHECK: %[[V1:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %[[CF]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP1]]} : memref<4xf32>, vector<4x4x4x4xf32>
-// CHECK: %[[V2:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %[[CF]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP2]]} : memref<4xf32>, vector<4x4x4x4xf32>
-// CHECK: %[[V3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF]] {in_bounds = [true, true, true, true], permutation_map = #[[$MAP3]]} : memref<4x4xf32>, vector<4x4x4x4xf32>
+// CHECK: %[[V0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF]] {in_bounds = array<i1: true, true, true, true>, permutation_map = #[[$MAP0]]} : memref<4x4xf32>, vector<4x4x4x4xf32>
+// CHECK: %[[V1:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %[[CF]] {in_bounds = array<i1: true, true, true, true>, permutation_map = #[[$MAP1]]} : memref<4xf32>, vector<4x4x4x4xf32>
+// CHECK: %[[V2:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %[[CF]] {in_bounds = array<i1: true, true, true, true>, permutation_map = #[[$MAP2]]} : memref<4xf32>, vector<4x4x4x4xf32>
+// CHECK: %[[V3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF]] {in_bounds = array<i1: true, true, true, true>, permutation_map = #[[$MAP3]]} : memref<4x4xf32>, vector<4x4x4x4xf32>
// CHECK: %[[SUB:.*]] = arith.subf %[[V0]], %[[V1]] : vector<4x4x4x4xf32>
// CHECK: %[[ADD0:.*]] = arith.addf %[[V2]], %[[SUB]] : vector<4x4x4x4xf32>
// CHECK: %[[ADD1:.*]] = arith.addf %[[V3]], %[[ADD0]] : vector<4x4x4x4xf32>
@@ -758,9 +758,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (0, d1, 0, d0)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2, d3) -> (d2, d1, d3, d0)>
// CHECK: func @vectorization_transpose
-// CHECK: vector.transfer_read {{.*}}{in_bounds = [true, true, true, true], permutation_map = #[[MAP0]]} : memref<14x7xf32>, vector<7x14x8x16xf32>
-// CHECK: vector.transfer_read {{.*}}{in_bounds = [true, true, true, true], permutation_map = #[[MAP1]]} : memref<16x14xf32>, vector<7x14x8x16xf32>
-// CHECK: vector.transfer_read {{.*}}{in_bounds = [true, true, true, true], permutation_map = #[[MAP2]]} : memref<16x14x7x8xf32>, vector<7x14x8x16xf32>
+// CHECK: vector.transfer_read {{.*}}{in_bounds = array<i1: true, true, true, true>, permutation_map = #[[MAP0]]} : memref<14x7xf32>, vector<7x14x8x16xf32>
+// CHECK: vector.transfer_read {{.*}}{in_bounds = array<i1: true, true, true, true>, permutation_map = #[[MAP1]]} : memref<16x14xf32>, vector<7x14x8x16xf32>
+// CHECK: vector.transfer_read {{.*}}{in_bounds = array<i1: true, true, true, true>, permutation_map = #[[MAP2]]} : memref<16x14x7x8xf32>, vector<7x14x8x16xf32>
// CHECK: arith.addf {{.*}} : vector<7x14x8x16xf32>
// CHECK: arith.addf {{.*}} : vector<7x14x8x16xf32>
// CHECK: vector.transfer_write {{.*}} : vector<7x14x8x16xf32>, memref<7x14x8x16xf32>
@@ -803,7 +803,7 @@ func.func @matmul_tensors(
// convert it to a 2D contract.
// CHECK: %[[MUL:.*]] = arith.mulf %[[V0]], %[[V1]] : vector<8x12x4xf32> // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32> - // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32> + // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = array} : vector<8x12xf32>, tensor<8x12xf32> %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>) outs(%arg2: tensor<8x12xf32>) -> tensor<8x12xf32> @@ -830,8 +830,8 @@ module attributes {transform.with_named_sequence} { // CHECK-DAG: %[[INIT:.*]] = tensor.empty() : tensor<2x3x4xf32> // CHECK-DAG: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32> // CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32> -// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array} : tensor<2x?x2xf32>, vector<2x3x2xf32> +// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = array} : vector<2x3x2xf32>, tensor<2x3x4xf32> // CHECK: return %[[RESULT]] func.func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> { %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { @@ -860,9 +860,9 @@ module attributes {transform.with_named_sequence} { // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<2x6x4xf32> // CHECK: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x6x4xf32> -// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<2x6x4xf32>, tensor<2x6x4xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : tensor<2x5x2xf32>, vector<2x5x2xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x5x2xf32>, tensor<2x6x4xf32> +// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array} : vector<2x6x4xf32>, tensor<2x6x4xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = array} : tensor<2x5x2xf32>, vector<2x5x2xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = array} : vector<2x5x2xf32>, tensor<2x6x4xf32> // CHECK: return %[[WRITE]] func.func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6x4xf32> { %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { @@ -1061,7 +1061,7 @@ func.func private @make_vector() -> tensor<12x13xf32> // CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 // CHECK: %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32> // CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[VEC0]][%[[C0]], %[[C0]]] {in_bounds = [true, 
true]} : vector<7x9xf32>, tensor<12x13xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[VEC0]][%[[C0]], %[[C0]]] {in_bounds = array} : vector<7x9xf32>, tensor<12x13xf32> // CHECK: return %[[WRITE]] func.func @pad_and_insert_slice_source( %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { @@ -1126,8 +1126,8 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[FILL:.*]] = tensor.generate // CHECK: %[[RES:.*]] = arith.mulf // CHECK: tensor.yield %[[RES]] : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true]} : tensor<5x6xf32>, vector<5x6xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C3]], %[[C4]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<12x13xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %{{.*}} {in_bounds = array} : tensor<5x6xf32>, vector<5x6xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C3]], %[[C4]]] {in_bounds = array} : vector<5x6xf32>, tensor<12x13xf32> // CHECK: return %[[WRITE]] func.func @pad_tensor_non_const_pad_value(%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { %c0 = arith.constant 0 : index @@ -1161,7 +1161,7 @@ func.func @sum_exp(%input: tensor<4x16x8xf32>, %output: tensor<4x16xf32>) -> tensor<4x16xf32> { // CHECK: vector.transfer_read {{.*}} : tensor<4x16x8xf32>, vector<4x16x8xf32> - // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true]} : tensor<4x16xf32>, vector<4x16xf32> + // CHECK: vector.transfer_read {{.*}} {in_bounds = array} : tensor<4x16xf32>, vector<4x16xf32> // CHECK: math.exp {{.*}} : vector<4x16x8xf32> // CHECK: vector.multi_reduction , %{{.*}}, %{{.*}} [2] : vector<4x16x8xf32> to vector<4x16xf32> // CHECK: vector.transfer_write {{.*}} : vector<4x16xf32>, tensor<4x16xf32> @@ -1201,14 +1201,14 @@ module attributes {transform.with_named_sequence} { func.func @sum_exp_2(%input: tensor<3x2xf32>, %input_2: tensor<5x4xf32>, %output: tensor<5x2xf32>) -> tensor<5x2xf32> { - // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true, true, true], permutation_map = #[[$M1]]} : tensor<3x2xf32>, vector<2x3x4x5xf32> - // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true, true, true], permutation_map = #[[$M2]]} : tensor<5x4xf32>, vector<2x3x4x5xf32> - // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true], permutation_map = #[[$M3]]} : tensor<5x2xf32>, vector<2x5xf32> + // CHECK: vector.transfer_read {{.*}} {in_bounds = array, permutation_map = #[[$M1]]} : tensor<3x2xf32>, vector<2x3x4x5xf32> + // CHECK: vector.transfer_read {{.*}} {in_bounds = array, permutation_map = #[[$M2]]} : tensor<5x4xf32>, vector<2x3x4x5xf32> + // CHECK: vector.transfer_read {{.*}} {in_bounds = array, permutation_map = #[[$M3]]} : tensor<5x2xf32>, vector<2x5xf32> // CHECK: math.exp {{.*}} : vector<2x3x4x5xf32> // CHECK: math.exp {{.*}} : vector<2x3x4x5xf32> // CHECK: addf {{.*}} : vector<2x3x4x5xf32> // CHECK: vector.multi_reduction , {{.*}}, %{{.*}} [1, 2] : vector<2x3x4x5xf32> to vector<2x5xf32> - // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true], permutation_map = #[[$M3]]} : vector<2x5xf32>, tensor<5x2xf32> + // CHECK: vector.transfer_write {{.*}} {in_bounds = array, permutation_map = #[[$M3]]} : vector<2x5xf32>, tensor<5x2xf32> // CHECK: return {{.*}} : tensor<5x2xf32> %0 = linalg.generic { indexing_maps = [ @@ -1502,10 +1502,10 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @explicit_broadcast( func.func 
@explicit_broadcast(%arg0: tensor<4x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> { - // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true]} : tensor<4x4xf32>, vector<4x4xf32> - // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true], permutation_map = #[[$M5]]} : tensor<4x1xf32>, vector<4x4xf32> + // CHECK: vector.transfer_read {{.*}} {in_bounds = array} : tensor<4x4xf32>, vector<4x4xf32> + // CHECK: vector.transfer_read {{.*}} {in_bounds = array, permutation_map = #[[$M5]]} : tensor<4x1xf32>, vector<4x4xf32> // CHECK: subf {{.*}} : vector<4x4xf32> - // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32> + // CHECK: vector.transfer_write {{.*}} {in_bounds = array} : vector<4x4xf32>, tensor<4x4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4x4xf32> %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32> @@ -1538,12 +1538,12 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @fused_broadcast_red_2d func.func @fused_broadcast_red_2d(%arg0: tensor<4x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4xf32> { - // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true]} : tensor<4x4xf32>, vector<4x4xf32> - // CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true], permutation_map = #[[$M6]]} : tensor<4x1xf32>, vector<4x4xf32> + // CHECK: vector.transfer_read {{.*}} {in_bounds = array} : tensor<4x4xf32>, vector<4x4xf32> + // CHECK: vector.transfer_read {{.*}} {in_bounds = array, permutation_map = #[[$M6]]} : tensor<4x1xf32>, vector<4x4xf32> // CHECK: subf {{.*}} : vector<4x4xf32> // CHECK: math.exp {{.*}} : vector<4x4xf32> // CHECK: vector.multi_reduction , {{.*}}, {{.*}} : vector<4x4xf32> to vector<4xf32> - // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> + // CHECK: vector.transfer_write {{.*}} {in_bounds = array} : vector<4xf32>, tensor<4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4xf32> %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> @@ -1958,12 +1958,12 @@ module attributes {transform.with_named_sequence} { } // CHECK-LABEL: func @multi_output_generic_different_perm_maps -// CHECK: %[[VAL_5:.*]] = vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<4x1xf32>, vector<4x1xf32> +// CHECK: %[[VAL_5:.*]] = vector.transfer_read %{{.*}} {in_bounds = array} : tensor<4x1xf32>, vector<4x1xf32> // CHECK: %[[VAL_6:.*]] = arith.addf %[[VAL_5]], %[[VAL_5]] : vector<4x1xf32> // CHECK: %[[VAL_7:.*]] = vector.transpose %[[VAL_6]], [1, 0] : vector<4x1xf32> to vector<1x4xf32> // CHECK: %[[VAL_8:.*]] = vector.transpose %[[VAL_7]], [1, 0] : vector<1x4xf32> to vector<4x1xf32> -// CHECK: vector.transfer_write %[[VAL_8]], %{{.*}} {in_bounds = [true, true]} : vector<4x1xf32>, tensor<4x1xf32> -// CHECK: vector.transfer_write %[[VAL_7]], %{{.*}} {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32> +// CHECK: vector.transfer_write %[[VAL_8]], %{{.*}} {in_bounds = array} : vector<4x1xf32>, tensor<4x1xf32> +// CHECK: vector.transfer_write %[[VAL_7]], %{{.*}} {in_bounds = array} : vector<1x4xf32>, tensor<1x4xf32> // ----- @@ -1997,10 +1997,10 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK: %[[VAL_3:.*]] = tensor.empty() : tensor<1x12x197x1xf32> -// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_2]], %[[VAL_2]], 
%[[VAL_2]], %[[VAL_2]]], %[[VAL_1]] {in_bounds = [true, true, true, true]} : tensor<1x12x197x197xf32>, vector<1x12x197x197xf32> -// CHECK: %[[VAL_5:.*]] = vector.transfer_read %[[VAL_3]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]], %[[VAL_1]] {in_bounds = [true, true, true], permutation_map = #[[$ATTR_32]]} : tensor<1x12x197x1xf32>, vector<1x12x197xf32> +// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]], %[[VAL_1]] {in_bounds = array} : tensor<1x12x197x197xf32>, vector<1x12x197x197xf32> +// CHECK: %[[VAL_5:.*]] = vector.transfer_read %[[VAL_3]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]], %[[VAL_1]] {in_bounds = array, permutation_map = #[[$ATTR_32]]} : tensor<1x12x197x1xf32>, vector<1x12x197xf32> // CHECK: %[[VAL_6:.*]] = vector.multi_reduction , %[[VAL_4]], %[[VAL_5]] [3] : vector<1x12x197x197xf32> to vector<1x12x197xf32> // CHECK: %[[VAL_7:.*]] = vector.broadcast %[[VAL_6]] : vector<1x12x197xf32> to vector<1x1x12x197xf32> // CHECK: %[[VAL_8:.*]] = vector.transpose %[[VAL_7]], [1, 2, 3, 0] : vector<1x1x12x197xf32> to vector<1x12x197x1xf32> -// CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_3]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]] {in_bounds = [true, true, true, true]} : vector<1x12x197x1xf32>, tensor<1x12x197x1xf32> +// CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_3]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]] {in_bounds = array} : vector<1x12x197x1xf32>, tensor<1x12x197x1xf32> // CHECK: return %[[VAL_9]] : tensor<1x12x197x1xf32> diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 0e2b2458d29cd..d9793906259bc 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -20,11 +20,11 @@ func.func @vectorize_dynamic_identity(%arg0: tensor, // CHECK: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor // CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1> -// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> -// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> -// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> // CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32> -// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor +// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = array} : vector<4xf32>, tensor } : vector<4xi1> -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: 
!transform.any_op {transform.readonly}) { @@ -57,11 +57,11 @@ func.func @vectorize_dynamic_identity_with_constant(%arg0: tensor, // CHECK: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor // CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1> -// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> -// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> -// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> // CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32> -// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor +// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = array} : vector<4xf32>, tensor } : vector<4xi1> -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -94,11 +94,11 @@ func.func @vectorize_dynamic_identity_with_param(%arg0: tensor, // CHECK: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor // CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1> -// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> -// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> -// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> // CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32> -// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor +// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = array} : vector<4xf32>, tensor } : vector<4xi1> -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op 
{transform.readonly}) { @@ -132,10 +132,10 @@ func.func @vectorize_dynamic_1d_broadcast(%arg0: tensor, // CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor // CHECK: %[[VAL_7:.*]] = vector.transfer_read %{{.*}} {in_bounds = {{.*}}, permutation_map = #{{.*}}} : tensor, vector<4xf32> // CHECK: %[[VAL_9:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1> -// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> -// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> // CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_7]], %[[VAL_10]] : vector<4xf32> -// CHECK: %[[VAL_14:.*]] = vector.mask %{{.*}} { vector.transfer_write %[[VAL_13]], {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor +// CHECK: %[[VAL_14:.*]] = vector.mask %{{.*}} { vector.transfer_write %[[VAL_13]], {{.*}} {in_bounds = array} : vector<4xf32>, tensor } : vector<4xi1> -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -173,13 +173,13 @@ func.func @dynamic_generic_with_reduction_and_broadcast(%arg0: tensor, // CHECK: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]] : vector<4x4xi1> -// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_7]] {in_bounds = [true, true]} : tensor, vector<4x4xf32> } : vector<4x4xi1> -> vector<4x4xf32> +// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_7]] {in_bounds = array} : tensor, vector<4x4xf32> } : vector<4x4xi1> -> vector<4x4xf32> // CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[VAL_11:.*]] = vector.create_mask %[[VAL_3]] : vector<4xi1> -// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_11]] { vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_10]] {in_bounds = [true], permutation_map = #[[$MAP]]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_11]] { vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_10]] {in_bounds = array, permutation_map = #[[$MAP]]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> // CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.multi_reduction , %[[VAL_9]], %[[VAL_12]] [1] : vector<4x4xf32> to vector<4xf32> } : vector<4x4xi1> -> vector<4xf32> // CHECK: %[[VAL_14:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_11]] { vector.transfer_write %[[VAL_13]], %[[VAL_1]]{{\[}}%[[VAL_14]], %[[VAL_14]]] {in_bounds = [true], permutation_map = #[[$MAP]]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor +// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_11]] { vector.transfer_write %[[VAL_13]], %[[VAL_1]]{{\[}}%[[VAL_14]], %[[VAL_14]]] {in_bounds = array, permutation_map = #[[$MAP]]} : vector<4xf32>, tensor } : 
vector<4xi1> -> tensor // CHECK: return %[[VAL_15]] : tensor module attributes {transform.with_named_sequence} { @@ -214,13 +214,13 @@ func.func @vectorize_dynamic_2d_transpose(%arg0: tensor, // CHECK: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK: %[[VAL_6:.*]] = tensor.dim %{{.*}}, %[[VAL_5]] : tensor // CHECK: %[[VAL_9:.*]] = vector.create_mask %[[VAL_6]], %[[VAL_4]] : vector<8x4xi1> -// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor, vector<4x8xf32> } : vector<8x4xi1> -> vector<4x8xf32> +// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = array, permutation_map = #{{.*}}} : tensor, vector<4x8xf32> } : vector<8x4xi1> -> vector<4x8xf32> // CHECK: %[[VAL_12:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<4x8xi1> -// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> +// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> // CHECK: %[[VAL_14:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> +// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> // CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_10]], %[[VAL_13]] : vector<4x8xf32> -// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_12]] { vector.transfer_write %[[VAL_16]], %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, tensor } : vector<4x8xi1> -> tensor +// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_12]] { vector.transfer_write %[[VAL_16]], %{{.*}} {in_bounds = array} : vector<4x8xf32>, tensor } : vector<4x8xi1> -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -254,12 +254,12 @@ func.func @vectorize_dynamic_generic_2d_broadcast(%arg0: tensor, // CHECK: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK: %[[VAL_6:.*]] = tensor.dim %{{.*}}, %[[VAL_5]] : tensor // CHECK: %[[VAL_9:.*]] = vector.create_mask %[[VAL_6]] : vector<8xi1> -// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor, vector<4x8xf32> } : vector<8xi1> -> vector<4x8xf32> +// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = array, permutation_map = #{{.*}}} : tensor, vector<4x8xf32> } : vector<8xi1> -> vector<4x8xf32> // CHECK: %[[VAL_12:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<4x8xi1> -// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> -// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> +// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = array} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> +// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = 
// CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_10]], %[[VAL_13]] : vector<4x8xf32>
-// CHECK: %[[VAL_18:.*]] = vector.mask %[[VAL_12]] { vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, tensor<?x?xf32> } : vector<4x8xi1> -> tensor<?x?xf32>
+// CHECK: %[[VAL_18:.*]] = vector.mask %[[VAL_12]] { vector.transfer_write %{{.*}} {in_bounds = array<i1: true, true>} : vector<4x8xf32>, tensor<?x?xf32> } : vector<4x8xi1> -> tensor<?x?xf32>
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -301,11 +301,11 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]] : vector<4x8xi1>
-// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32>
+// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = array<i1: true, true>} : tensor<?x?xf32>, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32>
// CHECK: %[[VAL_11:.*]] = vector.create_mask %[[VAL_3]] : vector<4xi1>
-// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_11]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_11]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = array<i1: true>} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.multi_reduction <add>, %[[VAL_9]], %[[VAL_12]] [1] : vector<4x8xf32> to vector<4xf32> } : vector<4x8xi1> -> vector<4xf32>
-// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_11]] { vector.transfer_write %[[VAL_13]], %[[VAL_1]]{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>
+// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_11]] { vector.transfer_write %[[VAL_13]], %[[VAL_1]]{{.*}} {in_bounds = array<i1: true>} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>
// CHECK: return %[[VAL_15]] : tensor<?xf32>
// CHECK: }
@@ -343,11 +343,11 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[VAL_6:.*]] = arith.constant 2 : index
// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]], %[[VAL_7]] : vector<4x8x16xi1>
-// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = [true, true, true]} : tensor<?x?x?xf32>, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32>
+// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = array<i1: true, true, true>} : tensor<?x?x?xf32>, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32>
// CHECK: %[[VAL_13:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_5]] : vector<16x8xi1>
-// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_13]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32>
+// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_13]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32>
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_10]] { vector.multi_reduction <add>, %[[VAL_11]], %[[VAL_14]] [0] : vector<4x8x16xf32> to vector<8x16xf32> } : vector<4x8x16xi1> -> vector<8x16xf32>
-// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_13]] { vector.transfer_write %[[VAL_15]], %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<8x16xf32>, tensor<?x?xf32> } : vector<16x8xi1> -> tensor<?x?xf32>
+// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_13]] { vector.transfer_write %[[VAL_15]], %{{.*}} {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : vector<8x16xf32>, tensor<?x?xf32> } : vector<16x8xi1> -> tensor<?x?xf32>
// -----
@@ -386,11 +386,11 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[VAL_6:.*]] = arith.constant 2 : index
// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]], %[[VAL_7]] : vector<4x8x16xi1>
-// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = [true, true, true]} : tensor<?x?x?xf32>, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32>
+// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = array<i1: true, true, true>} : tensor<?x?x?xf32>, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32>
// CHECK: %[[VAL_13:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_5]] : vector<16x8xi1>
-// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_13]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32>
+// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_13]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32>
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_10]] { vector.multi_reduction <add>, %[[VAL_11]], %[[VAL_14]] [0] : vector<4x8x16xf32> to vector<8x16xf32> } : vector<4x8x16xi1> -> vector<8x16xf32>
-// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_13]] { vector.transfer_write %[[VAL_15]], %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<8x16xf32>, tensor<?x?xf32> } : vector<16x8xi1> -> tensor<?x?xf32>
+// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_13]] { vector.transfer_write %[[VAL_15]], %{{.*}} {in_bounds = array<i1: true, true>, permutation_map = #{{.*}}} : vector<8x16xf32>, tensor<?x?xf32> } : vector<16x8xi1> -> tensor<?x?xf32>
// -----
@@ -418,14 +418,14 @@ func.func @vectorize_partial_dynamic_identity(%arg0: tensor<8x?xf32>,
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 8 : index
// CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_4]] : vector<8x32xi1>
-// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_6]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
+// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_6]] {in_bounds = array<i1: true, true>} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_1]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_10]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
+// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_1]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_10]] {in_bounds = array<i1: true, true>} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_2]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_12]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
+// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_2]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_12]] {in_bounds = array<i1: true, true>} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] : vector<8x32xf32>
// CHECK: %[[VAL_15:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_16:.*]] = vector.mask %[[VAL_8]] { vector.transfer_write %[[VAL_14]], %[[VAL_2]][%[[VAL_15]], %[[VAL_15]]] {in_bounds = [true, true]} : vector<8x32xf32>, tensor<8x?xf32> } : vector<8x32xi1> -> tensor<8x?xf32>
+// CHECK: %[[VAL_16:.*]] = vector.mask %[[VAL_8]] { vector.transfer_write %[[VAL_14]], %[[VAL_2]][%[[VAL_15]], %[[VAL_15]]] {in_bounds = array<i1: true, true>} : vector<8x32xf32>, tensor<8x?xf32> } : vector<8x32xi1> -> tensor<8x?xf32>
module attributes {transform.with_named_sequence} {
@@ -490,14 +490,14 @@ func.func @vectorize_static_shape_with_mask(%arg0: tensor<8x30xf32>,
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 30 : index
// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_6]] : vector<8x32xi1>
-// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_0]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_4]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
+// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_0]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_4]] {in_bounds = array<i1: true, true>} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_1]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
+// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_1]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_9]] {in_bounds = array<i1: true, true>} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_11:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_2]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_11]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
+// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_2]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_11]] {in_bounds = array<i1: true, true>} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<8x32xf32>
// CHECK: %[[VAL_14:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %[[VAL_13]], %[[VAL_2]][%[[VAL_14]], %[[VAL_14]]] {in_bounds = [true, true]} : vector<8x32xf32>, tensor<8x30xf32> } : vector<8x32xi1> -> tensor<8x30xf32>
+// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %[[VAL_13]], %[[VAL_2]][%[[VAL_14]], %[[VAL_14]]] {in_bounds = array<i1: true, true>} : vector<8x32xf32>, tensor<8x30xf32> } : vector<8x32xi1> -> tensor<8x30xf32>
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -519,7 +519,7 @@ func.func @vectorize_dynamic_fill(%A : tensor<?x?xf32>, %arg0 : f32) -> tensor<?x?xf32>
// CHECK: %[[BCAST:.*]] = vector.broadcast %{{.*}} : f32 to vector<8x16xf32>
-// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = [true, true]} : vector<8x16xf32>, tensor<?x?xf32> } : vector<8x16xi1>
+// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = array<i1: true, true>} : vector<8x16xf32>, tensor<?x?xf32> } : vector<8x16xi1>
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -566,8 +566,8 @@ func.func @test_masked_vectorize_linalg_copy(%A : memref<?x?xf32>, %B : memref<?x?xf32>
  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
-  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<?x?xf32>, vector<2x4xf32> } : vector<2x4xi1> -> vector<2x4xf32>
-  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<2x4xf32>, memref<?x?xf32> } : vector<2x4xi1>
+  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_read %{{.*}} {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<2x4xf32> } : vector<2x4xi1> -> vector<2x4xf32>
+  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_write %{{.*}} {in_bounds = array<i1: true, true>} : vector<2x4xf32>, memref<?x?xf32> } : vector<2x4xi1>
  linalg.copy ins(%A : memref<?x?xf32>) outs(%B : memref<?x?xf32>)
  return
}
@@ -595,12 +595,12 @@ func.func @test_masked_vectorize_pad(
  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_0]], %[[c0_0]]], %[[c42]]
-  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
+  // CHECK-SAME: {in_bounds = array<i1: true, true>} : tensor<?x?xf32>, vector<2x4xf32>
  // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32>
  // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
  // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4xf32>
  // CHECK: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_1]], %[[c0_1]]]
-  // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<2x4xf32>
+  // CHECK-SAME: {in_bounds = array<i1: true, true>} : vector<2x4xf32>, tensor<2x4xf32>
  %cst = arith.constant 42.43 : f32
  %c0 = arith.constant 0 : index
  %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] {
@@ -637,14 +637,14 @@ func.func @test_masked_vectorize_dynamic_pad(
  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[c42]]
-  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
+  // CHECK-SAME: {in_bounds = array<i1: true, true>} : tensor<?x?xf32>, vector<2x4xf32>
  // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32>
  // CHECK-DAG: %[[empty:.*]] = tensor.empty(%[[res_d0]], %[[res_d1]]) : tensor<?x?xf32>
  // CHECK-DAG: %[[c0_3:.*]] = arith.constant 0 : index
  // CHECK: %[[mask_2:.*]] = vector.create_mask %[[res_d0]], %[[res_d1]] : vector<2x4xi1>
  // CHECK: %[[masked_write:.*]] = vector.mask %[[mask_2]] {
  // CHECK-SAME: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_3]], %[[c0_3]]]
-  // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<?x?xf32>
+  // CHECK-SAME: {in_bounds = array<i1: true, true>} : vector<2x4xf32>, tensor<?x?xf32>
  // CHECK: return %[[masked_write]] : tensor<?x?xf32>
  %cst = arith.constant 42.43 : f32
  %c0 = arith.constant 0 : index
@@ -673,13 +673,13 @@ func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK: %[[read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
-// CHECK-SAME: {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true, true>} : tensor<32x8x16xf32>, vector<32x8x16xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<4x1x32x16x2xf32>
// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<4x1x32x16x2xf32>, tensor<4x1x32x16x2xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true, true, true, true>} : vector<4x1x32x16x2xf32>, tensor<4x1x32x16x2xf32>
// CHECK: return %[[write]] : tensor<4x1x32x16x2xf32>
module attributes {transform.with_named_sequence} {
@@ -705,14 +705,14 @@ func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<
// CHECK: %[[mask:.*]] = vector.create_mask %[[c32]], %[[c7]], %[[c15]] : vector<32x8x16xi1>
// CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
// CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
-// CHECK-SAME: {in_bounds = [true, true, true]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true, true>} : tensor<32x7x15xf32>, vector<32x8x16xf32>
// CHECK-SAME: } : vector<32x8x16xi1> -> vector<32x8x16xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[masked_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true, true, true, true>} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>
module attributes {transform.with_named_sequence} {
@@ -742,7 +742,7 @@ func.func @test_vectorize_dynamic_pack(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?x16x2xf32>
// CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
// CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_1]], %[[c0_1]]], %[[cst]]
-// CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<8x16xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true>} : tensor<?x?xf32>, vector<8x16xf32>
// CHECK-SAME: } : vector<8x16xi1> -> vector<8x16xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[masked_read]] : vector<8x16xf32> to vector<4x2x1x16xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32>
@@ -753,7 +753,7 @@ func.func @test_vectorize_dynamic_pack(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?x16x2xf32>
// CHECK: %[[masked_write:.*]] = vector.mask %[[mask_0]] {
// CHECK-SAME: vector.transfer_write %[[transpose]], %[[empty]][%[[c0_2]], %[[c0_2]], %[[c0_2]], %[[c0_2]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor<?x?x16x2xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true, true, true>} : vector<4x1x16x2xf32>, tensor<?x?x16x2xf32>
// CHECK: return %[[masked_write]] : tensor<?x?x16x2xf32>
module attributes {transform.with_named_sequence} {
@@ -781,16 +781,16 @@ func.func @matmul(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>)
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = memref.dim %[[A]], %[[VAL_7]] : memref<?x?xf32>
// CHECK: %[[MASK_A:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_8]] : vector<8x4xi1>
-// CHECK: %[[LOAD_A:.*]] = vector.mask %[[MASK_A]] { vector.transfer_read %[[A]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x16x4xf32> } : vector<8x4xi1> -> vector<8x16x4xf32>
+// CHECK: %[[LOAD_A:.*]] = vector.mask %[[MASK_A]] { vector.transfer_read %[[A]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true, true>, permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x16x4xf32> } : vector<8x4xi1> -> vector<8x16x4xf32>
// CHECK: %[[MASK_B:.*]] = vector.create_mask %[[VAL_8]], %[[VAL_6]] : vector<4x16xi1>
-// CHECK: %[[LOAD_B:.*]] = vector.mask %[[MASK_B]] { vector.transfer_read %[[B]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x16x4xf32> } : vector<4x16xi1> -> vector<8x16x4xf32>
+// CHECK: %[[LOAD_B:.*]] = vector.mask %[[MASK_B]] { vector.transfer_read %[[B]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true, true>, permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x16x4xf32> } : vector<4x16xi1> -> vector<8x16x4xf32>
// CHECK: %[[MASK_C:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<8x16xi1>
-// CHECK: %[[LOAD_C:.*]] = vector.mask %[[MASK_C]] { vector.transfer_read %[[C]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : memref<?x?xf32>, vector<8x16xf32> } : vector<8x16xi1> -> vector<8x16xf32>
+// CHECK: %[[LOAD_C:.*]] = vector.mask %[[MASK_C]] { vector.transfer_read %[[C]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<8x16xf32> } : vector<8x16xi1> -> vector<8x16xf32>
// CHECK: %[[MULF:.*]] = arith.mulf %[[LOAD_A]], %[[LOAD_B]] : vector<8x16x4xf32>
// CHECK: %[[MASK_MULIT_RED:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]], %[[VAL_8]] : vector<8x16x4xi1>
// CHECK: %[[MULTI_RED:.*]] = vector.mask %[[MASK_MULIT_RED]] { vector.multi_reduction <add>, %[[MULF]], %[[LOAD_C]] [2] : vector<8x16x4xf32> to vector<8x16xf32> } : vector<8x16x4xi1> -> vector<8x16xf32>
// CHECK: %[[C2:.*]] = arith.constant 0 : index
-// CHECK: vector.mask %[[MASK_C]] { vector.transfer_write %[[MULTI_RED]], %[[C]]{{\[}}%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<8x16xf32>, memref<?x?xf32> } : vector<8x16xi1>
+// CHECK: vector.mask %[[MASK_C]] { vector.transfer_write %[[MULTI_RED]], %[[C]]{{\[}}%[[C2]], %[[C2]]] {in_bounds = array<i1: true, true>} : vector<8x16xf32>, memref<?x?xf32> } : vector<8x16xi1>
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -842,16 +842,16 @@ func.func @matmul_scalable(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = memref.dim %[[A]], %[[VAL_7]] : memref<?x?xf32>
// CHECK: %[[MASK_A:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_8]] : vector<8x4xi1>
-// CHECK: %[[LOAD_A:.*]] = vector.mask %[[MASK_A]] { vector.transfer_read %[[A]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x[16]x4xf32> } : vector<8x4xi1> -> vector<8x[16]x4xf32>
+// CHECK: %[[LOAD_A:.*]] = vector.mask %[[MASK_A]] { vector.transfer_read %[[A]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true, true>, permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x[16]x4xf32> } : vector<8x4xi1> -> vector<8x[16]x4xf32>
// CHECK: %[[MASK_B:.*]] = vector.create_mask %[[VAL_8]], %[[VAL_6]] : vector<4x[16]xi1>
-// CHECK: %[[LOAD_B:.*]] = vector.mask %[[MASK_B]] { vector.transfer_read %[[B]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x[16]x4xf32> } : vector<4x[16]xi1> -> vector<8x[16]x4xf32>
+// CHECK: %[[LOAD_B:.*]] = vector.mask %[[MASK_B]] { vector.transfer_read %[[B]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true, true>, permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x[16]x4xf32> } : vector<4x[16]xi1> -> vector<8x[16]x4xf32>
// CHECK: %[[MASK_C:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<8x[16]xi1>
-// CHECK: %[[LOAD_C:.*]] = vector.mask %[[MASK_C]] { vector.transfer_read %[[C]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : memref<?x?xf32>, vector<8x[16]xf32> } : vector<8x[16]xi1> -> vector<8x[16]xf32>
+// CHECK: %[[LOAD_C:.*]] = vector.mask %[[MASK_C]] { vector.transfer_read %[[C]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<8x[16]xf32> } : vector<8x[16]xi1> -> vector<8x[16]xf32>
// CHECK: %[[MULF:.*]] = arith.mulf %[[LOAD_A]], %[[LOAD_B]] : vector<8x[16]x4xf32>
// CHECK: %[[MASK_MULIT_RED:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]], %[[VAL_8]] : vector<8x[16]x4xi1>
// CHECK: %[[MULTI_RED:.*]] = vector.mask %[[MASK_MULIT_RED]] { vector.multi_reduction <add>, %[[MULF]], %[[LOAD_C]] [2] : vector<8x[16]x4xf32> to vector<8x[16]xf32> } : vector<8x[16]x4xi1> -> vector<8x[16]xf32>
// CHECK: %[[C2:.*]] = arith.constant 0 : index
-// CHECK: vector.mask %[[MASK_C]] { vector.transfer_write %[[MULTI_RED]], %[[C]]{{\[}}%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<8x[16]xf32>, memref<?x?xf32> } : vector<8x[16]xi1>
+// CHECK: vector.mask %[[MASK_C]] { vector.transfer_write %[[MULTI_RED]], %[[C]]{{\[}}%[[C2]], %[[C2]]] {in_bounds = array<i1: true, true>} : vector<8x[16]xf32>, memref<?x?xf32> } : vector<8x[16]xi1>
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
@@ -986,13 +986,13 @@ func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: t
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK: %[[read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]]], %[[cst]]
-// CHECK-SAME: {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true>} : tensor<64x4xf32>, vector<64x4xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<64x4xf32> to vector<4x16x2x2xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4x16x2xf32>
// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true, true, true>} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32>
// CHECK: return %[[write]] : tensor<2x4x16x2xf32>
module attributes {transform.with_named_sequence} {
@@ -1014,13 +1014,13 @@ func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK: %[[transfer_read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
-// CHECK-SAME: {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, false, false>} : tensor<32x7x15xf32>, vector<32x8x16xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[transfer_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true, true, true, true>} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>
module attributes {transform.with_named_sequence} {
@@ -1065,7 +1065,7 @@ func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x
  // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<64x127xf32>
  // CHECK: %[[C00:.*]] = arith.constant 0 : index
  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[EMPT]]{{\[}}%[[C00]], %[[C00]]]
-  // CHECK-SAME: {in_bounds = [true, false]} : vector<64x128xf32>, tensor<64x127xf32>
+  // CHECK-SAME: {in_bounds = array<i1: true, false>} : vector<64x128xf32>, tensor<64x127xf32>
  // CHECK: return %[[WRIT]] : tensor<64x127xf32>
  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
  return %0 : tensor<64x127xf32>
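The change in every hunk above is purely syntactic: `in_bounds` switches from the `BoolArrayAttr` form `[true, false]` to the `DenseBoolArrayAttr` form `array<i1: true, false>`, with identical per-dimension semantics. A minimal before/after sketch (the operands `%src`, `%pad`, `%c0` are invented for illustration, not taken from any test in this patch):

// Old printed form (BoolArrayAttr):
%v0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, false]} : tensor<4x?xf32>, vector<4x8xf32>
// New printed form (DenseBoolArrayAttr), same op otherwise:
%v1 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = array<i1: true, false>} : tensor<4x?xf32>, vector<4x8xf32>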
diff --git a/mlir/test/Dialect/Linalg/vectorize-conv-masked-and-scalable.mlir b/mlir/test/Dialect/Linalg/vectorize-conv-masked-and-scalable.mlir
index 4964a8d2e0db8..c42af352200cb 100644
--- a/mlir/test/Dialect/Linalg/vectorize-conv-masked-and-scalable.mlir
+++ b/mlir/test/Dialect/Linalg/vectorize-conv-masked-and-scalable.mlir
@@ -34,18 +34,18 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[C8:.*]] = arith.constant 8 : index
// CHECK: %[[MASK_IN:.*]] = vector.create_mask %[[C1]], %[[C8]], %[[CH_DIM_IN]] : vector<1x8x4xi1>
/// Read the input tensor
-// CHECK: %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x8x?xi8>, vector<1x8x4xi8> } : vector<1x8x4xi1> -> vector<1x8x4xi8>
+// CHECK: %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true, true>} : tensor<1x8x?xi8>, vector<1x8x4xi8> } : vector<1x8x4xi1> -> vector<1x8x4xi8>
/// Create a mask for the filter tensor
// CHECK: %[[CH_DIM_FLT:.*]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<1x?xi8>
// CHECK: %[[MASK_FLT:.*]] = vector.create_mask %[[C1]], %[[CH_DIM_FLT]] : vector<1x4xi1>
/// Read the filter tensor
-// CHECK: %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true]} : tensor<1x?xi8>, vector<1x4xi8> } : vector<1x4xi1> -> vector<1x4xi8>
+// CHECK: %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true>} : tensor<1x?xi8>, vector<1x4xi8> } : vector<1x4xi1> -> vector<1x4xi8>
/// Create a mask for the output tensor
// CHECK: %[[CH_DIM_OUT:.*]] = tensor.dim %[[OUTPUT]], %[[C2]] : tensor<1x8x?xi8>
// CHECK: %[[MASK_OUT:.*]] = vector.create_mask %[[C1]], %[[C8]], %[[CH_DIM_OUT]] : vector<1x8x4xi1>
-// CHECK: %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x8x?xi8>, vector<1x8x4xi8> } : vector<1x8x4xi1> -> vector<1x8x4xi8>
+// CHECK: %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true, true>} : tensor<1x8x?xi8>, vector<1x8x4xi8> } : vector<1x8x4xi1> -> vector<1x8x4xi8>
/// Convolution
// CHECK: %[[IN_1:.*]] = vector.extract_strided_slice %[[VEC_IN]] {offsets = [0, 0, 0], sizes = [1, 8, 4], strides = [1, 1, 1]} : vector<1x8x4xi8> to vector<1x8x4xi8>
@@ -55,7 +55,7 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[MULI:.*]] = arith.muli %[[IN_1]], %[[FLT_1_B]] : vector<1x8x4xi8>
// CHECK: %[[ADDI:.*]] = arith.addi %[[MULI]], %[[OUT_1]] : vector<1x8x4xi8>
// CHECK: %[[OUT_INS:.*]] = vector.insert_strided_slice %[[ADDI]], %[[VEC_OUT]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<1x8x4xi8> into vector<1x8x4xi8>
-// CHECK: %[[OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x8x4xi8>, tensor<1x8x?xi8> } : vector<1x8x4xi1> -> tensor<1x8x?xi8>
+// CHECK: %[[OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: true, true, true>} : vector<1x8x4xi8>, tensor<1x8x?xi8> } : vector<1x8x4xi1> -> tensor<1x8x?xi8>
// CHECK: return %[[OUT]] : tensor<1x8x?xi8>
// -----
@@ -95,19 +95,19 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[C8:.*]] = arith.constant 8 : index
// CHECK: %[[MASK_IN:.*]] = vector.create_mask %[[C1]], %[[C8]], %[[CH_DIM_IN]] : vector<1x8x[4]xi1>
/// Read the input tensor
-// CHECK: %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x8x?xi8>, vector<1x8x[4]xi8> } : vector<1x8x[4]xi1> -> vector<1x8x[4]xi8>
+// CHECK: %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true, true>} : tensor<1x8x?xi8>, vector<1x8x[4]xi8> } : vector<1x8x[4]xi1> -> vector<1x8x[4]xi8>
/// Create a mask for the filter tensor
// CHECK: %[[CH_DIM_FLT:.*]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<1x?xi8>
// CHECK: %[[MASK_FLT:.*]] = vector.create_mask %[[C1]], %[[CH_DIM_FLT]] : vector<1x[4]xi1>
/// Read the filter tensor
-// CHECK: %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true]} : tensor<1x?xi8>, vector<1x[4]xi8> } : vector<1x[4]xi1> -> vector<1x[4]xi8>
+// CHECK: %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true>} : tensor<1x?xi8>, vector<1x[4]xi8> } : vector<1x[4]xi1> -> vector<1x[4]xi8>
/// Create a mask for the output tensor
// CHECK: %[[CH_DIM_OUT:.*]] = tensor.dim %[[OUTPUT]], %[[C2]] : tensor<1x8x?xi8>
// CHECK: %[[MASK_OUT:.*]] = vector.create_mask %[[C1]], %[[C8]], %[[CH_DIM_OUT]] : vector<1x8x[4]xi1>
/// Read the output tensor
-// CHECK: %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x8x?xi8>, vector<1x8x[4]xi8> } : vector<1x8x[4]xi1> -> vector<1x8x[4]xi8>
+// CHECK: %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true, true>} : tensor<1x8x?xi8>, vector<1x8x[4]xi8> } : vector<1x8x[4]xi1> -> vector<1x8x[4]xi8>
/// Convolution
// CHECK: %[[IN_1:.*]] = vector.extract_strided_slice %[[VEC_IN]] {offsets = [0, 0, 0], sizes = [1, 8, 4], strides = [1, 1, 1]} : vector<1x8x[4]xi8> to vector<1x8x[4]xi8>
@@ -117,7 +117,7 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[MULI:.*]] = arith.muli %[[IN_1]], %[[FLT_1_B]] : vector<1x8x[4]xi8>
// CHECK: %[[ADDI:.*]] = arith.addi %[[MULI]], %[[OUT_1]] : vector<1x8x[4]xi8>
// CHECK: %[[OUT_INS:.*]] = vector.insert_strided_slice %[[ADDI]], %[[VEC_OUT]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<1x8x[4]xi8> into vector<1x8x[4]xi8>
-// CHECK: %[[OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x8x[4]xi8>, tensor<1x8x?xi8> } : vector<1x8x[4]xi1> -> tensor<1x8x?xi8>
+// CHECK: %[[OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: true, true, true>} : vector<1x8x[4]xi8>, tensor<1x8x?xi8> } : vector<1x8x[4]xi1> -> tensor<1x8x?xi8>
// CHECK: return %[[OUT]] : tensor<1x8x?xi8>
// -----
@@ -157,19 +157,19 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[C5:.*]] = arith.constant 5 : index
// CHECK: %[[MASK_IN:.*]] = vector.create_mask %[[C3]], %[[C5]], %[[CH_DIM_IN]] : vector<3x4x[4]xi1>
/// Read the input tensor
-// CHECK: %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : memref<3x5x?xf32>, vector<3x4x[4]xf32> } : vector<3x4x[4]xi1> -> vector<3x4x[4]xf32>
+// CHECK: %[[VEC_IN:.*]] = vector.mask %[[MASK_IN]] { vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true, true>} : memref<3x5x?xf32>, vector<3x4x[4]xf32> } : vector<3x4x[4]xi1> -> vector<3x4x[4]xf32>
/// Create a mask for the filter tensor
// CHECK: %[[CH_DIM_FLT:.*]] = memref.dim %[[FILTER]], %[[C1]] : memref<2x?xf32>
// CHECK: %[[MASK_FLT:.*]] = vector.create_mask %[[C2]], %[[CH_DIM_FLT]] : vector<2x[4]xi1>
/// Read the filter tensor
-// CHECK: %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true]} : memref<2x?xf32>, vector<2x[4]xf32> } : vector<2x[4]xi1> -> vector<2x[4]xf32>
+// CHECK: %[[VEC_FLT:.*]] = vector.mask %[[MASK_FLT]] { vector.transfer_read %[[FILTER]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true>} : memref<2x?xf32>, vector<2x[4]xf32> } : vector<2x[4]xi1> -> vector<2x[4]xf32>
/// Create a mask for the output tensor
// CHECK: %[[CH_DIM_OUT:.*]] = memref.dim %[[OUTPUT]], %[[C2]] : memref<3x2x?xf32>
// CHECK: %[[MASK_OUT:.*]] = vector.create_mask %[[C3]], %[[C2]], %[[CH_DIM_OUT]] : vector<3x2x[4]xi1>
/// Read the output tensor
-// CHECK: %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : memref<3x2x?xf32>, vector<3x2x[4]xf32> } : vector<3x2x[4]xi1> -> vector<3x2x[4]xf32>
+// CHECK: %[[VEC_OUT:.*]] = vector.mask %[[MASK_OUT]] { vector.transfer_read %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = array<i1: true, true, true>} : memref<3x2x?xf32>, vector<3x2x[4]xf32> } : vector<3x2x[4]xi1> -> vector<3x2x[4]xf32>
/// Convolution
// CHECK: %[[IN_1:.*]] = vector.extract_strided_slice %[[VEC_IN]] {offsets = [0, 0, 0], sizes = [3, 2, 4], strides = [1, 1, 1]} : vector<3x4x[4]xf32> to vector<3x2x[4]xf32>
@@ -182,4 +182,4 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[FLT_2_B:.*]] = vector.broadcast %[[FLT_2]] : vector<[4]xf32> to vector<3x2x[4]xf32>
// CHECK: %[[FMA_2:.*]] = vector.fma %[[IN_2]], %[[FLT_2_B]], %[[FMA_1]] : vector<3x2x[4]xf32>
// CHECK: %[[OUT_INS:.*]] = vector.insert_strided_slice %[[FMA_2]], %[[VEC_OUT]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<3x2x[4]xf32> into vector<3x2x[4]xf32>
-// CHECK: vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x2x[4]xf32>, memref<3x2x?xf32> } : vector<3x2x[4]xi1>
+// CHECK: vector.mask %[[MASK_OUT]] { vector.transfer_write %[[OUT_INS]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: true, true, true>} : vector<3x2x[4]xf32>, memref<3x2x?xf32> } : vector<3x2x[4]xi1>
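The masked and scalable cases above follow the same shape: only the attribute text inside the `vector.mask` region changes. A hedged sketch of the new printed form, with all operand names invented rather than taken from the tests:

%mask = vector.create_mask %c1, %c8, %ch : vector<1x8x[4]xi1>
%v = vector.mask %mask {
  vector.transfer_read %input[%c0, %c0, %c0], %pad {in_bounds = array<i1: true, true, true>}
      : tensor<1x8x?xi8>, vector<1x8x[4]xi8>
} : vector<1x8x[4]xi1> -> vector<1x8x[4]xi8>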
diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
index 7f4b9b986c81b..dbeb670652275 100644
--- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
+++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
@@ -733,8 +733,8 @@ func.func @pooling_nwc_sum_memref_1_2_1_3(%input: memref<4x4x3xf32>, %filter: me
// CHECK-SAME: (%[[INPUT:.+]]: memref<4x4x3xf32>, %[[FILTER:.+]]: memref<1xf32>, %[[OUTPUT:.+]]: memref<4x2x3xf32>)
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vcst:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x4x3xf32>, vector<4x4x3xf32>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x2x3xf32>, vector<4x2x3xf32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x4x3xf32>, vector<4x4x3xf32>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x2x3xf32>, vector<4x2x3xf32>
// CHECK: %[[V2:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xf32> to vector<4x1x3xf32>
// CHECK: %[[V3:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xf32> to vector<4x1x3xf32>
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V1]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x2x3xf32> to vector<4x1x3xf32>
@@ -743,7 +743,7 @@ func.func @pooling_nwc_sum_memref_1_2_1_3(%input: memref<4x4x3xf32>, %filter: me
// CHECK: %[[V7:.+]] = arith.addf %[[V3]], %[[V5]] : vector<4x1x3xf32>
// CHECK: %[[V8:.+]] = vector.insert_strided_slice %[[V6]], %[[V1]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
// CHECK: %[[V9:.+]] = vector.insert_strided_slice %[[V7]], %[[V8]] {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
-// CHECK: vector.transfer_write %[[V9]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x2x3xf32>, memref<4x2x3xf32>
+// CHECK: vector.transfer_write %[[V9]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x2x3xf32>, memref<4x2x3xf32>
// -----
@@ -759,8 +759,8 @@ func.func @pooling_nwc_max_memref_1_2_1_3(%input: memref<4x4x3xf32>, %filter: me
// CHECK-SAME: (%[[INPUT:.+]]: memref<4x4x3xf32>, %[[FILTER:.+]]: memref<1xf32>, %[[OUTPUT:.+]]: memref<4x2x3xf32>)
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vcst:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x4x3xf32>, vector<4x4x3xf32>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x2x3xf32>, vector<4x2x3xf32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x4x3xf32>, vector<4x4x3xf32>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x2x3xf32>, vector<4x2x3xf32>
// CHECK: %[[V2:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xf32> to vector<4x1x3xf32>
// CHECK: %[[V3:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xf32> to vector<4x1x3xf32>
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V1]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x2x3xf32> to vector<4x1x3xf32>
@@ -769,7 +769,7 @@ func.func @pooling_nwc_max_memref_1_2_1_3(%input: memref<4x4x3xf32>, %filter: me
// CHECK: %[[V7:.+]] = arith.maximumf %[[V3]], %[[V5]] : vector<4x1x3xf32>
// CHECK: %[[V8:.+]] = vector.insert_strided_slice %[[V6]], %[[V1]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
// CHECK: %[[V9:.+]] = vector.insert_strided_slice %[[V7]], %[[V8]] {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
-// CHECK: vector.transfer_write %[[V9]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x2x3xf32>, memref<4x2x3xf32>
+// CHECK: vector.transfer_write %[[V9]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x2x3xf32>, memref<4x2x3xf32>
// -----
@@ -788,8 +788,8 @@ func.func @pooling_nwc_sum_i8i8i32_memref_1_2_1_3(%input: memref<4x4x3xi8>, %fil
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vc0_i8:.+]] = arith.constant 0 : i8
// CHECK-DAG: %[[Vc0_i32:.+]] = arith.constant 0 : i32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vc0_i8]] {in_bounds = [true, true, true]} : memref<4x4x3xi8>, vector<4x4x3xi8>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vc0_i32]] {in_bounds = [true, true, true]} : memref<4x2x3xi32>, vector<4x2x3xi32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vc0_i8]] {in_bounds = array<i1: true, true, true>} : memref<4x4x3xi8>, vector<4x4x3xi8>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vc0_i32]] {in_bounds = array<i1: true, true, true>} : memref<4x2x3xi32>, vector<4x2x3xi32>
// CHECK: %[[V2:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xi8> to vector<4x1x3xi8>
// CHECK: %[[V3:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xi8> to vector<4x1x3xi8>
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V1]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x2x3xi32> to vector<4x1x3xi32>
@@ -800,7 +800,7 @@ func.func @pooling_nwc_sum_i8i8i32_memref_1_2_1_3(%input: memref<4x4x3xi8>, %fil
// CHECK: %[[V9:.+]] = arith.addi %[[V8]], %[[V5]] : vector<4x1x3xi32>
// CHECK: %[[V10:.+]] = vector.insert_strided_slice %[[V7]], %[[V1]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x3xi32> into vector<4x2x3xi32>
// CHECK: %[[V11:.+]] = vector.insert_strided_slice %[[V9]], %[[V10]] {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x3xi32> into vector<4x2x3xi32>
-// CHECK: vector.transfer_write %[[V11]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x2x3xi32>, memref<4x2x3xi32>
+// CHECK: vector.transfer_write %[[V11]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x2x3xi32>, memref<4x2x3xi32>
// CHECK: return
// -----
@@ -820,8 +820,8 @@ func.func @pooling_nwc_max_i8i8i32_memref_1_2_1_3(%input: memref<4x4x3xi8>, %fil
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vc0_i8:.+]] = arith.constant 0 : i8
// CHECK-DAG: %[[Vc0_i32:.+]] = arith.constant 0 : i32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vc0_i8]] {in_bounds = [true, true, true]} : memref<4x4x3xi8>, vector<4x4x3xi8>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vc0_i32]] {in_bounds = [true, true, true]} : memref<4x2x3xi32>, vector<4x2x3xi32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vc0_i8]] {in_bounds = array<i1: true, true, true>} : memref<4x4x3xi8>, vector<4x4x3xi8>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vc0_i32]] {in_bounds = array<i1: true, true, true>} : memref<4x2x3xi32>, vector<4x2x3xi32>
// CHECK: %[[V2:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xi8> to vector<4x1x3xi8>
// CHECK: %[[V3:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xi8> to vector<4x1x3xi8>
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V1]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x2x3xi32> to vector<4x1x3xi32>
@@ -832,7 +832,7 @@ func.func @pooling_nwc_max_i8i8i32_memref_1_2_1_3(%input: memref<4x4x3xi8>, %fil
// CHECK: %[[V9:.+]] = arith.maxsi %[[V8]], %[[V5]] : vector<4x1x3xi32>
// CHECK: %[[V10:.+]] = vector.insert_strided_slice %[[V7]], %[[V1]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x3xi32> into vector<4x2x3xi32>
// CHECK: %[[V11:.+]] = vector.insert_strided_slice %[[V9]], %[[V10]] {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x3xi32> into vector<4x2x3xi32>
-// CHECK: vector.transfer_write %[[V11]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x2x3xi32>, memref<4x2x3xi32>
+// CHECK: vector.transfer_write %[[V11]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x2x3xi32>, memref<4x2x3xi32>
// CHECK: return
// -----
@@ -849,8 +849,8 @@ func.func @pooling_nwc_sum_memref_2_2_2_3(%input: memref<4x6x3xf32>, %filter: me
// CHECK-SAME: (%[[INPUT:.+]]: memref<4x6x3xf32>, %[[FILTER:.+]]: memref<2xf32>, %[[OUTPUT:.+]]: memref<4x2x3xf32>)
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vcst:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x6x3xf32>, vector<4x6x3xf32>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x2x3xf32>, vector<4x2x3xf32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x6x3xf32>, vector<4x6x3xf32>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x2x3xf32>, vector<4x2x3xf32>
// CHECK: %[[V2:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x6x3xf32> to vector<4x1x3xf32>
// CHECK: %[[V3:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x6x3xf32> to vector<4x1x3xf32>
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 2, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x6x3xf32> to vector<4x1x3xf32>
@@ -863,7 +863,7 @@ func.func @pooling_nwc_sum_memref_2_2_2_3(%input: memref<4x6x3xf32>, %filter: me
// CHECK: %[[V11:.+]] = arith.addf %[[V5]], %[[V9]] : vector<4x1x3xf32>
// CHECK: %[[V12:.+]] = vector.insert_strided_slice %[[V10]], %[[V1]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
// CHECK: %[[V13:.+]] = vector.insert_strided_slice %[[V11]], %[[V12]] {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
-// CHECK: vector.transfer_write %[[V13:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x2x3xf32>, memref<4x2x3xf32>
+// CHECK: vector.transfer_write %[[V13:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x2x3xf32>, memref<4x2x3xf32>
// -----
@@ -880,8 +880,8 @@ func.func @pooling_ncw_sum_memref_1_2_1_3(%input: memref<4x3x4xf32>, %filter: me
// CHECK-SAME: (%[[INPUT:.+]]: memref<4x3x4xf32>, %[[FILTER:.+]]: memref<1xf32>, %[[OUTPUT:.+]]: memref<4x3x2xf32>)
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vcst:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x3x4xf32>, vector<4x3x4xf32>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x3x2xf32>, vector<4x3x2xf32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x3x4xf32>, vector<4x3x4xf32>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x3x2xf32>, vector<4x3x2xf32>
// CHECK: %[[V2:.+]] = vector.transpose %[[V0]], [0, 2, 1] : vector<4x3x4xf32> to vector<4x4x3xf32>
// CHECK: %[[V3:.+]] = vector.transpose %[[V1]], [0, 2, 1] : vector<4x3x2xf32> to vector<4x2x3xf32>
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V2]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xf32> to vector<4x1x3xf32>
@@ -893,7 +893,7 @@ func.func @pooling_ncw_sum_memref_1_2_1_3(%input: memref<4x3x4xf32>, %filter: me
// CHECK: %[[V10:.+]] = vector.insert_strided_slice %[[V8]], %[[V3]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
// CHECK: %[[V11:.+]] = vector.insert_strided_slice %[[V9]], %[[V10]] {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
// CHECK: %[[V12:.+]] = vector.transpose %[[V11]], [0, 2, 1] : vector<4x2x3xf32> to vector<4x3x2xf32>
-// CHECK: vector.transfer_write %[[V12:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x3x2xf32>, memref<4x3x2xf32>
+// CHECK: vector.transfer_write %[[V12:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x3x2xf32>, memref<4x3x2xf32>
// -----
@@ -911,11 +911,11 @@ func.func @pooling_nwc_sum_mixed_type_memref_1_2_1_1(%input: memref<1x2x3xf16>,
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vcst:.+]] = arith.constant 0.000000e+00 : f16
// CHECK-DAG: %[[Vcst_0:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<1x2x3xf16>, vector<1x2x3xf16>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst_0]] {in_bounds = [true, true, true]} : memref<1x2x3xf32>, vector<1x2x3xf32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<1x2x3xf16>, vector<1x2x3xf16>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst_0]] {in_bounds = array<i1: true, true, true>} : memref<1x2x3xf32>, vector<1x2x3xf32>
// CHECK: %[[V2:.+]] = arith.extf %[[V0]] : vector<1x2x3xf16> to vector<1x2x3xf32>
// CHECK: %[[V3:.+]] = arith.addf %[[V2]], %[[V1]] : vector<1x2x3xf32>
-// CHECK: vector.transfer_write %[[V3:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<1x2x3xf32>, memref<1x2x3xf32>
+// CHECK: vector.transfer_write %[[V3:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<1x2x3xf32>, memref<1x2x3xf32>
// -----
@@ -931,13 +931,13 @@ func.func @pooling_nwc_sum_memref_2_2_2_1(%input: memref<4x4x3xf32>, %filter: me
// CHECK-SAME: (%[[INPUT:.+]]: memref<4x4x3xf32>, %[[FILTER:.+]]: memref<2xf32>, %[[OUTPUT:.+]]: memref<4x2x3xf32>)
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vcst:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x4x3xf32>, vector<4x4x3xf32>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x2x3xf32>, vector<4x2x3xf32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x4x3xf32>, vector<4x4x3xf32>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x2x3xf32>, vector<4x2x3xf32>
// CHECK: %[[V2:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 0, 0], sizes = [4, 2, 3], strides = [1, 1, 1]} : vector<4x4x3xf32> to vector<4x2x3xf32>
// CHECK: %[[V3:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 2, 0], sizes = [4, 2, 3], strides = [1, 1, 1]} : vector<4x4x3xf32> to vector<4x2x3xf32>
// CHECK: %[[V4:.+]] = arith.addf %[[V2]], %[[V1]] : vector<4x2x3xf32>
// CHECK: %[[V5:.+]] = arith.addf %[[V3]], %[[V4]] : vector<4x2x3xf32>
-// CHECK: vector.transfer_write %[[V5:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x2x3xf32>, memref<4x2x3xf32>
+// CHECK: vector.transfer_write %[[V5:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x2x3xf32>, memref<4x2x3xf32>
// -----
@@ -954,8 +954,8 @@ func.func @pooling_ncw_sum_memref_2_2_2_3(%input: memref<4x3x6xf32>, %filter: me
// CHECK-SAME: (%[[INPUT:.+]]: memref<4x3x6xf32>, %[[FILTER:.+]]: memref<2xf32>, %[[OUTPUT:.+]]: memref<4x3x2xf32>)
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vcst:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x3x6xf32>, vector<4x3x6xf32>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x3x2xf32>, vector<4x3x2xf32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x3x6xf32>, vector<4x3x6xf32>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x3x2xf32>, vector<4x3x2xf32>
// CHECK: %[[V2:.+]] = vector.transpose %[[V0]], [0, 2, 1] : vector<4x3x6xf32> to vector<4x6x3xf32>
// CHECK: %[[V3:.+]] = vector.transpose %[[V1]], [0, 2, 1] : vector<4x3x2xf32> to vector<4x2x3xf32>
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V2]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x6x3xf32> to vector<4x1x3xf32>
@@ -971,7 +971,7 @@ func.func @pooling_ncw_sum_memref_2_2_2_3(%input: memref<4x3x6xf32>, %filter: me
// CHECK: %[[V14:.+]] = vector.insert_strided_slice %[[V12]], %[[V3]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
// CHECK: %[[V15:.+]] = vector.insert_strided_slice %[[V13]], %[[V14]] {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
// CHECK: %[[V16:.+]] = vector.transpose %[[V15]], [0, 2, 1] : vector<4x2x3xf32> to vector<4x3x2xf32>
-// CHECK: vector.transfer_write %[[V16:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x3x2xf32>, memref<4x3x2xf32>
+// CHECK: vector.transfer_write %[[V16:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x3x2xf32>, memref<4x3x2xf32>
// -----
@@ -987,8 +987,8 @@ func.func @pooling_ncw_sum_memref_2_3_2_1(%input: memref<4x2x5xf32>, %filter: me
// CHECK-SAME: (%[[INPUT:.+]]: memref<4x2x5xf32>, %[[FILTER:.+]]: memref<2xf32>, %[[OUTPUT:.+]]: memref<4x2x3xf32>)
// CHECK-DAG: %[[Vc0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[Vcst:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x2x5xf32>, vector<4x2x5xf32>
-// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = [true, true, true]} : memref<4x2x3xf32>, vector<4x2x3xf32>
+// CHECK: %[[V0:.+]] = vector.transfer_read %[[INPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x2x5xf32>, vector<4x2x5xf32>
+// CHECK: %[[V1:.+]] = vector.transfer_read %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]], %[[Vcst]] {in_bounds = array<i1: true, true, true>} : memref<4x2x3xf32>, vector<4x2x3xf32>
// CHECK: %[[V2:.+]] = vector.transpose %[[V0]], [0, 2, 1] : vector<4x2x5xf32> to vector<4x5x2xf32>
// CHECK: %[[V3:.+]] = vector.transpose %[[V1]], [0, 2, 1] : vector<4x2x3xf32> to vector<4x3x2xf32>
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V2]] {offsets = [0, 0, 0], sizes = [4, 3, 2], strides = [1, 1, 1]} : vector<4x5x2xf32> to vector<4x3x2xf32>
@@ -996,4 +996,4 @@ func.func @pooling_ncw_sum_memref_2_3_2_1(%input: memref<4x2x5xf32>, %filter: me
// CHECK: %[[V6:.+]] = arith.addf %[[V4]], %[[V3]] : vector<4x3x2xf32>
// CHECK: %[[V7:.+]] = arith.addf %[[V5]], %[[V6]] : vector<4x3x2xf32>
// CHECK: %[[V8:.+]] = vector.transpose %[[V7]], [0, 2, 1] : vector<4x3x2xf32> to vector<4x2x3xf32>
-// CHECK: vector.transfer_write %[[V8:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x2x3xf32>, memref<4x2x3xf32>
+// CHECK: vector.transfer_write %[[V8:.+]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = array<i1: true, true, true>} : vector<4x2x3xf32>, memref<4x2x3xf32>
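The write side is symmetric to the reads shown throughout this file; a minimal sketch of a statically in-bounds transfer_write in the new syntax (operand names invented for illustration):

vector.transfer_write %val, %out[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>}
    : vector<4x2x3xf32>, memref<4x2x3xf32>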
vector<1x[4]xf32> +// CHECK: vector.mask %[[MASK]] { vector.transfer_read {{.*}} {in_bounds = array} : tensor<1x3xf32>, vector<1x[4]xf32> } : vector<1x[4]xi1> -> vector<1x[4]xf32> /// Caluclate the index vector // CHECK: %[[STEP:.*]] = vector.step : vector<[4]xindex> @@ -105,9 +105,9 @@ func.func @masked_static_vectorize_nd_tensor_extract_with_affine_apply_contiguou // CHECK: %[[IDX_START:.*]] = vector.extractelement %[[SC]]{{\[}}%[[C0]] : i32] : vector<[4]xindex> // Final read and write -// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]]{{\[}}%[[C79]], %[[IDX_START]]], {{.*}} {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x[4]xf32> } : vector<1x[4]xi1> -> vector<1x[4]xf32> +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]]{{\[}}%[[C79]], %[[IDX_START]]], {{.*}} {in_bounds = array} : tensor<80x16xf32>, vector<1x[4]xf32> } : vector<1x[4]xi1> -> vector<1x[4]xf32> // CHECK: %[[C0_1:.*]] = arith.constant 0 : index -// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[OUTPUT]]{{\[}}%[[C0_1]], %[[C0_1]]] {in_bounds = [true, true]} : vector<1x[4]xf32>, tensor<1x3xf32> } : vector<1x[4]xi1> -> tensor<1x3xf32> +// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[OUTPUT]]{{\[}}%[[C0_1]], %[[C0_1]]] {in_bounds = array} : vector<1x[4]xf32>, tensor<1x3xf32> } : vector<1x[4]xi1> -> tensor<1x3xf32> module attributes {transform.with_named_sequence} { @@ -153,7 +153,7 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_contiguo // CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM_0]], %[[DIM_1]] : vector<1x4xi1> /// TODO: This transfer_read is redundant - remove -// CHECK: vector.mask %[[MASK]] { vector.transfer_read %[[OUTPUT]]{{.*}} {in_bounds = [true, true]} : tensor, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> +// CHECK: vector.mask %[[MASK]] { vector.transfer_read %[[OUTPUT]]{{.*}} {in_bounds = array} : tensor, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> /// Caluclate the index vector // CHECK: %[[STEP:.*]] = vector.step : vector<4xindex> @@ -166,8 +166,8 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_contiguo // CHECK: %[[IDX_START:.*]] = vector.extractelement %[[SC]]{{\[}}%[[C0]] : i32] : vector<4xindex> // Final read and write -// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]]{{\[}}%[[C79]], %[[IDX_START]]], {{.*}} {in_bounds = [true, true]} : tensor, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> -// CHECK: %[[VAL_24:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[OUTPUT]]{{.*}} {in_bounds = [true, true]} : vector<1x4xf32>, tensor } : vector<1x4xi1> -> tensor +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]]{{\[}}%[[C79]], %[[IDX_START]]], {{.*}} {in_bounds = array} : tensor, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> +// CHECK: %[[VAL_24:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[OUTPUT]]{{.*}} {in_bounds = array} : vector<1x4xf32>, tensor } : vector<1x4xi1> -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -212,7 +212,7 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_contiguo // CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM_0]], %[[DIM_1]] : vector<1x[4]xi1> /// TODO: This transfer_read is redundant - remove -// CHECK: vector.mask %[[MASK]] { vector.transfer_read 
%[[OUTPUT]]{{.*}} {in_bounds = [true, true]} : tensor, vector<1x[4]xf32> } : vector<1x[4]xi1> -> vector<1x[4]xf32> +// CHECK: vector.mask %[[MASK]] { vector.transfer_read %[[OUTPUT]]{{.*}} {in_bounds = array} : tensor, vector<1x[4]xf32> } : vector<1x[4]xi1> -> vector<1x[4]xf32> /// Caluclate the index vector // CHECK: %[[STEP:.*]] = vector.step : vector<[4]xindex> @@ -225,8 +225,8 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_contiguo // CHECK: %[[IDX_START:.*]] = vector.extractelement %[[SC]]{{\[}}%[[C0]] : i32] : vector<[4]xindex> // Final read and write -// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]]{{\[}}%[[C79]], %[[IDX_START]]], {{.*}} {in_bounds = [true, true]} : tensor, vector<1x[4]xf32> } : vector<1x[4]xi1> -> vector<1x[4]xf32> -// CHECK: %[[VAL_24:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[OUTPUT]]{{.*}} {in_bounds = [true, true]} : vector<1x[4]xf32>, tensor } : vector<1x[4]xi1> -> tensor +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC]]{{\[}}%[[C79]], %[[IDX_START]]], {{.*}} {in_bounds = array} : tensor, vector<1x[4]xf32> } : vector<1x[4]xi1> -> vector<1x[4]xf32> +// CHECK: %[[VAL_24:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[OUTPUT]]{{.*}} {in_bounds = array} : vector<1x[4]xf32>, tensor } : vector<1x[4]xi1> -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -257,7 +257,7 @@ func.func @masked_vectorize_nd_tensor_extract_with_affine_apply_gather(%6: tenso // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 3 : index // CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_5]] : vector<1x4xi1> -// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read {{.*}} {in_bounds = [true, true]} : tensor<1x3xf32>, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> +// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read {{.*}} {in_bounds = array} : tensor<1x3xf32>, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> // CHECK: %[[VAL_11:.*]] = vector.broadcast {{.*}} : index to vector<4xindex> // CHECK: %[[VAL_12:.*]] = arith.addi {{.*}} : vector<4xindex> // CHECK: %[[VAL_16:.*]] = vector.broadcast {{.*}} : vector<4xindex> to vector<1x4xindex> @@ -266,7 +266,7 @@ func.func @masked_vectorize_nd_tensor_extract_with_affine_apply_gather(%6: tenso // CHECK: %[[VAL_20:.*]] = arith.muli {{.*}} : vector<1x4xindex> // CHECK: %[[VAL_22:.*]] = arith.addi {{.*}} : vector<1x4xindex> // CHECK: %[[VAL_23:.*]] = vector.mask %[[VAL_8]] { vector.gather {{.*}} : tensor<80x16xf32>, vector<1x4xindex>, vector<1x4xi1>, vector<1x4xf32> into vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> -// CHECK: %[[VAL_25:.*]] = vector.mask %[[VAL_8]] { vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x3xf32> } : vector<1x4xi1> -> tensor<1x3xf32> +// CHECK: %[[VAL_25:.*]] = vector.mask %[[VAL_8]] { vector.transfer_write {{.*}} {in_bounds = array} : vector<1x4xf32>, tensor<1x3xf32> } : vector<1x4xi1> -> tensor<1x3xf32> module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -305,7 +305,7 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_gather(% // CHECK: %[[VAL_8:.*]] = arith.constant 0 : index // CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 
: f32 // CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_7]] : vector<1x4xi1> -// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_2]]{{\[}}%[[VAL_8]], %[[VAL_8]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> +// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_2]]{{\[}}%[[VAL_8]], %[[VAL_8]]], %[[VAL_9]] {in_bounds = array} : tensor, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> // CHECK: %[[VAL_12:.*]] = vector.step : vector<4xindex> // CHECK: %[[VAL_13:.*]] = vector.broadcast %[[VAL_1]] : index to vector<4xindex> // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_13]] : vector<4xindex> @@ -321,7 +321,7 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_gather(% // CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_22]] : vector<1x4xindex> // CHECK: %[[VAL_25:.*]] = vector.mask %[[VAL_10]] { vector.gather %[[VAL_0]]{{\[}}%[[VAL_17]], %[[VAL_17]]] {{\[}}%[[VAL_24]]], %[[VAL_15]], %[[VAL_16]] : tensor, vector<1x4xindex>, vector<1x4xi1>, vector<1x4xf32> into vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32> // CHECK: %[[VAL_26:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_27:.*]] = vector.mask %[[VAL_10]] { vector.transfer_write %[[VAL_25]], %[[VAL_2]]{{\[}}%[[VAL_26]], %[[VAL_26]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor } : vector<1x4xi1> -> tensor +// CHECK: %[[VAL_27:.*]] = vector.mask %[[VAL_10]] { vector.transfer_write %[[VAL_25]], %[[VAL_2]]{{\[}}%[[VAL_26]], %[[VAL_26]]] {in_bounds = array} : vector<1x4xf32>, tensor } : vector<1x4xi1> -> tensor // CHECK: return %[[VAL_27]] : tensor // CHECK: } @@ -362,7 +362,7 @@ func.func @extract_masked_vectorize(%arg0: tensor, %arg1: tensor -// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_8]], %[[VAL_8]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor, vector<3x3xf32> } : vector<3x3xi1> -> vector<3x3xf32> +// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_8]], %[[VAL_8]]], %[[VAL_9]] {in_bounds = array} : tensor, vector<3x3xf32> } : vector<3x3xi1> -> vector<3x3xf32> // CHECK: %[[VAL_12:.*]] = arith.constant dense : vector<3x3xi1> // CHECK: %[[VAL_13:.*]] = arith.constant dense<0.000000e+00> : vector<3x3xf32> // CHECK: %[[VAL_14:.*]] = arith.constant 0 : index @@ -375,7 +375,7 @@ func.func @extract_masked_vectorize(%arg0: tensor, %arg1: tensor // CHECK: %[[VAL_22:.*]] = vector.mask %[[VAL_10]] { vector.gather %[[VAL_0]]{{\[}}%[[VAL_14]], %[[VAL_14]]] {{\[}}%[[VAL_21]]], %[[VAL_12]], %[[VAL_13]] : tensor, vector<3x3xindex>, vector<3x3xi1>, vector<3x3xf32> into vector<3x3xf32> } : vector<3x3xi1> -> vector<3x3xf32> // CHECK: %[[VAL_23:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_24:.*]] = vector.mask %[[VAL_10]] { vector.transfer_write %[[VAL_22]], %[[VAL_1]]{{\[}}%[[VAL_23]], %[[VAL_23]]] {in_bounds = [true, true]} : vector<3x3xf32>, tensor } : vector<3x3xi1> -> tensor +// CHECK: %[[VAL_24:.*]] = vector.mask %[[VAL_10]] { vector.transfer_write %[[VAL_22]], %[[VAL_1]]{{\[}}%[[VAL_23]], %[[VAL_23]]] {in_bounds = array} : vector<3x3xf32>, tensor } : vector<3x3xi1> -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir index 
2c56b7139fec4..ac3d2a51f78e7 100644 --- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir @@ -61,7 +61,7 @@ func.func @vectorize_nd_tensor_extract_constant_idx(%arg0: tensor<3x3xf32>, %arg // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[C0_f32_2:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[C0_f32:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]][%[[C1]], %[[C2]]], %[[C0_f32]] {in_bounds = [true, true, true], permutation_map = #[[$MAP]]} : tensor<3x3xf32>, vector<1x1x3xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]][%[[C1]], %[[C2]]], %[[C0_f32]] {in_bounds = array, permutation_map = #[[$MAP]]} : tensor<3x3xf32>, vector<1x1x3xf32> // CHECK: %[[C0_4:.*]] = arith.constant 0 : index // CHECK: vector.transfer_write %[[READ]], %[[ARG_1]][%[[C0_4]], %[[C0_4]], %[[C0_4]]] : vector<1x1x3xf32>, tensor<1x1x3xf32> @@ -106,8 +106,8 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic( // CHECK: %[[IDX1:.*]] = vector.extractelement %[[IDX_VEC0]][%[[C0_i32]] : i32] : vector<3xindex> // CHECK: %[[IDX_VEC:.*]] = vector.shape_cast %[[CST]] : vector<1x1x3xindex> to vector<3xindex> // CHECK: %[[IDX2:.*]] = vector.extractelement %[[IDX_VEC]][%[[C0_i32]] : i32] : vector<3xindex> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[IDX1]], %[[IDX2]], %[[C0:.*]]], %[[CST_0]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32> -// CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[IDX1]], %[[IDX2]], %[[C0:.*]]], %[[CST_0]] {in_bounds = array} : tensor<3x3x3xf32>, vector<1x1x3xf32> +// CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array} : vector<1x1x3xf32>, tensor<1x1x3xf32> // Same as example above, but reading into a column tensor. 
Note that after the // vectorizatoin, the `TransferOpReduceRank` will replace @@ -138,7 +138,7 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic_column( // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[EXTRACT:.*]] = tensor.extract %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] : tensor<3x3x3xf32> // CHECK: %[[BCAST:.*]] = vector.broadcast %[[EXTRACT]] : f32 to vector<3x1x1xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[BCAST]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[BCAST]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array} : vector<3x1x1xf32>, tensor<3x1x1xf32> // CHECK: return %[[RES]] : tensor<3x1x1xf32> module attributes {transform.with_named_sequence} { @@ -190,8 +190,8 @@ func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16 // CHECK: %[[VAL_17:.*]] = vector.shape_cast %[[VAL_12]] : vector<1x4xindex> to vector<4xindex> // CHECK: %[[VAL_18:.*]] = vector.extractelement %[[VAL_17]]{{\[}}%[[VAL_7]] : i32] : vector<4xindex> // CHECK: %[[VAL_19:.*]] = vector.extractelement %[[VAL_16]]{{\[}}%[[VAL_7]] : i32] : vector<4xindex> -// CHECK: %[[VAL_20:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_18]], %[[VAL_10]], %[[VAL_19]]], %[[VAL_8]] {in_bounds = [true, true]} : tensor<45x80x16xf32>, vector<1x4xf32> -// CHECK: %[[VAL_21:.*]] = vector.transfer_write %[[VAL_20]], %[[VAL_5]]{{\[}}%[[VAL_9]], %[[VAL_9]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32> +// CHECK: %[[VAL_20:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_18]], %[[VAL_10]], %[[VAL_19]]], %[[VAL_8]] {in_bounds = array} : tensor<45x80x16xf32>, vector<1x4xf32> +// CHECK: %[[VAL_21:.*]] = vector.transfer_write %[[VAL_20]], %[[VAL_5]]{{\[}}%[[VAL_9]], %[[VAL_9]]] {in_bounds = array} : vector<1x4xf32>, tensor<1x4xf32> // CHECK: return %[[VAL_21]] : tensor<1x4xf32> // CHECK: } @@ -233,8 +233,8 @@ func.func @vectorize_nd_tensor_extract_index_from_tensor(%arg0: tensor<3x3xf32>, // CHECK-DAG: %[[CST:.*]] = arith.constant dense<3> : vector<7x2x4x3xindex> // CHECK-DAG: %[[CST_1:.*]] = arith.constant dense : vector<4x7x3x2xi1> // CHECK-DAG: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<4x7x3x2xf32> -// CHECK: %[[V0:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], %[[C0_i32]] {in_bounds = [true, true]} : tensor<4x3xi32>, vector<4x3xi32> -// CHECK: %[[V1:.*]] = vector.transfer_read %[[ARG2]][%[[C0]], %[[C0]]], %[[C0_i32]] {in_bounds = [true, true]} : tensor<4x3xi32>, vector<4x3xi32> +// CHECK: %[[V0:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], %[[C0_i32]] {in_bounds = array} : tensor<4x3xi32>, vector<4x3xi32> +// CHECK: %[[V1:.*]] = vector.transfer_read %[[ARG2]][%[[C0]], %[[C0]]], %[[C0_i32]] {in_bounds = array} : tensor<4x3xi32>, vector<4x3xi32> // CHECK: %[[CAST:.*]] = arith.index_cast %[[V0]] : vector<4x3xi32> to vector<4x3xindex> // CHECK: %[[B1:.*]] = vector.broadcast %[[CAST]] : vector<4x3xindex> to vector<7x2x4x3xindex> // CHECK: %[[CAST_1:.*]] = arith.index_cast %[[V1]] : vector<4x3xi32> to vector<4x3xindex> @@ -243,7 +243,7 @@ func.func @vectorize_nd_tensor_extract_index_from_tensor(%arg0: tensor<3x3xf32>, // CHECK: %[[ADDI:.*]] = arith.addi %[[B2]], %[[MULI]] : vector<7x2x4x3xindex> // CHECK: %[[T:.*]] = vector.transpose %[[ADDI]], [2, 0, 3, 1] : vector<7x2x4x3xindex> to vector<4x7x3x2xindex> // CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]]] [%[[T]]], %[[CST_1]], 
%[[PASSTHRU]] : tensor<3x3xf32>, vector<4x7x3x2xindex>, vector<4x7x3x2xi1>, vector<4x7x3x2xf32> into vector<4x7x3x2xf32> -// CHECK: vector.transfer_write %[[GATHER]], %[[ARG4]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true, true]} : vector<4x7x3x2xf32>, tensor<4x7x3x2xf32> +// CHECK: vector.transfer_write %[[GATHER]], %[[ARG4]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = array} : vector<4x7x3x2xf32>, tensor<4x7x3x2xf32> module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { @@ -303,7 +303,7 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[B3:.*]] = vector.broadcast %[[B2]] : vector<1xindex> to vector<8x1xindex> // CHECK: %[[ADDI:.*]] = arith.addi %[[B3]], %[[T]] : vector<8x1xindex> // CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]] [%[[ADDI]]], %[[CST_2]], %[[PASSTHRU]] : tensor<8x128x768xf32>, vector<8x1xindex>, vector<8x1xi1>, vector<8x1xf32> into vector<8x1xf32> -// CHECK: vector.transfer_write %[[GATHER]], %[[EMPTY]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x1xf32>, tensor<8x1xf32> +// CHECK: vector.transfer_write %[[GATHER]], %[[EMPTY]][%[[C0]], %[[C0]]] {in_bounds = array} : vector<8x1xf32>, tensor<8x1xf32> // ----- @@ -347,7 +347,7 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[MUL:.*]] = arith.muli %[[B]], %[[C128]] : vector<1x8xindex> // CHECK: %[[TR:.*]] = vector.transpose %[[MUL]], [1, 0] : vector<1x8xindex> to vector<8x1xindex> // CHECK: %[[GATHER:.*]] = vector.gather %[[SRC]]{{\[}}%[[C0]], %[[C0]]] {{\[}}%[[TR]]], %[[MASK]], %[[PASS_THRU]] : tensor<8x128xf32>, vector<8x1xindex>, vector<8x1xi1>, vector<8x1xf32> into vector<8x1xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[GATHER]], %[[OUT]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x1xf32>, tensor<8x1xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[GATHER]], %[[OUT]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = array} : vector<8x1xf32>, tensor<8x1xf32> // CHECK: return %[[RES]] : tensor<8x1xf32> // ----- @@ -392,7 +392,7 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[B:.*]] = vector.broadcast %[[IDX_VEC]] : vector<8xindex> to vector<1x8xindex> // CHECK: %[[TR:.*]] = vector.transpose %[[B]], [1, 0] : vector<1x8xindex> to vector<8x1xindex> // CHECK: %[[GATHER:.*]] = vector.gather %[[SRC]]{{\[}}%[[C0]], %[[C0]]] {{\[}}%[[TR]]], %[[MASK]], %[[PASS_THRU]] : tensor<8x128xf32>, vector<8x1xindex>, vector<8x1xi1>, vector<8x1xf32> into vector<8x1xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[GATHER]], %[[OUT]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x1xf32>, tensor<8x1xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[GATHER]], %[[OUT]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = array} : vector<8x1xf32>, tensor<8x1xf32> // CHECK: return %[[RES]] : tensor<8x1xf32> // ----- @@ -425,12 +425,12 @@ func.func @vectorize_nd_tensor_extract_contiguous_and_gather(%arg0: tensor<6xf32 // CHECK-DAG: %[[VAL_6:.*]] = arith.constant dense : vector<5xi1> // CHECK-DAG: %[[VAL_7:.*]] = arith.constant dense<0.000000e+00> : vector<5xf32> // CHECK: %[[VAL_8:.*]] = tensor.empty() : tensor<5xf32> -// CHECK: %[[VAL_9:.*]] = vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_2]]], %[[VAL_3]] {in_bounds = [true]} : tensor<5xi32>, vector<5xi32> +// CHECK: %[[VAL_9:.*]] = vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_2]]], %[[VAL_3]] {in_bounds = array} : tensor<5xi32>, vector<5xi32> // CHECK: %[[VAL_10:.*]] = 
arith.index_cast %[[VAL_9]] : vector<5xi32> to vector<5xindex> // CHECK: %[[VAL_11:.*]] = arith.maxsi %[[VAL_10]], %[[VAL_4]] : vector<5xindex> // CHECK: %[[VAL_12:.*]] = arith.minsi %[[VAL_11]], %[[VAL_5]] : vector<5xindex> // CHECK: %[[VAL_13:.*]] = vector.gather %[[VAL_0]]{{\[}}%[[VAL_2]]] {{\[}}%[[VAL_12]]], %[[VAL_6]], %[[VAL_7]] : tensor<6xf32>, vector<5xindex>, vector<5xi1>, vector<5xf32> into vector<5xf32> -// CHECK: %[[VAL_14:.*]] = vector.transfer_write %[[VAL_13]], %[[VAL_8]]{{\[}}%[[VAL_2]]] {in_bounds = [true]} : vector<5xf32>, tensor<5xf32> +// CHECK: %[[VAL_14:.*]] = vector.transfer_write %[[VAL_13]], %[[VAL_8]]{{\[}}%[[VAL_2]]] {in_bounds = array} : vector<5xf32>, tensor<5xf32> // CHECK: return %[[VAL_14]] : tensor<5xf32> module attributes {transform.with_named_sequence} { @@ -472,8 +472,8 @@ func.func @vectorize_nd_tensor_extract_with_affine_apply_contiguous(%6: tensor<8 // CHECK: %[[VAL_8:.*]] = vector.broadcast %[[VAL_1]] : index to vector<4xindex> // CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_3]] : vector<4xindex> // CHECK: %[[VAL_10:.*]] = vector.extractelement %[[VAL_9]]{{\[}}%[[VAL_4]] : i32] : vector<4xindex> -// CHECK: %[[VAL_11:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_7]], %[[VAL_10]]], %[[VAL_5]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32> -// CHECK: %[[VAL_12:.*]] = vector.transfer_write %[[VAL_11]], %[[VAL_2]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32> +// CHECK: %[[VAL_11:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_7]], %[[VAL_10]]], %[[VAL_5]] {in_bounds = array} : tensor<80x16xf32>, vector<1x4xf32> +// CHECK: %[[VAL_12:.*]] = vector.transfer_write %[[VAL_11]], %[[VAL_2]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = array} : vector<1x4xf32>, tensor<1x4xf32> // CHECK: return %[[VAL_12]] : tensor<1x4xf32> // CHECK: } @@ -518,7 +518,7 @@ func.func @vectorize_nd_tensor_extract_with_tensor_extract(%input_1: tensor<1x20 // CHECK: tensor.extract %[[INPUT_1]][%[[EXTRACTED_0_IDX_0]], %[[EXTRACTED_0_IDX_1]]] : tensor<1x20xi32> // The following `tensor.extract` from the generic Op s a contiguous load (all Ops used // for address calculation also satisfy the required conditions). 
-// CHECK: vector.transfer_read %[[INPUT_2]][%{{.*}}, %{{.*}}, %{{.*}} {in_bounds = [true, true]} : tensor<257x24xf32>, vector<1x4xf32>
+// CHECK: vector.transfer_read %[[INPUT_2]][%{{.*}}, %{{.*}}, %{{.*}} {in_bounds = array<i1: true, true>} : tensor<257x24xf32>, vector<1x4xf32>
 module attributes {transform.with_named_sequence} {
@@ -563,7 +563,7 @@ func.func @vectorize_nd_tensor_extract_with_affine_apply_gather(%6: tensor<80x16
 // CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_10]], %[[VAL_7]] : vector<1x4xindex>
 // CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_7]] : vector<1x4xindex>
 // CHECK: %[[VAL_13:.*]] = vector.gather %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {{\[}}%[[VAL_12]]], %[[VAL_4]], %[[VAL_5]] : tensor<80x16xf32>, vector<1x4xindex>, vector<1x4xi1>, vector<1x4xf32> into vector<1x4xf32>
-// CHECK: %[[VAL_14:.*]] = vector.transfer_write %[[VAL_13]], %[[VAL_2]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+// CHECK: %[[VAL_14:.*]] = vector.transfer_write %[[VAL_13]], %[[VAL_2]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, tensor<1x4xf32>
 // CHECK: return %[[VAL_14]] : tensor<1x4xf32>
 // CHECK: }
@@ -605,7 +605,7 @@ func.func @vectorize_nd_tensor_extract_with_maxsi_gather(%arg0: tensor<80x16xf32
 // CHECK: %[[VAL_7:.*]] = vector.broadcast %[[VAL_2]] : vector<4xindex> to vector<1x4xindex>
 // CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_3]] : vector<1x4xindex>
 // CHECK: %[[VAL_9:.*]] = vector.gather %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {{\[}}%[[VAL_8]]], %[[VAL_4]], %[[VAL_5]] : tensor<80x16xf32>, vector<1x4xindex>, vector<1x4xi1>, vector<1x4xf32> into vector<1x4xf32>
-// CHECK: %[[VAL_10:.*]] = vector.transfer_write %[[VAL_9]], %[[VAL_1]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+// CHECK: %[[VAL_10:.*]] = vector.transfer_write %[[VAL_9]], %[[VAL_1]]{{\[}}%[[VAL_6]], %[[VAL_6]]] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, tensor<1x4xf32>
 // CHECK: return %[[VAL_10]] : tensor<1x4xf32>
 // CHECK: }
@@ -646,8 +646,8 @@ func.func @vectorize_nd_tensor_extract_with_maxsi_contiguous(%arg0: tensor<80x16
 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK: %[[VAL_6:.*]] = vector.shape_cast %[[VAL_2]] : vector<1x4xindex> to vector<4xindex>
 // CHECK: %[[VAL_7:.*]] = vector.extractelement %[[VAL_6]]{{\[}}%[[VAL_3]] : i32] : vector<4xindex>
-// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_7]], %[[VAL_4]]], %[[VAL_5]] {in_bounds = [true, true]} : tensor<80x16xf32>, vector<1x4xf32>
-// CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_1]]{{\[}}%[[VAL_4]], %[[VAL_4]]] {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_7]], %[[VAL_4]]], %[[VAL_5]] {in_bounds = array<i1: true, true>} : tensor<80x16xf32>, vector<1x4xf32>
+// CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_1]]{{\[}}%[[VAL_4]], %[[VAL_4]]] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, tensor<1x4xf32>
 // CHECK: return %[[VAL_9]] : tensor<1x4xf32>
 // CHECK: }
@@ -684,11 +684,11 @@ func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1:
 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant dense<0.000000e+00> : vector<5xf32>
 // CHECK-DAG: %[[VAL_6:.*]] = arith.constant dense<6> : vector<5xindex>
 // CHECK: %[[VAL_7:.*]] = tensor.empty() : tensor<5xf32>
-// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_2]]], %[[VAL_2]] {in_bounds = [true]} : tensor<5xindex>, vector<5xindex>
+// CHECK: %[[VAL_8:.*]] = vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_2]]], %[[VAL_2]] {in_bounds = array<i1: true>} : tensor<5xindex>, vector<5xindex>
 // CHECK: %[[VAL_9:.*]] = arith.muli %[[VAL_8]], %[[VAL_6]] : vector<5xindex>
 // CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_3]] : vector<5xindex>
 // CHECK: %[[VAL_11:.*]] = vector.gather %[[VAL_0]]{{\[}}%[[VAL_2]], %[[VAL_2]]] {{\[}}%[[VAL_10]]], %[[VAL_4]], %[[VAL_5]] : tensor<5x6xf32>, vector<5xindex>, vector<5xi1>, vector<5xf32> into vector<5xf32>
-// CHECK: %[[VAL_12:.*]] = vector.transfer_write %[[VAL_11]], %[[VAL_7]]{{\[}}%[[VAL_2]]] {in_bounds = [true]} : vector<5xf32>, tensor<5xf32>
+// CHECK: %[[VAL_12:.*]] = vector.transfer_write %[[VAL_11]], %[[VAL_7]]{{\[}}%[[VAL_2]]] {in_bounds = array<i1: true>} : vector<5xf32>, tensor<5xf32>
 // CHECK: return %[[VAL_12]] : tensor<5xf32>
 // CHECK: }
@@ -819,7 +819,7 @@ func.func @vectorize_scalar_broadcast_column_tensor(%in: tensor<1x1x4xi32>) -> t
 // CHECK: %[[VAL_20:.*]] = vector.shape_cast %[[VAL_15]] : vector<1x1x4xindex> to vector<4xindex>
 // CHECK: %[[VAL_21:.*]] = vector.extractelement %[[VAL_20]]{{\[}}%[[VAL_19]] : i32] : vector<4xindex>
 // CHECK: %[[VAL_22:.*]] = arith.constant 0 : i32
-// CHECK: %[[VAL_23:.*]] = vector.transfer_read %[[VAL_3]]{{\[}}%[[VAL_21]], %[[VAL_2]]], %[[VAL_22]] {in_bounds = [true, true, true], permutation_map = #[[$ATTR_1]]} : tensor<15x1xi32>, vector<1x1x4xi32>
+// CHECK: %[[VAL_23:.*]] = vector.transfer_read %[[VAL_3]]{{\[}}%[[VAL_21]], %[[VAL_2]]], %[[VAL_22]] {in_bounds = array<i1: true, true, true>, permutation_map = #[[$ATTR_1]]} : tensor<15x1xi32>, vector<1x1x4xi32>
 // CHECK: %[[VAL_24:.*]] = arith.constant 0 : index
 // CHECK: %[[VAL_25:.*]] = vector.transfer_write %[[VAL_23]], %[[VAL_0]]{{\[}}%[[VAL_24]], %[[VAL_24]], %[[VAL_24]]] : vector<1x1x4xi32>, tensor<1x1x4xi32>
diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
index 327cacf7d9a20..ce1ac5646d26b 100644
--- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
+++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
@@ -102,7 +102,7 @@ func.func @fold_subview_with_transfer_read(%arg0 : memref<12x32xf32>, %arg1 : in
   %f1 = arith.constant 1.0 : f32
   %0 = memref.subview %arg0[%arg1, %arg2][4, 4][%arg5, %arg6] : memref<12x32xf32> to memref<4x4xf32, strided<[?, ?], offset: ?>>
-  %1 = vector.transfer_read %0[%arg3, %arg4], %f1 {in_bounds = [true]} : memref<4x4xf32, strided<[?, ?], offset: ?>>, vector<4xf32>
+  %1 = vector.transfer_read %0[%arg3, %arg4], %f1 {in_bounds = array<i1: true>} : memref<4x4xf32, strided<[?, ?], offset: ?>>, vector<4xf32>
   return %1 : vector<4xf32>
 }
 // CHECK: func @fold_subview_with_transfer_read
@@ -116,7 +116,7 @@ func.func @fold_static_stride_subview_with_transfer_write_0d(
     %v : vector<f32>) {
   %f1 = arith.constant 1.0 : f32
   %0 = memref.subview %arg0[%arg1, %arg2][1, 1][1, 1] : memref<12x32xf32> to memref<f32, strided<[], offset: ?>>
-  vector.transfer_write %v, %0[] {in_bounds = []} : vector<f32>, memref<f32, strided<[], offset: ?>>
+  vector.transfer_write %v, %0[] : vector<f32>, memref<f32, strided<[], offset: ?>>
   return
 }
 // CHECK: func @fold_static_stride_subview_with_transfer_write_0d
@@ -132,7 +132,7 @@ func.func @fold_static_stride_subview_with_transfer_write(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5: index, %arg6 : index, %arg7 : vector<4xf32>) {
   %0 = memref.subview %arg0[%arg1, %arg2][4, 4][%arg5, %arg6] : memref<12x32xf32> to memref<4x4xf32, strided<[?, ?], offset: ?>>
-  vector.transfer_write %arg7, %0[%arg3, %arg4] {in_bounds = [true]} : vector<4xf32>, memref<4x4xf32, strided<[?, ?], offset: ?>>
+  vector.transfer_write %arg7, %0[%arg3, %arg4] {in_bounds = array<i1: true>} : vector<4xf32>, memref<4x4xf32, strided<[?, ?], offset: ?>>
   return
 }
 // CHECK: func @fold_static_stride_subview_with_transfer_write
@@ -186,7 +186,7 @@ func.func @fold_vector_transfer_read_with_rank_reduced_subview(
   %0 = memref.subview %arg0[0, %arg1, %arg2] [1, %arg3, %arg4] [1, 1, 1]
      : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> to
       memref<?x?xf32, strided<[?, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%arg5, %arg6], %cst {in_bounds = [true]}
+  %1 = vector.transfer_read %0[%arg5, %arg6], %cst {in_bounds = array<i1: true>}
      : memref<?x?xf32, strided<[?, 1], offset: ?>>, vector<4xf32>
   return %1 : vector<4xf32>
 }
@@ -214,7 +214,7 @@ func.func @fold_vector_transfer_write_with_rank_reduced_subview(
   %0 = memref.subview %arg0[0, %arg2, %arg3] [1, %arg4, %arg5] [1, 1, 1]
      : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> to
       memref<?x?xf32, strided<[?, 1], offset: ?>>
-  vector.transfer_write %arg1, %0[%arg6, %arg7] {in_bounds = [true]}
+  vector.transfer_write %arg1, %0[%arg6, %arg7] {in_bounds = array<i1: true>}
     : vector<4xf32>, memref<?x?xf32, strided<[?, 1], offset: ?>>
   return
 }
@@ -231,7 +231,7 @@ func.func @fold_vector_transfer_write_with_rank_reduced_subview(
 // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
 // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]]
 // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]]
-// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
+// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = array<i1: true>} : vector<4xf32>, memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
     : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> to
      memref<?x?xf32, strided<[?, ?], offset: ?>>
-  vector.transfer_write %arg1, %0[%arg6, %arg7] {in_bounds = [true]}
+  vector.transfer_write %arg1, %0[%arg6, %arg7] {in_bounds = array<i1: true>}
    : vector<4xf32>, memref<?x?xf32, strided<[?, ?], offset: ?>>
   return
 }
@@ -262,7 +262,7 @@ func.func @fold_vector_transfer_write_with_inner_rank_reduced_subview(
 // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]]
 // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]]
 // CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]]
-// CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
+// CHECK-SAME: {in_bounds = array<i1: true>, permutation_map = #[[MAP2]]} : vector<4xf32>, memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
    : memref<?x?xf32, strided<[?, ?], offset: ?>> to
     memref<?x?xf32, strided<[?, ?], offset: ?>>
-  %1 = vector.transfer_read %0[%arg5, %arg6], %cst, %mask {in_bounds = [true]}
+  %1 = vector.transfer_read %0[%arg5, %arg6], %cst, %mask {in_bounds = array<i1: true>}
    : memref<?x?xf32, strided<[?, ?], offset: ?>>, vector<4xf32>
   return %1 : vector<4xf32>
 }
@@ -303,7 +303,7 @@ func.func @fold_masked_vector_transfer_read_with_rank_reducing_subview(
    : memref<?x?x?x?xf32, strided<[?, ?, ?, ?], offset: ?>> to
     memref<?x?xf32, strided<[?, ?], offset: ?>>
   %1 = vector.transfer_read %0[%arg5, %arg6], %cst, %mask {
-      permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]}
+      permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: true, true>}
    : memref<?x?xf32, strided<[?, ?], offset: ?>>, vector<3x4xf32>
   return %1 : vector<3x4xf32>
 }
@@ -334,7 +334,7 @@ func.func @fold_masked_vector_transfer_write_with_subview(
   %0 = memref.subview %arg0[%arg2, %arg3] [%arg4, %arg5] [1, 1]
    : memref<?x?xf32, strided<[?, ?], offset: ?>> to
     memref<?x?xf32, strided<[?, ?], offset: ?>>
-  vector.transfer_write %arg1, %0[%arg6, %arg7], %mask {in_bounds = [true]}
+  vector.transfer_write %arg1, %0[%arg6, %arg7], %mask {in_bounds = array<i1: true>}
    : vector<4xf32>, memref<?x?xf32, strided<[?, ?], offset: ?>>
   return
 }
@@ -351,7 +351,7 @@ func.func @fold_masked_vector_transfer_write_with_subview(
 // CHECK-SAME: %[[MASK:[a-zA-Z0-9]+]]: vector<4xi1>
 // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]]
 // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]]
-// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]]], %[[MASK]] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32, strided<[?, ?], offset: ?>>
+// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]]], %[[MASK]] {in_bounds = array<i1: true>} : vector<4xf32>, memref<?x?xf32, strided<[?, ?], offset: ?>>
    : memref<?x?x?x?xf32, strided<[?, ?, ?, ?], offset: ?>> to
     memref<?x?xf32, strided<[?, ?], offset: ?>>
   vector.transfer_write %arg1, %0[%arg6, %arg7], %mask {
-      permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]}
+      permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = array<i1: true, true>}
    : vector<3x4xf32>, memref<?x?xf32, strided<[?, ?], offset: ?>>
   return
 }
@@ -383,7 +383,7 @@ func.func @fold_masked_vector_transfer_write_with_rank_reducing_subview(
 // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
 // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP0]]()[%[[ARG2]], %[[ARG6]]]
 // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP0]]()[%[[ARG3]], %[[ARG7]]]
-// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[C0]], %[[IDX1]]], %[[ARG8]] {in_bounds = [true, true], permutation_map = #[[MAP1]]} : vector<3x4xf32>, memref<?x?x?x?xf32, strided<[?, ?, ?, ?], offset: ?>>
+// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[C0]], %[[IDX1]]], %[[ARG8]] {in_bounds = array<i1: true, true>, permutation_map = #[[MAP1]]} : vector<3x4xf32>, memref<?x?x?x?xf32, strided<[?, ?, ?, ?], offset: ?>>
     memref<8x4xf32, affine_map<(d0, d1) -> (d0 * 64 + d1)>, 3>
   %c0 = arith.constant 0 : index
   %cst_0 = arith.constant dense<0.000000e+00> : vector<1x4xf32>
-  vector.transfer_write %cst_0, %1[%c0, %c0] {in_bounds = [true, true]} :
+  vector.transfer_write %cst_0, %1[%c0, %c0] {in_bounds = array<i1: true, true>} :
     vector<1x4xf32>, memref<8x4xf32, affine_map<(d0, d1) -> (d0 * 64 + d1)>, 3>
   return
 }
@@ -339,8 +339,8 @@ func.func @store_to_load(%arg: vector<4xf32>) -> vector<4xf32> {
   %c0 = arith.constant 0 : index
   %cst_1 = arith.constant 0.000000e+00 : f32
   %alloc = memref.alloc() {alignment = 64 : i64} : memref<64xf32>
-  vector.transfer_write %arg, %alloc[%c0] {in_bounds = [true]} : vector<4xf32>, memref<64xf32>
-  %r = vector.transfer_read %alloc[%c0], %cst_1 {in_bounds = [true]} : memref<64xf32>, vector<4xf32>
+  vector.transfer_write %arg, %alloc[%c0] {in_bounds = array<i1: true>} : vector<4xf32>, memref<64xf32>
+  %r = vector.transfer_read %alloc[%c0], %cst_1 {in_bounds = array<i1: true>} : memref<64xf32>, vector<4xf32>
   return %r : vector<4xf32>
 }
diff --git a/mlir/test/Dialect/NVGPU/transform-create-async-groups.mlir b/mlir/test/Dialect/NVGPU/transform-create-async-groups.mlir
index 8290001c45856..8ee500dbbe714 100644
--- a/mlir/test/Dialect/NVGPU/transform-create-async-groups.mlir
+++ b/mlir/test/Dialect/NVGPU/transform-create-async-groups.mlir
@@ -10,13 +10,13 @@ builtin.module {
   %cst_0 = arith.constant 0.000000e+00 : f32
   // Make sure we emit the bypassL1.
   // CHECK: %[[CP0:.*]] = nvgpu.device_async_copy {{.*}}, {{.*}}, 4 {bypassL1} :
-  %1 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = [true]} : memref<1024x1024xf32>, vector<4xf32>
-  vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = [true]} : vector<4xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
+  %1 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = array<i1: true>} : memref<1024x1024xf32>, vector<4xf32>
+  vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = array<i1: true>} : vector<4xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
   // CHECK-NOT: nvgpu.device_async_create_group
   // CHECK: %[[CP1:.*]] = nvgpu.device_async_copy {{.*}}, {{.*}}, 1
-  %2 = vector.transfer_read %a[%c0, %c4], %cst_0 {in_bounds = [true]} : memref<1024x1024xf32>, vector<1xf32>
-  vector.transfer_write %2, %0[%c0, %c4, %c0] {in_bounds = [true]} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
+  %2 = vector.transfer_read %a[%c0, %c4], %cst_0 {in_bounds = array<i1: true>} : memref<1024x1024xf32>, vector<1xf32>
+  vector.transfer_write %2, %0[%c0, %c4, %c0] {in_bounds = array<i1: true>} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space<workgroup>>
   // CHECK: %[[G:.*]] = nvgpu.device_async_create_group %[[CP0]], %[[CP1]]
   // CHECK: nvgpu.device_async_wait %[[G]]
   return
@@ -44,13 +44,13 @@ builtin.module {
   %cst_0 = arith.constant 0.000000e+00 : f32
   // Make sure we don't emit the bypassL1.
// CHECK: %[[CP0:.*]] = nvgpu.device_async_copy {{.*}}, {{.*}}, 4 : - %1 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = [true]} : memref<1024x1024xf32>, vector<4xf32> - vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = [true]} : vector<4xf32>, memref<4x32x16xf32, #gpu.address_space> + %1 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = array} : memref<1024x1024xf32>, vector<4xf32> + vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = array} : vector<4xf32>, memref<4x32x16xf32, #gpu.address_space> // CHECK-NOT: nvgpu.device_async_create_group // CHECK: %[[CP1:.*]] = nvgpu.device_async_copy {{.*}}, {{.*}}, 1 : - %2 = vector.transfer_read %a[%c0, %c4], %cst_0 {in_bounds = [true]} : memref<1024x1024xf32>, vector<1xf32> - vector.transfer_write %2, %0[%c0, %c4, %c0] {in_bounds = [true]} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space> + %2 = vector.transfer_read %a[%c0, %c4], %cst_0 {in_bounds = array} : memref<1024x1024xf32>, vector<1xf32> + vector.transfer_write %2, %0[%c0, %c4, %c0] {in_bounds = array} : vector<1xf32>, memref<4x32x16xf32, #gpu.address_space> // CHECK: %[[G:.*]] = nvgpu.device_async_create_group %[[CP0]], %[[CP1]] // CHECK: nvgpu.device_async_wait %[[G]] return @@ -147,8 +147,8 @@ builtin.module { %cst_0 = arith.constant 0.000000e+00 : f32 // CHECK: nvgpu.device_async_copy {{.*}}, {{.*}}, 4, %[[sz]] {bypassL1} : %mask = vector.create_mask %sz : vector<4xi1> - %1 = vector.transfer_read %a[%c0, %c0], %cst_0, %mask {in_bounds = [true]} : memref<1024x1024xf32>, vector<4xf32> - vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = [true]} : vector<4xf32>, memref<4x32x16xf32, #gpu.address_space> + %1 = vector.transfer_read %a[%c0, %c0], %cst_0, %mask {in_bounds = array} : memref<1024x1024xf32>, vector<4xf32> + vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = array} : vector<4xf32>, memref<4x32x16xf32, #gpu.address_space> return } @@ -188,8 +188,8 @@ builtin.module { // CHECK: %[[s2:.*]] = arith.select %[[cmpi2]], %[[sz1]], %[[c0]] // CHECK: nvgpu.device_async_copy %[[a]][%[[c2]], %[[c0]]], {{.*}}, 4, %[[s2]] {bypassL1} %mask = vector.create_mask %sz0, %sz1 : vector<3x4xi1> - %1 = vector.transfer_read %a[%c0, %c0], %cst_0, %mask {in_bounds = [true, true]} : memref<1024x1024xf32>, vector<3x4xf32> - vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = [true, true]} : vector<3x4xf32>, memref<4x32x16xf32, #gpu.address_space> + %1 = vector.transfer_read %a[%c0, %c0], %cst_0, %mask {in_bounds = array} : memref<1024x1024xf32>, vector<3x4xf32> + vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = array} : vector<3x4xf32>, memref<4x32x16xf32, #gpu.address_space> return } @@ -251,8 +251,8 @@ builtin.module { // CHECK: %[[s5:.*]] = arith.select %[[cond5]], %[[sz2]], %[[c0]] // CHECK: nvgpu.device_async_copy %[[a]][%[[c1]], %[[c2]], %[[c0]]], {{.*}}, 4, %[[s5]] {bypassL1} %mask = vector.create_mask %sz0, %sz1, %sz2 : vector<2x3x4xi1> - %1 = vector.transfer_read %a[%c0, %c0, %c0], %cst_0, %mask {in_bounds = [true, true, true]} : memref<1024x1024x1024xf32>, vector<2x3x4xf32> - vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<2x3x4xf32>, memref<4x32x16xf32, #gpu.address_space> + %1 = vector.transfer_read %a[%c0, %c0, %c0], %cst_0, %mask {in_bounds = array} : memref<1024x1024x1024xf32>, vector<2x3x4xf32> + vector.transfer_write %1, %0[%c0, %c0, %c0] {in_bounds = array} : vector<2x3x4xf32>, memref<4x32x16xf32, #gpu.address_space> return } diff --git a/mlir/test/Dialect/SCF/transform-loop-fuse-sibling.mlir 
b/mlir/test/Dialect/SCF/transform-loop-fuse-sibling.mlir index 54dd2bdf953ca..80d7fde009b7f 100644 --- a/mlir/test/Dialect/SCF/transform-loop-fuse-sibling.mlir +++ b/mlir/test/Dialect/SCF/transform-loop-fuse-sibling.mlir @@ -17,20 +17,20 @@ func.func @fuse_1st_for_into_2nd(%A: tensor<128xf32>, %B: tensor<128xf32>) -> (t // CHECK-DAG: [[SLICE0:%.*]] = vector.transfer_read [[IA]][[[IV]]], [[ZERO]] // CHECK: [[OUT1:%.*]] = arith.addf [[SLICE0]], [[ASLICE]] // CHECK-NEXT: [[WRT0:%.*]] = vector.transfer_write [[OUT1]], [[IA]][[[IV]]] - %2 = vector.transfer_read %A[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %2 = vector.transfer_read %A[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %5 = arith.addf %3, %2 : vector<16xf32> - %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> scf.yield %6 : tensor<128xf32> } %dup1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %B) -> (tensor<128xf32>) { // CHECK-DAG: [[SLICE1:%.*]] = vector.transfer_read [[IB]][[[IV]]], [[ZERO]] // CHECK: [[OUT2:%.*]] = arith.addf [[SLICE1]], [[ASLICE]] // CHECK-NEXT: [[WRT1:%.*]] = vector.transfer_write [[OUT2]], [[IB]][[[IV]]] - %dup2 = vector.transfer_read %A[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %dup2 = vector.transfer_read %A[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %dup5 = arith.addf %dup3, %dup2 : vector<16xf32> - %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> // CHECK: scf.yield [[WRT0]], [[WRT1]] : {{.*}} scf.yield %dup6 : tensor<128xf32> } @@ -63,23 +63,23 @@ func.func @fuse_2nd_for_into_1st(%A: tensor<128xf32>, %B: tensor<128xf32>) -> (t // CHECK-DAG: [[SLICE0:%.*]] = vector.transfer_read [[IB]][[[IV]]], [[ZERO]] // CHECK: [[OUT1:%.*]] = arith.addf [[SLICE0]], [[ASLICE]] // CHECK-NEXT: [[WRT0:%.*]] = vector.transfer_write [[OUT1]], [[IB]][[[IV]]] - %2 = vector.transfer_read %A[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %2 = vector.transfer_read %A[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %5 = arith.addf %3, %2 : vector<16xf32> - %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> scf.yield %6 : tensor<128xf32> } %dup1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %B) -> (tensor<128xf32>) { // CHECK-DAG: [[SLICE1:%.*]] = vector.transfer_read [[IA]][[[IV]]], [[ZERO]] // CHECK: [[OUT2:%.*]] = arith.addf [[SLICE1]], [[ASLICE]] // CHECK-NEXT: [[WRT1:%.*]] = vector.transfer_write [[OUT2]], 
[[IA]][[[IV]]] - %dup2 = vector.transfer_read %A[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %dup2 = vector.transfer_read %A[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> // NB: the dominance check used to fail on the following line, // however the defining op for the value of %arg3 occurs above the source loop and hence is safe // and %arg4 is a block argument of the scope of the loops and hence is safe - %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %dup5 = arith.addf %dup3, %dup2 : vector<16xf32> - %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> // CHECK: scf.yield [[WRT0]], [[WRT1]] : {{.*}} scf.yield %dup6 : tensor<128xf32> } @@ -189,12 +189,12 @@ func.func @fuse_no_iter_args(%A: tensor<128xf32>, %B: tensor<128xf32>) { // CHECK-NOCLEANUP: scf.for [[IV:%.*]] = [[C0]] to [[C128]] step [[C16]] {{.*}} scf.for %arg0 = %c0 to %c128 step %c16 { // CHECK-NOCLEANUP: [[ASLICE:%.*]] = vector.transfer_read [[A]][[[IV]]], [[ZERO]] - %2 = vector.transfer_read %A[%arg0], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %2 = vector.transfer_read %A[%arg0], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> scf.yield } scf.for %arg0 = %c0 to %c128 step %c16 { // CHECK-NOCLEANUP: [[BSLICE:%.*]] = vector.transfer_read [[B]][[[IV]]], [[ZERO]] - %dup2 = vector.transfer_read %B[%arg0], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %dup2 = vector.transfer_read %B[%arg0], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> scf.yield } return @@ -217,17 +217,17 @@ func.func @source_for_uses_result_of_target_for_err(%A: tensor<128xf32>, %B: ten %cst = arith.constant 0.000000e+00 : f32 // expected-error @below {{user of results of target should be properly dominated by source}} %1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %A) -> (tensor<128xf32>) { - %2 = vector.transfer_read %A[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %2 = vector.transfer_read %A[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %5 = arith.addf %3, %2 : vector<16xf32> - %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> scf.yield %6 : tensor<128xf32> } %dup1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %1) -> (tensor<128xf32>) { - %dup2 = vector.transfer_read %A[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %dup2 = vector.transfer_read %A[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %dup5 = arith.addf %dup3, %dup2 : vector<16xf32> - %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %dup6 = vector.transfer_write 
%dup5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> scf.yield %dup6 : tensor<128xf32> } return %1, %dup1 : tensor<128xf32>, tensor<128xf32> @@ -276,18 +276,18 @@ func.func @target_for_region_uses_result_of_source_for_err(%A: tensor<128xf32>, %c128 = arith.constant 128 : index %cst = arith.constant 0.000000e+00 : f32 %1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %A) -> (tensor<128xf32>) { - %2 = vector.transfer_read %A[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %2 = vector.transfer_read %A[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %5 = arith.addf %3, %2 : vector<16xf32> - %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> scf.yield %6 : tensor<128xf32> } %dup1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %B) -> (tensor<128xf32>) { // expected-error @below {{values used inside regions of target should be properly dominated by source}} - %dup2 = vector.transfer_read %1[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %dup2 = vector.transfer_read %1[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %dup5 = arith.addf %dup3, %dup2 : vector<16xf32> - %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> scf.yield %dup6 : tensor<128xf32> } return %1, %dup1 : tensor<128xf32>, tensor<128xf32> @@ -347,20 +347,20 @@ func.func @foreach_loop_pair_fuse(%arg1: tensor<128xf32>, %arg2: tensor<128xf32> // CHECK-DAG: [[SLICE0:%.*]] = vector.transfer_read [[IB0]][[[IV]]], [[ZERO]] // CHECK: [[OUT1:%.*]] = arith.addf [[SLICE0]], [[ASLICE]] // CHECK-NEXT: [[WRT0:%.*]] = vector.transfer_write [[OUT1]], [[IB0]][[[IV]]] - %2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %5 = arith.addf %3, %2 : vector<16xf32> - %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> scf.yield %6 : tensor<128xf32> } {target_loops} %dup1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %arg2) -> (tensor<128xf32>) { // CHECK-DAG: [[SLICE1:%.*]] = vector.transfer_read [[IB1]][[[IV]]], [[ZERO]] // CHECK: [[OUT2:%.*]] = arith.addf [[SLICE1]], [[ASLICE]] // CHECK-NEXT: [[WRT1:%.*]] = vector.transfer_write [[OUT2]], [[IB1]][[[IV]]] - %dup2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> - %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = 
[true]} : tensor<128xf32>, vector<16xf32> + %dup2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> + %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<16xf32> %dup5 = arith.addf %dup3, %dup2 : vector<16xf32> - %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = array} : vector<16xf32>, tensor<128xf32> // CHECK: scf.yield [[WRT0]], [[WRT1]] : {{.*}} scf.yield %dup6 : tensor<128xf32> } {source_loops} @@ -369,20 +369,20 @@ func.func @foreach_loop_pair_fuse(%arg1: tensor<128xf32>, %arg2: tensor<128xf32> // CHECK-DAG: [[SLICE0:%.*]] = vector.transfer_read [[IB0]][[[IV]]], [[ZERO]] // CHECK: [[OUT1:%.*]] = arith.addf [[SLICE0]], [[ASLICE]] // CHECK-NEXT: [[WRT0:%.*]] = vector.transfer_write [[OUT1]], [[IB0]][[[IV]]] - %2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<32xf32> - %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<32xf32> + %2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<32xf32> + %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<32xf32> %5 = arith.addf %3, %2 : vector<32xf32> - %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = [true]} : vector<32xf32>, tensor<128xf32> + %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = array} : vector<32xf32>, tensor<128xf32> scf.yield %6 : tensor<128xf32> } {target_loops} %dup2 = scf.for %arg3 = %c0 to %c128 step %c32 iter_args(%arg4 = %arg2) -> (tensor<128xf32>) { // CHECK-DAG: [[SLICE1:%.*]] = vector.transfer_read [[IB1]][[[IV]]], [[ZERO]] // CHECK: [[OUT2:%.*]] = arith.addf [[SLICE1]], [[ASLICE]] // CHECK-NEXT: [[WRT1:%.*]] = vector.transfer_write [[OUT2]], [[IB1]][[[IV]]] - %dup2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<32xf32> - %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<32xf32> + %dup2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<32xf32> + %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = array} : tensor<128xf32>, vector<32xf32> %dup5 = arith.addf %dup3, %dup2 : vector<32xf32> - %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = [true]} : vector<32xf32>, tensor<128xf32> + %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = array} : vector<32xf32>, tensor<128xf32> // CHECK: scf.yield [[WRT0]], [[WRT1]] : {{.*}} scf.yield %dup6 : tensor<128xf32> } {source_loops} diff --git a/mlir/test/Dialect/SCF/transform-ops.mlir b/mlir/test/Dialect/SCF/transform-ops.mlir index d9445182769e7..1bac4a7c98300 100644 --- a/mlir/test/Dialect/SCF/transform-ops.mlir +++ b/mlir/test/Dialect/SCF/transform-ops.mlir @@ -541,7 +541,7 @@ func.func @loop_pipeline(%arg0: memref<4x16xf32>, %arg1: vector<16xf32>) -> vect // CHECK: arith.addf // CHECK: arith.addf %0 = scf.for %arg2 = %c0 to %c3 step %c1 iter_args(%arg3 = %arg1) -> (vector<16xf32>) { - %1 = vector.transfer_read %arg0[%arg2, %c0], %cst {in_bounds = [true]} : memref<4x16xf32>, vector<16xf32> + %1 = vector.transfer_read %arg0[%arg2, %c0], %cst {in_bounds = array} : memref<4x16xf32>, vector<16xf32> %2 = arith.addf %1, %arg3 : vector<16xf32> scf.yield %2 : vector<16xf32> } @@ -568,7 +568,7 @@ func.func @loop_pipeline_lb_gt_0(%arg0: memref<4x16xf32>, %arg1: 
vector<16xf32>) // CHECK: arith.addf // CHECK: arith.addf %0 = scf.for %arg2 = %c1 to %c3 step %c1 iter_args(%arg3 = %arg1) -> (vector<16xf32>) { - %1 = vector.transfer_read %arg0[%arg2, %c1], %cst {in_bounds = [true]} : memref<4x16xf32>, vector<16xf32> + %1 = vector.transfer_read %arg0[%arg2, %c1], %cst {in_bounds = array} : memref<4x16xf32>, vector<16xf32> %2 = arith.addf %1, %arg3 : vector<16xf32> scf.yield %2 : vector<16xf32> } diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir index c2f21683d0cd6..dc41a7e1da305 100644 --- a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir +++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir @@ -18,14 +18,14 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index // CHECK: %[[add:.*]] = affine.apply #[[$map]]()[%[[s1]]] -// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true, true]} : tensor, vector<5x6xf32> +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = array} : tensor, vector<5x6xf32> // CHECK: return %[[r]] func.func @transfer_read_of_extract_slice(%t : tensor, %s1 : index, %s2 : index) -> vector<5x6xf32> { %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %cst = arith.constant 0.0 : f32 %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor to tensor<10x?xf32> - %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<10x?xf32>, vector<5x6xf32> + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array} : tensor<10x?xf32>, vector<5x6xf32> return %1 : vector<5x6xf32> } @@ -33,14 +33,14 @@ func.func @transfer_read_of_extract_slice(%t : tensor, %s1 : index, %s2 // CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index // CHECK: %[[add:.*]] = affine.apply #[[$map]]()[%[[s1]]] -// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true]} : tensor, vector<6xf32> +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = array} : tensor, vector<6xf32> // CHECK: return %[[r]] func.func @transfer_read_of_extract_slice_1d(%t : tensor, %s1 : index, %s2 : index) -> vector<6xf32> { %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %cst = arith.constant 0.0 : f32 %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor to tensor<10x?xf32> - %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true]} : tensor<10x?xf32>, vector<6xf32> + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array} : tensor<10x?xf32>, vector<6xf32> return %1 : vector<6xf32> } @@ -49,14 +49,14 @@ func.func @transfer_read_of_extract_slice_1d(%t : tensor, %s1 : index, // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index // CHECK: %[[add:.*]] = affine.apply #[[$map1]]()[%[[s1]]] -// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c5]], %[[add]], %[[c10]]], %{{.*}} {in_bounds = [true, true]} : tensor, vector<5x6xf32> +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c5]], %[[add]], %[[c10]]], %{{.*}} {in_bounds = array} : tensor, vector<5x6xf32> // CHECK: return %[[r]] func.func @transfer_read_of_extract_slice_rank_reducing(%t : tensor, %s1 : index, 
     %s2 : index) -> vector<5x6xf32> {
   %c3 = arith.constant 3 : index
   %c4 = arith.constant 4 : index
   %cst = arith.constant 0.0 : f32
   %0 = tensor.extract_slice %t[5, %s1, 6] [1, %s2, 12] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x12xf32>
-  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<?x12xf32>, vector<5x6xf32>
+  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array<i1: true, true>} : tensor<?x12xf32>, vector<5x6xf32>
   return %1 : vector<5x6xf32>
 }
@@ -64,14 +64,14 @@ func.func @transfer_read_of_extract_slice_rank_reducing(%t : tensor<?x?x?xf32>,
 // CHECK-SAME: %[[t:.*]]: tensor<?x?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
 // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
 // CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index
-// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[s1]], %[[c10]]], %{{.*}} {in_bounds = [true, true], permutation_map = #[[$map2]]} : tensor<?x?x?xf32>, vector<5x6xf32>
+// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[s1]], %[[c10]]], %{{.*}} {in_bounds = array<i1: true, true>, permutation_map = #[[$map2]]} : tensor<?x?x?xf32>, vector<5x6xf32>
 // CHECK: return %[[r]]
 func.func @transfer_read_of_extract_slice_non_leading_rank_reduction(%t : tensor<?x?x?xf32>, %s1 : index, %s2 : index) -> vector<5x6xf32> {
   %c3 = arith.constant 3 : index
   %c4 = arith.constant 4 : index
   %cst = arith.constant 0.0 : f32
   %0 = tensor.extract_slice %t[5, %s1, 6] [%s2, 1, 12] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x12xf32>
-  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<?x12xf32>, vector<5x6xf32>
+  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array<i1: true, true>} : tensor<?x12xf32>, vector<5x6xf32>
   return %1 : vector<5x6xf32>
 }
@@ -86,18 +86,18 @@ func.func @masked_transfer_read_of_extract_slice(%t : tensor<?x?xf32>, %s1 : ind
   %cst = arith.constant 0.0 : f32
   %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor<?x?xf32> to tensor<10x?xf32>
   %mask = vector.create_mask %c3, %c4 : vector<5x6xi1>
-  %1 = vector.mask %mask {vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<10x?xf32>, vector<5x6xf32>} : vector<5x6xi1> -> vector<5x6xf32>
+  %1 = vector.mask %mask {vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array<i1: true, true>} : tensor<10x?xf32>, vector<5x6xf32>} : vector<5x6xi1> -> vector<5x6xf32>
   return %1 : vector<5x6xf32>
 }
 
 // CHECK-LABEL: func @insert_slice_of_transfer_write(
 // CHECK-SAME: %[[t1:.*]]: tensor<?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
 // CHECK: %[[c3:.*]] = arith.constant 3 : index
-// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<?x12xf32>
+// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c3]], %[[s]]] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<?x12xf32>
 // CHECK: return %[[r]]
 func.func @insert_slice_of_transfer_write(%t1 : tensor<?x12xf32>, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor<?x12xf32> {
   %c0 = arith.constant 0 : index
-  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<5x6xf32>
   %1 = tensor.insert_slice %0 into %t1[3, %s] [5, 6] [1, 1] : tensor<5x6xf32> into tensor<?x12xf32>
   return %1 : tensor<?x12xf32>
 }
@@ -105,11 +105,11 @@ func.func @insert_slice_of_transfer_write(%t1 : tensor<?x12xf32>, %v : vector<5x
 // CHECK-LABEL: func @unit_insert_slice_of_unit_transfer_write(
 // CHECK-SAME: %[[t1:.*]]: tensor<1x1x12xf32>, %[[v:.*]]: vector<1x6xf32>, %[[s:.*]]: index
 // CHECK: %[[c0:.*]] = arith.constant 0 : index
-// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c0]], %[[c0]], %[[s]]] {in_bounds = [true, true]} : vector<1x6xf32>, tensor<1x1x12xf32>
+// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c0]], %[[c0]], %[[s]]] {in_bounds = array<i1: true, true>} : vector<1x6xf32>, tensor<1x1x12xf32>
 // CHECK: return %[[r]]
 func.func @unit_insert_slice_of_unit_transfer_write(%t1 : tensor<1x1x12xf32>, %v : vector<1x6xf32>, %s : index, %t2 : tensor<1x6xf32>) -> tensor<1x1x12xf32> {
   %c0 = arith.constant 0 : index
-  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<1x6xf32>, tensor<1x6xf32>
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<1x6xf32>, tensor<1x6xf32>
   %1 = tensor.insert_slice %0 into %t1[0, 0, %s] [1, 1, 6] [1, 1, 1] : tensor<1x6xf32> into tensor<1x1x12xf32>
   return %1 : tensor<1x1x12xf32>
 }
@@ -118,10 +118,10 @@ func.func @unit_insert_slice_of_unit_transfer_write(%t1 : tensor<1x1x12xf32>, %v
 // CHECK-SAME: %[[t1:.*]]: tensor<?x?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
 // CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index
 // CHECK-DAG: %[[c4:.*]] = arith.constant 4 : index
-// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = [true, true], permutation_map = #[[$map2]]} : vector<5x6xf32>, tensor<?x?x12xf32>
+// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = array<i1: true, true>, permutation_map = #[[$map2]]} : vector<5x6xf32>, tensor<?x?x12xf32>
 func.func @insert_slice_of_transfer_write_non_leading_rank_reduction(%t1 : tensor<?x?x12xf32>, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor<?x?x12xf32> {
   %c0 = arith.constant 0 : index
-  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<5x6xf32>
   %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [5, 1, 6] [1, 1, 1] : tensor<5x6xf32> into tensor<?x?x12xf32>
   return %1 : tensor<?x?x12xf32>
 }
@@ -130,11 +130,11 @@ func.func @insert_slice_of_transfer_write_non_leading_rank_reduction(%t1 : tenso
 // CHECK-SAME: %[[t1:.*]]: tensor<?x?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
 // CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index
 // CHECK-DAG: %[[c4:.*]] = arith.constant 4 : index
-// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<?x?x12xf32>
+// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<?x?x12xf32>
 // CHECK: return %[[r]]
 func.func @insert_slice_of_transfer_write_rank_extending(%t1 : tensor<?x?x12xf32>, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor<?x?x12xf32> {
   %c0 = arith.constant 0 : index
-  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<5x6xf32>
   %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [1, 5, 6] [1, 1, 1] : tensor<5x6xf32> into tensor<?x?x12xf32>
   return %1 : tensor<?x?x12xf32>
 }
diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
index 1a84e14104932..3bc5ef6651181 100644
--- a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
+++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
@@ -8,7 +8,7 @@ func.func @fold_vector_transfer_read_with_rank_reduced_extract_slice(
   %0 = tensor.extract_slice %arg0[0, %arg1, %arg2] [1, %arg3, %arg4] [1, 1, 1]
       : tensor<?x?x?xf32> to tensor<?x?xf32>
-  %1 = vector.transfer_read %0[%arg5, %arg6], %cst {in_bounds = [true]}
+  %1 = vector.transfer_read %0[%arg5, %arg6], %cst {in_bounds = array<i1: true>}
       : tensor<?x?xf32>, vector<4xf32>
   return %1 : vector<4xf32>
 }
@@ -40,7 +40,7 @@ func.func @transfer_read_from_rank_reducing_extract_slice_failure(
   // Can't fold this atm since we don't emit the proper vector.extract_strided_slice.
   // CHECK: tensor.extract_slice
   %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [2, 3, 4, 5] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32>
-  %1 = vector.transfer_read %0[%c1, %i4, %c2], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32>
+  %1 = vector.transfer_read %0[%c1, %i4, %c2], %f0 {in_bounds = array<i1: true>} : tensor<1x4x4xf32>, vector<4xf32>
   return %1 : vector<4xf32>
 }
@@ -52,14 +52,14 @@ func.func @transfer_read_from_rank_reducing_extract_slice_failure(
 // CHECK-SAME: %[[t:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
 // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
 // CHECK: %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s1]]]
-// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true, true]} : tensor<?x?xf32>, vector<5x6xf32>
+// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = array<i1: true, true>} : tensor<?x?xf32>, vector<5x6xf32>
 // CHECK: return %[[r]]
 func.func @transfer_read_of_extract_slice(%t : tensor<?x?xf32>, %s1 : index, %s2 : index) -> vector<5x6xf32> {
   %c3 = arith.constant 3 : index
   %c4 = arith.constant 4 : index
   %cst = arith.constant 0.0 : f32
   %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor<?x?xf32> to tensor<10x?xf32>
-  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<10x?xf32>, vector<5x6xf32>
+  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array<i1: true, true>} : tensor<10x?xf32>, vector<5x6xf32>
   return %1 : vector<5x6xf32>
 }
 // -----
@@ -87,14 +87,14 @@ func.func @fold_extract_slice_with_transfer_read_0d(
 // CHECK-SAME: %[[t:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
 // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
 // CHECK: %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s1]]]
-// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true]} : tensor<?x?xf32>, vector<6xf32>
+// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = array<i1: true>} : tensor<?x?xf32>, vector<6xf32>
 // CHECK: return %[[r]]
 func.func @transfer_read_of_extract_slice(%t : tensor<?x?xf32>, %s1 : index, %s2 : index) -> vector<6xf32> {
   %c3 = arith.constant 3 : index
   %c4 = arith.constant 4 : index
   %cst = arith.constant 0.0 : f32
   %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor<?x?xf32> to tensor<10x?xf32>
-  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true]} : tensor<10x?xf32>, vector<6xf32>
+  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array<i1: true>} : tensor<10x?xf32>, vector<6xf32>
   return %1 : vector<6xf32>
 }
@@ -107,14 +107,14 @@ func.func @transfer_read_of_extract_slice(%t : tensor<?x?xf32>, %s1 : index, %s2
 // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index
 // CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index
 // CHECK: %[[add:.*]] = affine.apply #[[$ADD_3]]()[%[[s1]]]
-// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c5]], %[[add]], %[[c10]]], %{{.*}} {in_bounds = [true, true]} : tensor<?x?x?xf32>, vector<5x6xf32>
+// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c5]], %[[add]], %[[c10]]], %{{.*}} {in_bounds = array<i1: true, true>} : tensor<?x?x?xf32>, vector<5x6xf32>
 // CHECK: return %[[r]]
 func.func @transfer_read_of_extract_slice_rank_reducing(%t : tensor<?x?x?xf32>, %s1 : index, %s2 : index) -> vector<5x6xf32> {
   %c3 = arith.constant 3 : index
   %c4 = arith.constant 4 : index
   %cst = arith.constant 0.0 : f32
   %0 = tensor.extract_slice %t[5, %s1, 6] [1, %s2, 12] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x12xf32>
-  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<?x12xf32>, vector<5x6xf32>
+  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array<i1: true, true>} : tensor<?x12xf32>, vector<5x6xf32>
   return %1 : vector<5x6xf32>
 }
@@ -137,7 +137,7 @@ func.func @transfer_read_of_extract_slice_swappy_rank_reducing(%t : tensor<?x?x?
 // CHECK-SAME: : tensor<?x?x?xf32>, vector<5x6xf32>
   %0 = tensor.extract_slice %t[5, %s1, %s2] [%s2, 1, 12] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x12xf32>
-  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<?x12xf32>, vector<5x6xf32>
+  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = array<i1: true, true>} : tensor<?x12xf32>, vector<5x6xf32>
   return %1 : vector<5x6xf32>
 }
@@ -166,8 +166,8 @@ func.func @fold_vector_transfer_write_with_rank_reduced_insert_slice(
 // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
 // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]]
 // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]]
-// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, tensor<?x?x?xf32>
+// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = array<i1: true>} : vector<4xf32>, tensor<?x?x?xf32>
-  %0 = vector.transfer_write %arg1, %st[%arg6, %arg7] {in_bounds = [true]} : vector<4xf32>, tensor<?x?xf32>
+  %0 = vector.transfer_write %arg1, %st[%arg6, %arg7] {in_bounds = array<i1: true>} : vector<4xf32>, tensor<?x?xf32>
   %1 = tensor.insert_slice %0 into %arg0[0, %arg2, %arg3] [1, %arg4, %arg5] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
@@ -200,8 +200,8 @@ func.func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice(
 // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]]
 // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]]
 // CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]]
-// CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, tensor<?x?x?xf32>
+// CHECK-SAME: {in_bounds = array<i1: true>, permutation_map = #[[MAP2]]} : vector<4xf32>, tensor<?x?x?xf32>
-  %0 = vector.transfer_write %arg1, %st[%arg6, %arg7] {in_bounds = [true]} : vector<4xf32>, tensor<?x?xf32>
+  %0 = vector.transfer_write %arg1, %st[%arg6, %arg7] {in_bounds = array<i1: true>} : vector<4xf32>, tensor<?x?xf32>
   %1 = tensor.insert_slice %0 into %arg0[%arg2, %arg3, 0] [%arg4, %arg5, 1] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
@@ -217,9 +217,9 @@ func.func @insert_slice_of_transfer_write(%t1 : tensor<?x12xf32>, %v : vector<5x
 // CHECK-NOT: insert_slice
 // CHECK: %[[c3:.*]] = arith.constant 3 : index
-// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<?x12xf32>
+// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c3]], %[[s]]] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<?x12xf32>
 // CHECK: return %[[r]]
-  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<5x6xf32>
   %1 = tensor.insert_slice %0 into %t1[3, %s] [5, 6] [1, 1] : tensor<5x6xf32> into tensor<?x12xf32>
   return %1 : tensor<?x12xf32>
 }
@@ -239,9 +239,9 @@ func.func @insert_slice_of_transfer_write_swappy_rank_extending(
 // CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index
 // CHECK-DAG: %[[c4:.*]] = arith.constant 4 : index
 // CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]]
-// CHECK-SAME: {in_bounds = [true, true], permutation_map = #[[$d0d2]]} : vector<5x6xf32>, tensor<?x?x12xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true>, permutation_map = #[[$d0d2]]} : vector<5x6xf32>, tensor<?x?x12xf32>
 // CHECK: return %[[r]]
-  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<5x6xf32>
   %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [5, 1, 6] [1, 1, 1] : tensor<5x6xf32> into tensor<?x?x12xf32>
   return %1 : tensor<?x?x12xf32>
 }
@@ -252,11 +252,11 @@ func.func @insert_slice_of_transfer_write_swappy_rank_extending(
 // CHECK-SAME: %[[t1:.*]]: tensor<?x?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
 // CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index
 // CHECK-DAG: %[[c4:.*]] = arith.constant 4 : index
-// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<?x?x12xf32>
+// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<?x?x12xf32>
 // CHECK: return %[[r]]
 func.func @insert_slice_of_transfer_write_rank_extending(%t1 : tensor<?x?x12xf32>, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor<?x?x12xf32> {
   %c0 = arith.constant 0 : index
-  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<5x6xf32>, tensor<5x6xf32>
   %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [1, 5, 6] [1, 1, 1] : tensor<5x6xf32> into tensor<?x?x12xf32>
   return %1 : tensor<?x?x12xf32>
 }
diff --git a/mlir/test/Dialect/Vector/bufferize.mlir b/mlir/test/Dialect/Vector/bufferize.mlir
index 3399f60a2c3bf..defa2b754351b 100644
--- a/mlir/test/Dialect/Vector/bufferize.mlir
+++ b/mlir/test/Dialect/Vector/bufferize.mlir
@@ -3,11 +3,11 @@
 // CHECK-LABEL: func @transfer_read(
 // CHECK-SAME: %[[t:.*]]: tensor<?x?xf32>, %[[o1:.*]]: index, %[[o2:.*]]: index, %[[pad:.*]]: f32)
 // CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] : memref<?x?xf32>
-// CHECK: %[[r:.*]] = vector.transfer_read %[[m]][%[[o1]], %[[o2]]], %[[pad]] {in_bounds = [true, false]} : memref<?x?xf32>, vector<5x6xf32>
+// CHECK: %[[r:.*]] = vector.transfer_read %[[m]][%[[o1]], %[[o2]]], %[[pad]] {in_bounds = array<i1: true, false>} : memref<?x?xf32>, vector<5x6xf32>
 // CHECK: return %[[r]]
 func.func @transfer_read(%t: tensor<?x?xf32>, %o1: index, %o2: index, %pad: f32) -> vector<5x6xf32> {
-  %0 = vector.transfer_read %t[%o1, %o2], %pad {in_bounds = [true, false]}
+  %0 = vector.transfer_read %t[%o1, %o2], %pad {in_bounds = array<i1: true, false>}
       : tensor<?x?xf32>, vector<5x6xf32>
   return %0 : vector<5x6xf32>
 }
@@ -19,13 +19,13 @@ func.func @transfer_read(%t: tensor<?x?xf32>, %o1: index,
 // CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] : memref<?x?xf32>
 // CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}}, %{{.*}}) {{.*}} : memref<?x?xf32>
 // CHECK: memref.copy %[[m]], %[[alloc]]
-// CHECK: vector.transfer_write %[[vec]], %[[alloc]][%[[o1]], %[[o2]]], %[[mask]] {in_bounds = [true, false]} : vector<5x6xf32>, memref<?x?xf32>
+// CHECK: vector.transfer_write %[[vec]], %[[alloc]][%[[o1]], %[[o2]]], %[[mask]] {in_bounds = array<i1: true, false>} : vector<5x6xf32>, memref<?x?xf32>
 // CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]] : memref<?x?xf32>
 // CHECK: return %[[r]]
 func.func @transfer_write(%t: tensor<?x?xf32>, %o1: index, %o2: index, %vec: vector<5x6xf32>, %mask: vector<5x6xi1>) -> tensor<?x?xf32> {
-  %0 = vector.transfer_write %vec, %t[%o1, %o2], %mask {in_bounds = [true, false]}
+  %0 = vector.transfer_write %vec, %t[%o1, %o2], %mask {in_bounds = array<i1: true, false>}
       : vector<5x6xf32>, tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index b7c78de4b5bd8..44d5d8c51defc 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -442,10 +442,10 @@ func.func @cast_transfers(%A: memref<4x8xf32>) -> (vector<4x8xf32>) {
   %f0 = arith.constant 0.0 : f32
   %0 = memref.cast %A : memref<4x8xf32> to memref<?x?xf32>
-  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<4x8xf32>, vector<4x8xf32>
+  // CHECK: vector.transfer_read %{{.*}} {in_bounds = array<i1: true, true>} : memref<4x8xf32>, vector<4x8xf32>
   %1 = vector.transfer_read %0[%c0, %c0], %f0 : memref<?x?xf32>, vector<4x8xf32>
-  // CHECK: vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, memref<4x8xf32>
+  // CHECK: vector.transfer_write %{{.*}} {in_bounds = array<i1: true, true>} : vector<4x8xf32>, memref<4x8xf32>
   vector.transfer_write %1, %0[%c0, %c0] : vector<4x8xf32>, memref<?x?xf32>
   return %1 : vector<4x8xf32>
 }
@@ -458,7 +458,7 @@ func.func @cast_transfers(%A: tensor<4x8xf32>) -> (vector<4x8xf32>) {
   %f0 = arith.constant 0.0 : f32
   %0 = tensor.cast %A : tensor<4x8xf32> to tensor<?x?xf32>
-  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<4x8xf32>, vector<4x8xf32>
+  // CHECK: vector.transfer_read %{{.*}} {in_bounds = array<i1: true, true>} : tensor<4x8xf32>, vector<4x8xf32>
   %1 = vector.transfer_read %0[%c0, %c0], %f0 : tensor<?x?xf32>, vector<4x8xf32>
   return %1 : vector<4x8xf32>
@@ -903,10 +903,10 @@ func.func @fold_vector_transfers(%A: memref<?x?xf32>) -> (vector<4x8xf32>, vecto
   %c0 = arith.constant 0 : index
   %f0 = arith.constant 0.0 : f32
-  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [false, true]}
+  // CHECK: vector.transfer_read %{{.*}} {in_bounds = array<i1: false, true>}
   %1 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<4x8xf32>
-  // CHECK: vector.transfer_write %{{.*}} {in_bounds = [false, true]}
+  // CHECK: vector.transfer_write %{{.*}} {in_bounds = array<i1: false, true>}
   vector.transfer_write %1, %A[%c0, %c0] : vector<4x8xf32>, memref<?x?xf32>
   // Both dims may be out-of-bounds, attribute is elided.
@@ -1190,20 +1190,20 @@ func.func @transfer_folding_1(%t0: tensor<2x3x4xf32>, %t1: tensor<2x3x4xf32>) {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %t0[%c0, %c0, %c0], %pad {in_bounds = [true, true, true]} :
+  %v = vector.transfer_read %t0[%c0, %c0, %c0], %pad {in_bounds = array<i1: true, true, true>} :
     tensor<2x3x4xf32>, vector<2x3x4xf32>
-  %r0 = vector.transfer_write %v, %t1[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
+  %r0 = vector.transfer_write %v, %t1[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>} :
    vector<2x3x4xf32>, tensor<2x3x4xf32>
   %t2 = "test.constant"() { value = dense<6.0> : tensor<2x3x4xf32>} : () -> (tensor<2x3x4xf32>)
-  %r1 = vector.transfer_write %v, %t2[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
+  %r1 = vector.transfer_write %v, %t2[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>} :
    vector<2x3x4xf32>, tensor<2x3x4xf32>
   // CHECK-NEXT: some_op_that_may_have_side_effects
   %t3 = "some_op_that_may_have_side_effects"() : () -> (tensor<2x3x4xf32>)
-  %r2 = vector.transfer_write %v, %t0[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
+  %r2 = vector.transfer_write %v, %t0[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>} :
    vector<2x3x4xf32>, tensor<2x3x4xf32>
   // CHECK-NEXT: return %[[T0]], %[[T0]], %[[T0]]
@@ -1258,11 +1258,11 @@ func.func @store_to_load_tensor(%arg0 : tensor<4x4xf32>,
   %c2 = arith.constant 2 : index
   %c0 = arith.constant 0 : index
   %cf0 = arith.constant 0.0 : f32
-  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
+  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
-  %w1 = vector.transfer_write %v1, %w0[%c2, %c0] {in_bounds = [true, true]} :
+  %w1 = vector.transfer_write %v1, %w0[%c2, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
-  %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
+  %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = array<i1: true, true>} :
    tensor<4x4xf32>, vector<1x4xf32>
   return %0 : vector<1x4xf32>
 }
@@ -1280,11 +1280,11 @@ func.func @store_to_load_negative_tensor(%arg0 : tensor<4x4xf32>,
   %c2 = arith.constant 2 : index
   %c0 = arith.constant 0 : index
   %cf0 = arith.constant 0.0 : f32
-  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
+  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
-  %w1 = vector.transfer_write %v0, %w0[%i, %i] {in_bounds = [true, true]} :
+  %w1 = vector.transfer_write %v0, %w0[%i, %i] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
-  %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
+  %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = array<i1: true, true>} :
    tensor<4x4xf32>, vector<1x4xf32>
   return %0 : vector<1x4xf32>
 }
@@ -1300,9 +1300,9 @@ func.func @store_to_load_tensor_broadcast(%arg0 : tensor<4x4xf32>,
                                           %v0 : vector<4x2xf32>) -> vector<4x2x6xf32> {
   %c0 = arith.constant 0 : index
   %cf0 = arith.constant 0.0 : f32
-  %w0 = vector.transfer_write %v0, %arg0[%c0, %c0] {in_bounds = [true, true]} :
+  %w0 = vector.transfer_write %v0, %arg0[%c0, %c0] {in_bounds = array<i1: true, true>} :
    vector<4x2xf32>, tensor<4x4xf32>
-  %0 = vector.transfer_read %w0[%c0, %c0], %cf0 {in_bounds = [true, true, true],
+  %0 = vector.transfer_read %w0[%c0, %c0], %cf0 {in_bounds = array<i1: true, true, true>,
    permutation_map = affine_map<(d0, d1) -> (d0, d1, 0)>} :
    tensor<4x4xf32>, vector<4x2x6xf32>
   return %0 : vector<4x2x6xf32>
@@ -1318,9 +1318,9 @@ func.func @store_to_load_tensor_broadcast_scalable(%arg0 : tensor<?xf32>,
                                                    %v0 : vector<[4]xf32>) -> vector<6x[4]xf32> {
   %c0 = arith.constant 0 : index
   %cf0 = arith.constant 0.0 : f32
-  %w0 = vector.transfer_write %v0, %arg0[%c0] {in_bounds = [true]} :
+  %w0 = vector.transfer_write %v0, %arg0[%c0] {in_bounds = array<i1: true>} :
    vector<[4]xf32>, tensor<?xf32>
-  %0 = vector.transfer_read %w0[%c0], %cf0 {in_bounds = [true, true],
+  %0 = vector.transfer_read %w0[%c0], %cf0 {in_bounds = array<i1: true, true>,
    permutation_map = affine_map<(d0) -> (0, d0)>} :
    tensor<?xf32>, vector<6x[4]xf32>
   return %0 : vector<6x[4]xf32>
@@ -1337,10 +1337,10 @@ func.func @store_to_load_tensor_perm_broadcast(%arg0 : tensor<4x4x4xf32>,
                                                %v0 : vector<4x1xf32>) -> vector<1x100x4x5xf32> {
   %c0 = arith.constant 0 : index
   %cf0 = arith.constant 0.0 : f32
-  %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0] {in_bounds = [true, true],
+  %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0] {in_bounds = array<i1: true, true>,
    permutation_map = affine_map<(d0, d1, d2) -> (d2, d1)>} :
    vector<4x1xf32>, tensor<4x4x4xf32>
-  %0 = vector.transfer_read %w0[%c0, %c0, %c0], %cf0 {in_bounds = [true, true, true, true],
+  %0 = vector.transfer_read %w0[%c0, %c0, %c0], %cf0 {in_bounds = array<i1: true, true, true, true>,
    permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
    tensor<4x4x4xf32>, vector<1x100x4x5xf32>
   return %0 : vector<1x100x4x5xf32>
@@ -1363,11 +1363,11 @@ func.func @dead_store_tensor(%arg0 : tensor<4x4xf32>,
   %c2 = arith.constant 2 : index
   %c0 = arith.constant 0 : index
   %cf0 = arith.constant 0.0 : f32
-  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
+  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
-  %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
+  %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
-  %w2 = vector.transfer_write %v1, %w1[%c1, %c0] {in_bounds = [true, true]} :
+  %w2 = vector.transfer_write %v1, %w1[%c1, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
   return %w2 : tensor<4x4xf32>
 }
@@ -1388,14 +1388,14 @@ func.func @dead_store_tensor_negative(%arg0 : tensor<4x4xf32>,
   %c2 = arith.constant 2 : index
   %c0 = arith.constant 0 : index
   %cf0 = arith.constant 0.0 : f32
-  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
+  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
-  %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
+  %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
-  %0 = vector.transfer_read %w1[%i, %i], %cf0 {in_bounds = [true, true]} :
+  %0 = vector.transfer_read %w1[%i, %i], %cf0 {in_bounds = array<i1: true, true>} :
    tensor<4x4xf32>, vector<1x4xf32>
   %x = arith.addf %0, %0 : vector<1x4xf32>
-  %w2 = vector.transfer_write %x, %w0[%c1, %c0] {in_bounds = [true, true]} :
+  %w2 = vector.transfer_write %x, %w0[%c1, %c0] {in_bounds = array<i1: true, true>} :
    vector<1x4xf32>, tensor<4x4xf32>
   return %w2 : tensor<4x4xf32>
 }
@@ -1420,11 +1420,11 @@ func.func @swap_extract_slice_transfer_write(%arg0 : vector<8x4xf32>,
   // CHECK-SAME: [%[[IV]], 16] [%[[SZ]], 8]
   // CHECK: %[[T1:.*]] = vector.transfer_write %[[VEC]]
   // CHECK-SAME: %[[T0]][%[[C0]], %[[C0]]]
-  // CHECK-SAME: in_bounds = [true, false]
+  // CHECK-SAME: in_bounds = array<i1: true, false>
   // CHECK-SAME: permutation_map = #[[$MAP]]
   // CHECK: %[[T2:.*]] = tensor.insert_slice %[[T1]] into %[[ITER_ARG]]
   // CHECK-SAME: [%[[IV]], 16] [%[[SZ]], 8]
-  %0 = vector.transfer_write %arg0, %arg1[%c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x4xf32>, tensor<4x8xf32>
+  %0 = vector.transfer_write %arg0, %arg1[%c0, %c0] {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x4xf32>, tensor<4x8xf32>
   %1 = tensor.extract_slice %0[0, 0] [%sz, 8] [1, 1] : tensor<4x8xf32> to tensor<?x8xf32>
   %2 = tensor.insert_slice %1 into %arg2[%iv, 16] [%sz, 8] [1, 1] : tensor<?x8xf32> into tensor<64x64xf32>
@@ -1452,7 +1452,7 @@ func.func @do_not_swap_extract_slice_transfer_write(%arg0 : vector<8xf32>,
   // CHECK: %[[T0:.*]] = vector.transfer_write %[[VEC]]
   // CHECK: %[[T1:.*]] = tensor.extract_slice %[[T0]]
   // CHECK: %[[T2:.*]] = tensor.insert_slice %[[T1]]
-  %0 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
+  %0 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = array<i1: true>} : vector<8xf32>, tensor<8xf32>
   %1 = tensor.extract_slice %0[0] [%iv] [1] : tensor<8xf32> to tensor<?xf32>
   %2 = tensor.insert_slice %1 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>
@@ -1460,7 +1460,7 @@ func.func @do_not_swap_extract_slice_transfer_write(%arg0 : vector<8xf32>,
   // CHECK: %[[T3:.*]] = vector.transfer_write %[[VEC_SMALL]]
   // CHECK: %[[T4:.*]] = tensor.extract_slice %[[T3]]
   // CHECK: %[[T5:.*]] = tensor.insert_slice %[[T4]]
-  %3 = vector.transfer_write %arg1, %arg2[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<8xf32>
+  %3 = vector.transfer_write %arg1, %arg2[%c0] {in_bounds = array<i1: true>} : vector<4xf32>, tensor<8xf32>
   %4 = tensor.extract_slice %3[0] [%sz] [1] : tensor<8xf32> to tensor<?xf32>
   %5 = tensor.insert_slice %4 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>
@@ -1468,7 +1468,7 @@ func.func @do_not_swap_extract_slice_transfer_write(%arg0 : vector<8xf32>,
   // CHECK: %[[T6:.*]] = vector.transfer_write %[[VEC]]
   // CHECK: %[[T7:.*]] = tensor.extract_slice %[[T6]]
   // CHECK: %[[T8:.*]] = tensor.insert_slice %[[T7]]
-  %6 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
+  %6 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = array<i1: true>} : vector<8xf32>, tensor<8xf32>
   %7 = tensor.extract_slice %6[0] [1] [1] : tensor<8xf32> to tensor<f32>
   %8 = tensor.insert_slice %7 into %arg3[%iv] [1] [1] : tensor<f32> into tensor<64xf32>
@@ -2389,7 +2389,7 @@ func.func @transfer_read_from_rank_reducing_extract_slice(%src: tensor<1x8x8x8xf
   %c0 = arith.constant 0 : index
   %f0 = arith.constant 0.000000e+00 : f32
   %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [1, 1, 1, 1] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32>
-  %1 = vector.transfer_read %0[%c0, %i4, %c0], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32>
+  %1 = vector.transfer_read %0[%c0, %i4, %c0], %f0 {in_bounds = array<i1: true>} : tensor<1x4x4xf32>, vector<4xf32>
   return %1 : vector<4xf32>
 }
@@ -2571,12 +2571,12 @@ func.func @load_store_forwarding_rank_mismatch(%v0: vector<4x1x1xf32>, %arg0: te
   %cf0 = arith.constant 0.0 : f32
   // d0 is explicitly written.
   %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0]
-    {in_bounds = [true, true, true],
+    {in_bounds = array<i1: true, true, true>,
     permutation_map = affine_map<(d0, d1, d2) -> (d2, d1, d0)>} :
     vector<4x1x1xf32>, tensor<4x4x4xf32>
   // d0 is implicitly read (rank-reduction of unit dim).
   %r = vector.transfer_read %w0[%c0, %c0, %c0], %cf0
-    {in_bounds = [true, true, true, true],
+    {in_bounds = array<i1: true, true, true, true>,
     permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
     tensor<4x4x4xf32>, vector<1x100x4x5xf32>
   return %r : vector<1x100x4x5xf32>
diff --git a/mlir/test/Dialect/Vector/eliminate-masks.mlir b/mlir/test/Dialect/Vector/eliminate-masks.mlir
index 0b89b0604faab..eb3e749f6974c 100644
--- a/mlir/test/Dialect/Vector/eliminate-masks.mlir
+++ b/mlir/test/Dialect/Vector/eliminate-masks.mlir
@@ -23,11 +23,11 @@ func.func @eliminate_redundant_masks_through_insert_and_extracts(%tensor: tensor
   %mask = vector.create_mask %dim_1 : vector<[4]xi1>
 
   // 3. Read the slice and do some computation.
-  %vec = vector.transfer_read %extracted_slice_1[%c0], %c0_f32, %mask {in_bounds = [true]} : tensor<?xf32>, vector<[4]xf32>
+  %vec = vector.transfer_read %extracted_slice_1[%c0], %c0_f32, %mask {in_bounds = array<i1: true>} : tensor<?xf32>, vector<[4]xf32>
   %new_vec = "test.some_computation"(%vec) : (vector<[4]xf32>) -> (vector<[4]xf32>)
 
   // 4. Write the new value.
-  %write = vector.transfer_write %new_vec, %extracted_slice_1[%c0], %mask {in_bounds = [true]} : vector<[4]xf32>, tensor<?xf32>
+  %write = vector.transfer_write %new_vec, %extracted_slice_1[%c0], %mask {in_bounds = array<i1: true>} : vector<[4]xf32>, tensor<?xf32>
 
   // 5. Insert and yield the new tensor value.
   %result = tensor.insert_slice %write into %arg[0, %i] [1, %c4_vscale] [1, 1] : tensor<?xf32> into tensor<1x?xf32>
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
index e2bc5ef6128e7..6600c3f09265d 100644
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -500,7 +500,7 @@ func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<2x3xf32>>) {
   %f0 = arith.constant 0.0 : f32
   %vf0 = vector.splat %f0 : vector<2x3xf32>
   // expected-error@+1 {{ expects the in_bounds attr of same rank as permutation_map results: affine_map<(d0, d1) -> (d0, d1)>}}
-  %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {in_bounds = [true], permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref<?x?xvector<2x3xf32>>, vector<1x1x2x3xf32>
+  %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {in_bounds = array<i1: true>, permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref<?x?xvector<2x3xf32>>, vector<1x1x2x3xf32>
 }
 
 // -----
@@ -510,7 +510,7 @@ func.func @test_vector.transfer_read(%arg0: memref<?x?xvector<2x3xf32>>) {
   %f0 = arith.constant 0.0 : f32
   %vf0 = vector.splat %f0 : vector<2x3xf32>
   // expected-error@+1 {{requires broadcast dimensions to be in-bounds}}
-  %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {in_bounds = [false, true], permutation_map = affine_map<(d0, d1)->(0, d1)>} : memref<?x?xvector<2x3xf32>>, vector<1x1x2x3xf32>
+  %0 = vector.transfer_read %arg0[%c3, %c3], %vf0 {in_bounds = array<i1: false, true>, permutation_map = affine_map<(d0, d1)->(0, d1)>} : memref<?x?xvector<2x3xf32>>, vector<1x1x2x3xf32>
 }
 
 // -----
diff --git a/mlir/test/Dialect/Vector/lower-vector-mask.mlir b/mlir/test/Dialect/Vector/lower-vector-mask.mlir
index a8a1164e2f762..aaf73e906e49b 100644
--- a/mlir/test/Dialect/Vector/lower-vector-mask.mlir
+++ b/mlir/test/Dialect/Vector/lower-vector-mask.mlir
@@ -55,14 +55,14 @@ func.func @vector_gather(%arg0: tensor<64xf32>, %arg1: tensor<3xf32>) -> tensor<
   %cst = arith.constant 0.000000e+00 : f32
   %c3 = arith.constant 3 : index
   %0 = vector.create_mask %c3 : vector<4xi1>
-  %1 = vector.mask %0 { vector.transfer_read %arg1[%c0], %cst {in_bounds = [true]} : tensor<3xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
+  %1 = vector.mask %0 { vector.transfer_read %arg1[%c0], %cst {in_bounds = array<i1: true>} : tensor<3xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
   %cst_0 = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
   %cst_1 = arith.constant dense<true> : vector<4xi1>
   %cst_2 = arith.constant dense<0.000000e+00> : vector<4xf32>
   %c0_3 = arith.constant 0 : index
   %2 = vector.mask %0 { vector.gather %arg0[%c0_3] [%cst_0], %cst_1, %cst_2 : tensor<64xf32>, vector<4xindex>, vector<4xi1>, vector<4xf32> into vector<4xf32> } : vector<4xi1> -> vector<4xf32>
   %c0_4 = arith.constant 0 : index
-  %3 = vector.mask %0 { vector.transfer_write %2, %arg1[%c0_4] {in_bounds = [true]} : vector<4xf32>, tensor<3xf32> } : vector<4xi1> -> tensor<3xf32>
+  %3 = vector.mask %0 { vector.transfer_write %2, %arg1[%c0_4] {in_bounds = array<i1: true>} : vector<4xf32>, tensor<3xf32> } : vector<4xi1> -> tensor<3xf32>
   return %3 : tensor<3xf32>
 }
 
@@ -75,7 +75,7 @@ func.func @vector_gather(%arg0: tensor<64xf32>, %arg1: tensor<3xf32>) -> tensor<
 // CHECK: %[[VAL_5:.*]] = arith.constant 3 : index
 // CHECK: %[[VAL_6:.*]] = vector.create_mask %[[VAL_5]] : vector<4xi1>
 // CHECK: %[[VAL_7:.*]] = vector.gather %[[VAL_0]][%[[VAL_4]]] [%[[VAL_3]]], %[[VAL_6]], %[[VAL_2]] : tensor<64xf32>, vector<4xindex>, vector<4xi1>, vector<4xf32> into vector<4xf32>
-// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_7]], %[[VAL_1]][%[[VAL_4]]], %[[VAL_6]] {in_bounds = [true]} : vector<4xf32>, tensor<3xf32>
+// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_7]], %[[VAL_1]][%[[VAL_4]]], %[[VAL_6]] {in_bounds = array<i1: true>} : vector<4xf32>, tensor<3xf32>
 
 // -----
diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir
index 08d1a189231bc..e24be3ab10cad 100644
--- a/mlir/test/Dialect/Vector/ops.mlir
+++ b/mlir/test/Dialect/Vector/ops.mlir
@@ -61,8 +61,8 @@ func.func @vector_transfer_ops(%arg0: memref<?x?xf32>,
   %3 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d1)>} : memref<?x?xf32>, vector<128xf32>
   // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
   %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
-  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {in_bounds = [false, true]} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
-  %5 = vector.transfer_read %arg1[%c3, %c3], %vf0 {in_bounds = [false, true]} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
+  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {in_bounds = array<i1: false, true>} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
+  %5 = vector.transfer_read %arg1[%c3, %c3], %vf0 {in_bounds = array<i1: false, true>} : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
   // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref<?x?xvector<4x3xi32>>, vector<5x24xi8>
   %6 = vector.transfer_read %arg2[%c3, %c3], %v0 : memref<?x?xvector<4x3xi32>>, vector<5x24xi8>
   // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : memref<?x?xvector<4x3xindex>>, vector<5x48xi8>
@@ -70,7 +70,7 @@ func.func @vector_transfer_ops(%arg0: memref<?x?xf32>,
   // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}}, %{{.*}} : memref<?x?xf32>, vector<5xf32>
   %8 = vector.transfer_read %arg0[%c3, %c3], %f0, %m : memref<?x?xf32>, vector<5xf32>
   // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]], %[[C3]]], %{{.*}}, %{{.*}} : memref<?x?x?xf32>, vector<5x4x8xf32>
-  %9 = vector.transfer_read %arg4[%c3, %c3, %c3], %f0, %m2 {in_bounds = [false, false, true], permutation_map = affine_map<(d0, d1, d2)->(d1, d0, 0)>} : memref<?x?x?xf32>, vector<5x4x8xf32>
+  %9 = vector.transfer_read %arg4[%c3, %c3, %c3], %f0, %m2 {in_bounds = array<i1: false, false, true>, permutation_map = affine_map<(d0, d1, d2)->(d1, d0, 0)>} : memref<?x?x?xf32>, vector<5x4x8xf32>
 
   // CHECK: vector.transfer_write
   vector.transfer_write %0, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0)>} : vector<128xf32>, memref<?x?xf32>
@@ -79,7 +79,7 @@ func.func @vector_transfer_ops(%arg0: memref<?x?xf32>,
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
   vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
-  vector.transfer_write %5, %arg1[%c3, %c3] {in_bounds = [false, false]} : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
+  vector.transfer_write %5, %arg1[%c3, %c3] {in_bounds = array<i1: false, false>} : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x24xi8>, memref<?x?xvector<4x3xi32>>
   vector.transfer_write %6, %arg2[%c3, %c3] : vector<5x24xi8>, memref<?x?xvector<4x3xi32>>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x48xi8>, memref<?x?xvector<4x3xindex>>
@@ -121,8 +121,8 @@ func.func @vector_transfer_ops_tensor(%arg0: tensor<?x?xf32>,
   %3 = vector.transfer_read %arg0[%c3, %c3], %cst {permutation_map = affine_map<(d0, d1)->(d1)>} : tensor<?x?xf32>, vector<128xf32>
   // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
   %4 = vector.transfer_read %arg1[%c3, %c3], %vf0 {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
-  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {in_bounds = [false, true]} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
-  %5 = vector.transfer_read %arg1[%c3, %c3], %vf0 {in_bounds = [false, true]} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
+  // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} {in_bounds = array<i1: false, true>} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
+  %5 = vector.transfer_read %arg1[%c3, %c3], %vf0 {in_bounds = array<i1: false, true>} : tensor<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
   // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : tensor<?x?xvector<4x3xi32>>, vector<5x24xi8>
   %6 = vector.transfer_read %arg2[%c3, %c3], %v0 : tensor<?x?xvector<4x3xi32>>, vector<5x24xi8>
   // CHECK: vector.transfer_read %{{.*}}[%[[C3]], %[[C3]]], %{{.*}} : tensor<?x?xvector<4x3xindex>>, vector<5x48xi8>
@@ -136,7 +136,7 @@ func.func @vector_transfer_ops_tensor(%arg0: tensor<?x?xf32>,
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
   %10 = vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
-  %11 = vector.transfer_write %5, %arg1[%c3, %c3] {in_bounds = [false, false]} : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
+  %11 = vector.transfer_write %5, %arg1[%c3, %c3] {in_bounds = array<i1: false, false>} : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x24xi8>, tensor<?x?xvector<4x3xi32>>
   %12 = vector.transfer_write %6, %arg2[%c3, %c3] : vector<5x24xi8>, tensor<?x?xvector<4x3xi32>>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x48xi8>, tensor<?x?xvector<4x3xindex>>
diff --git a/mlir/test/Dialect/Vector/scalar-vector-transfer-to-memref.mlir b/mlir/test/Dialect/Vector/scalar-vector-transfer-to-memref.mlir
index c5cb09b9aa9f9..20e17d3fd10c1 100644
--- a/mlir/test/Dialect/Vector/scalar-vector-transfer-to-memref.mlir
+++ b/mlir/test/Dialect/Vector/scalar-vector-transfer-to-memref.mlir
@@ -23,7 +23,7 @@ func.func @transfer_read_0d(%m: memref<?x?x?xf32>, %idx: index) -> f32 {
 func.func @transfer_read_1d(%m: memref<?x?x?xf32>, %idx: index, %idx2: index) -> f32 {
   %cst = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %0 = vector.transfer_read %m[%idx, %idx, %idx], %cst {in_bounds = [true]} : memref<?x?x?xf32>, vector<5xf32>
+  %0 = vector.transfer_read %m[%idx, %idx, %idx], %cst {in_bounds = array<i1: true>} : memref<?x?x?xf32>, vector<5xf32>
   %1 = vector.extractelement %0[%idx2 : index] : vector<5xf32>
   return %1 : f32
 }
@@ -92,7 +92,7 @@ func.func @tensor_transfer_write_0d(%t: tensor<?x?x?xf32>, %idx: index, %f: f32)
 func.func @transfer_read_2d_extract(%m: memref<?x?x?x?xf32>, %idx: index, %idx2: index) -> f32 {
   %cst = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %0 = vector.transfer_read %m[%idx, %idx, %idx, %idx], %cst {in_bounds = [true, true]} : memref<?x?x?x?xf32>, vector<10x5xf32>
+  %0 = vector.transfer_read %m[%idx, %idx, %idx, %idx], %cst {in_bounds = array<i1: true, true>} : memref<?x?x?x?xf32>, vector<10x5xf32>
   %1 = vector.extract %0[8, 1] : f32 from vector<10x5xf32>
   return %1 : f32
 }
@@ -130,7 +130,7 @@ func.func @transfer_write_arith_constant(%m: memref<?x?xf32>, %idx: index) {
 func.func @transfer_read_multi_use(%m: memref<?xf32>, %idx: index) -> (f32, f32) {
   %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %m[%idx], %cst {in_bounds = [true]} : memref<?xf32>, vector<16xf32>
+  %0 = vector.transfer_read %m[%idx], %cst {in_bounds = array<i1: true>} : memref<?xf32>, vector<16xf32>
   %1 = vector.extract %0[0] : f32 from vector<16xf32>
   %2 = vector.extract %0[1] : f32 from vector<16xf32>
   return %1, %2 : f32, f32
@@ -148,7 +148,7 @@ func.func @transfer_read_multi_use(%m: memref<?xf32>, %idx: index) -> (f32, f32)
 func.func @subvector_extract(%m: memref<?x?xf32>, %idx: index) -> vector<16xf32> {
   %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %m[%idx, %idx], %cst {in_bounds = [true, true]} : memref<?x?xf32>, vector<8x16xf32>
+  %0 = vector.transfer_read %m[%idx, %idx], %cst {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<8x16xf32>
   %1 = vector.extract %0[0] : vector<16xf32> from vector<8x16xf32>
   return %1 : vector<16xf32>
 }
diff --git a/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir b/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir
index 9526d610e490e..72da4399d9526 100644
--- a/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir
+++ b/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir
@@ -319,9 +319,9 @@ func.func @cast_away_transfer_read_leading_one_dims(%arg0: memref<1x4x8x16xf16>)
   %c0 = arith.constant 0 : index
   // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f16
   %f0 = arith.constant 0. : f16
-  // CHECK: %[[READ:.+]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[F0]] {in_bounds = [true]} : memref<1x4x8x16xf16>, vector<4xf16>
+  // CHECK: %[[READ:.+]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[F0]] {in_bounds = array<i1: true>} : memref<1x4x8x16xf16>, vector<4xf16>
   // CHECK: %[[CAST:.+]] = vector.broadcast %[[READ]] : vector<4xf16> to vector<1x4xf16>
-  %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %f0 {in_bounds = [true, true]} : memref<1x4x8x16xf16>, vector<1x4xf16>
+  %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %f0 {in_bounds = array<i1: true, true>} : memref<1x4x8x16xf16>, vector<1x4xf16>
   // CHECK: return %[[CAST]]
   return %0: vector<1x4xf16>
 }
@@ -333,9 +333,9 @@ func.func @cast_away_masked_transfer_read_leading_one_dims(%arg0: memref<1x4x8x1
   // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f16
   %f0 = arith.constant 0. : f16
   // CHECK: %[[MASK_CAST:.+]] = vector.extract %{{.*}}[0] : vector<4xi1> from vector<1x4xi1>
-  // CHECK: %[[READ:.+]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[F0]], %[[MASK_CAST]] {in_bounds = [true]} : memref<1x4x8x16xf16>, vector<4xf16>
+  // CHECK: %[[READ:.+]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[F0]], %[[MASK_CAST]] {in_bounds = array<i1: true>} : memref<1x4x8x16xf16>, vector<4xf16>
   // CHECK: %[[CAST:.+]] = vector.broadcast %[[READ]] : vector<4xf16> to vector<1x4xf16>
-  %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %f0, %arg1 {in_bounds = [true, true]} : memref<1x4x8x16xf16>, vector<1x4xf16>
+  %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %f0, %arg1 {in_bounds = array<i1: true, true>} : memref<1x4x8x16xf16>, vector<1x4xf16>
   // CHECK: return %[[CAST]]
   return %0: vector<1x4xf16>
 }
@@ -345,7 +345,7 @@ func.func @cast_away_transfer_read_leading_one_dims_one_element(%arg0: memref<1x
   %c0 = arith.constant 0 : index
   %f0 = arith.constant 0. : f16
   // CHECK: vector.broadcast %{{.+}} : vector<1xf16> to vector<1x1xf16>
-  %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %f0 {in_bounds = [true, true]} : memref<1x1x1x1xf16>, vector<1x1xf16>
+  %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %f0 {in_bounds = array<i1: true, true>} : memref<1x1x1x1xf16>, vector<1x1xf16>
   return %0: vector<1x1xf16>
 }
@@ -359,10 +359,10 @@ func.func @cast_away_nontrivial_map_masked_transfer_read(%arg0: memref<1x4x8xf16
   // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f16
   %f0 = arith.constant 0. : f16
   // CHECK: %[[MASK_CAST:.+]] = vector.shape_cast %{{.*}} : vector<1x4x1xi1> to vector<4xi1>
-  // CHECK: %[[READ:.+]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[F0]], %[[MASK_CAST]] {in_bounds = [true]
+  // CHECK: %[[READ:.+]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[F0]], %[[MASK_CAST]] {in_bounds = array<i1: true>
   // CHECK-SAME: permutation_map = #[[$MAP]]} : memref<1x4x8xf16>, vector<4xf16>
   // CHECK: %[[CAST:.+]] = vector.broadcast %[[READ]] : vector<4xf16> to vector<1x1x4xf16>
-  %0 = vector.transfer_read %arg0[%c0, %c0, %c0], %f0, %arg1 {in_bounds = [true, true, true],
+  %0 = vector.transfer_read %arg0[%c0, %c0, %c0], %f0, %arg1 {in_bounds = array<i1: true, true, true>,
     permutation_map = affine_map<(d0, d1, d2) -> (d0, d2, d1)>} : memref<1x4x8xf16>, vector<1x1x4xf16>
   // CHECK: return %[[CAST]]
   return %0: vector<1x1x4xf16>
@@ -381,7 +381,7 @@ func.func @not_insert_cast_fo4_transfer_read_under_mask(%arg0: memref<1x1x4xf16>
   %f0 = arith.constant 0. : f16
   %mask = vector.constant_mask [1, 3] : vector<1x4xi1>
   %ret = vector.mask %mask {
-    vector.transfer_read %arg0[%c0, %c0, %c0], %f0 {in_bounds = [true, true]} : memref<1x1x4xf16>, vector<1x4xf16>
+    vector.transfer_read %arg0[%c0, %c0, %c0], %f0 {in_bounds = array<i1: true, true>} : memref<1x1x4xf16>, vector<1x4xf16>
   } : vector<1x4xi1> -> vector<1x4xf16>
   return %ret: vector<1x4xf16>
 }
@@ -393,9 +393,9 @@ func.func @cast_away_transfer_write_leading_one_dims(%arg0: memref<1x4x8x16xf16>
   // CHECK: %[[C0:.+]] = arith.constant 0 : index
   %c0 = arith.constant 0 : index
   // CHECK: %[[CAST:.+]] = vector.extract %{{.*}}[0] : vector<4xf16> from vector<1x4xf16>
-  // CHECK: vector.transfer_write %[[CAST]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true]} : vector<4xf16>, memref<1x4x8x16xf16>
+  // CHECK: vector.transfer_write %[[CAST]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: true>} : vector<4xf16>, memref<1x4x8x16xf16>
-  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0, %c0] {in_bounds = [true, true]} : vector<1x4xf16>, memref<1x4x8x16xf16>
+  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf16>, memref<1x4x8x16xf16>
   return
 }
@@ -405,9 +405,9 @@ func.func @cast_away_masked_transfer_write_leading_one_dims(%arg0: memref<1x4x8x
   %c0 = arith.constant 0 : index
   // CHECK: %[[CAST:.+]] = vector.extract %{{.*}}[0] : vector<4xf16> from vector<1x4xf16>
   // CHECK: %[[MASK_CAST:.+]] = vector.extract %{{.*}}[0] : vector<4xi1> from vector<1x4xi1>
-  // CHECK: vector.transfer_write %[[CAST]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[MASK_CAST]] {in_bounds = [true]} : vector<4xf16>, memref<1x4x8x16xf16>
+  // CHECK: vector.transfer_write %[[CAST]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[MASK_CAST]] {in_bounds = array<i1: true>} : vector<4xf16>, memref<1x4x8x16xf16>
-  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0, %c0], %arg2 {in_bounds = [true, true]} : vector<1x4xf16>, memref<1x4x8x16xf16>
+  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0, %c0], %arg2 {in_bounds = array<i1: true, true>} : vector<1x4xf16>, memref<1x4x8x16xf16>
   return
 }
@@ -415,7 +415,7 @@ func.func @cast_away_masked_transfer_write_leading_one_dims(%arg0: memref<1x4x8x
 func.func @cast_away_transfer_write_leading_one_dims_one_element(%arg0: memref<1x1x1x1xf16>, %arg1: vector<1x1xf16>) {
   %c0 = arith.constant 0 : index
   // CHECK: vector.extract %{{.+}}[0] : vector<1xf16> from vector<1x1xf16>
-  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0, %c0] {in_bounds = [true, true]} : vector<1x1xf16>, memref<1x1x1x1xf16>
+  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0, %c0] {in_bounds = array<i1: true, true>} : vector<1x1xf16>, memref<1x1x1x1xf16>
   return
 }
@@ -431,7 +431,7 @@ func.func @not_insert_cast_for_transfer_write_under_mask(%arg0: memref<1x1x4xf16
   %c0 = arith.constant 0 : index
   %mask = vector.constant_mask [1, 3] : vector<1x4xi1>
   vector.mask %mask {
-    vector.transfer_write %arg1, %arg0[%c0, %c0, %c0] {in_bounds = [true, true]} : vector<1x4xf16>, memref<1x1x4xf16>
+    vector.transfer_write %arg1, %arg0[%c0, %c0, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf16>, memref<1x1x4xf16>
   } : vector<1x4xi1>
   return
 }
@@ -445,10 +445,10 @@ func.func @cast_away_nontrivial_map_masked_transfer_write(%arg0: memref<1x4x8xf1
   %c0 = arith.constant 0 : index
   // CHECK: %[[CAST:.+]] = vector.extract %{{.*}}[0, 0] : vector<4xf16> from vector<1x1x4xf16>
   // CHECK: %[[MASK_CAST:.+]] = vector.shape_cast %{{.*}} : vector<1x4x1xi1> to vector<4xi1>
-  // CHECK: vector.transfer_write %[[CAST]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[MASK_CAST]] {in_bounds = [true]
+  // CHECK: vector.transfer_write %[[CAST]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[MASK_CAST]] {in_bounds = array<i1: true>
   // CHECK-SAME: permutation_map = #[[$MAP]]} : vector<4xf16>, memref<1x4x8xf16>
-  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0], %arg2 {in_bounds = [true, true, true],
+  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0], %arg2 {in_bounds = array<i1: true, true, true>,
     permutation_map = affine_map<(d0, d1, d2) -> (d0, d2, d1)>} : vector<1x1x4xf16>, memref<1x4x8xf16>
   return
 }
diff --git a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
index cba299b2a1d95..03e00258e737f 100644
--- a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
@@ -85,7 +85,7 @@ func.func @vector_load_i4_dynamic(%arg0 : index, %arg1 : index, %arg2 : index, %
 func.func @vector_transfer_read_i4(%arg1: index, %arg2: index) -> vector<8xi4> {
   %c0 = arith.constant 0 : i4
   %0 = memref.alloc() : memref<3x8xi4>
-  %1 = vector.transfer_read %0[%arg1, %arg2], %c0 {in_bounds = [true]} :
+  %1 = vector.transfer_read %0[%arg1, %arg2], %c0 {in_bounds = array<i1: true>} :
     memref<3x8xi4>, vector<8xi4>
   return %1 : vector<8xi4>
 }
diff --git a/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir b/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir
index 7838543e151be..8443fc76bfda1 100644
--- a/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir
+++ b/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir
@@ -113,11 +113,11 @@ func.func @transfer_read_3d(
   %f0 = arith.constant 0.0 : f32
   // CHECK: %[[mask:.*]] = vector.create_mask
   // CHECK-NOT: vector.mask
-  // CHECK: vector.transfer_read {{.*}}, %[[mask]] {in_bounds = [true, true, true]}
+  // CHECK: vector.transfer_read {{.*}}, %[[mask]] {in_bounds = array<i1: true, true, true>}
   // CHECK-SAME: : tensor<?x?x?xf32>, vector<2x1x7xf32>
   %0 = vector.create_mask %arg0, %arg1, %arg2 : vector<2x1x7xi1>
   %1 = vector.mask %0 {
-    vector.transfer_read %t[%c0, %c0, %c0], %f0 {in_bounds = [true, true, true]}
+    vector.transfer_read %t[%c0, %c0, %c0], %f0 {in_bounds = array<i1: true, true, true>}
       : tensor<?x?x?xf32>, vector<2x1x7xf32>
   } : vector<2x1x7xi1> -> vector<2x1x7xf32>
diff --git a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir
index 6e93923608cbf..3921b101a644d 100644
--- a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir
+++ b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir
@@ -143,13 +143,13 @@ func.func @vectorize_dynamic_reduction(%arg0: tensor<?x?xf32>, %arg1: tensor<?xf32>)
%arg1: tensor - %1 = vector.mask %0 { vector.transfer_read %arg0[%c0_1, %c0_1], %cst {in_bounds = [true, true]} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> + %1 = vector.mask %0 { vector.transfer_read %arg0[%c0_1, %c0_1], %cst {in_bounds = array} : tensor, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32> %cst_2 = arith.constant 0.000000e+00 : f32 %2 = vector.create_mask %dim : vector<4xi1> - %3 = vector.mask %2 { vector.transfer_read %arg1[%c0_1], %cst_2 {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> + %3 = vector.mask %2 { vector.transfer_read %arg1[%c0_1], %cst_2 {in_bounds = array} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> %4 = vector.mask %0 { vector.multi_reduction , %1, %3 [1] : vector<4x8xf32> to vector<4xf32> } : vector<4x8xi1> -> vector<4xf32> %c0_3 = arith.constant 0 : index - %5 = vector.mask %2 { vector.transfer_write %4, %arg1[%c0_3] {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor + %5 = vector.mask %2 { vector.transfer_write %4, %arg1[%c0_3] {in_bounds = array} : vector<4xf32>, tensor } : vector<4xi1> -> tensor return %5 : tensor } @@ -183,7 +183,7 @@ func.func @vectorize_1d_dynamic_reduction(%arg0: tensor) -> f32 { %c0_1 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %0 = vector.create_mask %dim : vector<8xi1> - %1 = vector.mask %0 { vector.transfer_read %arg0[%c0_1], %cst {in_bounds = [true]} : tensor, vector<8xf32> } : vector<8xi1> -> vector<8xf32> + %1 = vector.mask %0 { vector.transfer_read %arg0[%c0_1], %cst {in_bounds = array} : tensor, vector<8xf32> } : vector<8xi1> -> vector<8xf32> %4 = vector.mask %0 { vector.multi_reduction , %1, %cst [0] : vector<8xf32> to f32 } : vector<8xi1> -> f32 return %4 : f32 } @@ -205,13 +205,13 @@ func.func @vectorize_dynamic_transpose_reduction(%arg0: tensor, %arg1 %c0_2 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %0 = vector.create_mask %dim, %dim_0, %dim_1 : vector<4x8x16xi1> - %1 = vector.mask %0 { vector.transfer_read %arg0[%c0_2, %c0_2, %c0_2], %cst {in_bounds = [true, true, true]} : tensor, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32> + %1 = vector.mask %0 { vector.transfer_read %arg0[%c0_2, %c0_2, %c0_2], %cst {in_bounds = array} : tensor, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32> %cst_3 = arith.constant 0.000000e+00 : f32 %2 = vector.create_mask %dim_1, %dim_0 : vector<16x8xi1> - %3 = vector.mask %2 { vector.transfer_read %arg1[%c0_2, %c0_2], %cst_3 {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : tensor, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32> + %3 = vector.mask %2 { vector.transfer_read %arg1[%c0_2, %c0_2], %cst_3 {in_bounds = array, permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : tensor, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32> %4 = vector.mask %0 { vector.multi_reduction , %1, %3 [0] : vector<4x8x16xf32> to vector<8x16xf32> } : vector<4x8x16xi1> -> vector<8x16xf32> %c0_4 = arith.constant 0 : index - %5 = vector.mask %2 { vector.transfer_write %4, %arg1[%c0_4, %c0_4] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x16xf32>, tensor } : vector<16x8xi1> -> tensor + %5 = vector.mask %2 { vector.transfer_write %4, %arg1[%c0_4, %c0_4] {in_bounds = array, permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x16xf32>, tensor } : vector<16x8xi1> -> tensor return %5 : tensor } diff --git 
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index cd56c1bf9695b..724048c5f0f2e 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -8,7 +8,7 @@
 func.func @contiguous_inner_most(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = array<i1: true, true, true>} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
   return %v : vector<1x8x1xf32>
 }
@@ -27,7 +27,7 @@ func.func @contiguous_inner_most(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1,
 func.func @contiguous_inner_most_scalable_inner_dim(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = array<i1: true, true, true>} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
   return %v : vector<1x[8]x1xf32>
 }
@@ -45,7 +45,7 @@ func.func @contiguous_inner_most_scalable_inner_dim(%src: memref<1x1x8x1xf32, st
 func.func @negative_dynamic_trailing_dim(%src: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = array<i1: true, true, true>} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
   return %v : vector<1x8x1xf32>
 }
@@ -59,7 +59,7 @@ func.func @negative_dynamic_trailing_dim(%src: memref<1x1x8x?xf32, strided<[3072
 func.func @negative_scalable_one_trailing_dim(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = array<i1: true, true, true>} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
   return %v : vector<1x8x[1]xf32>
 }
 // CHECK-LABEL: func @negative_scalable_one_trailing_dim
@@ -71,7 +71,7 @@ func.func @negative_scalable_one_trailing_dim(%src: memref<1x1x8x1xf32, strided<
 func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
+  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?x8x1xf32>, vector<8x1xf32>
   return %v : vector<8x1xf32>
 }
 // CHECK: func.func @contiguous_inner_most_dynamic_outer
 func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%i: index, %ii: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
+  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = array<i1: true, true>} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
   return %v : vector<[8]x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
@@ -106,7 +106,7 @@ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%i: index, %ii
 // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
 // CHECK: %[[VIEW:.+]] = memref.subview %[[SRC]]{{.*}} memref<?x?x8x1xf32> to memref<?x?x8xf32, strided<[?, 8, 1]>>
 // CHECK: %[[VEC_READ:.+]] = vector.transfer_read %[[VIEW]]
-// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: {in_bounds = array<i1: true>}
 // CHECK-SAME: memref<?x?x8xf32, strided<[?, 8, 1]>>, vector<[8]xf32>
 // CHECK: vector.shape_cast %[[VEC_READ]]
@@ -120,7 +120,7 @@ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%i: index, %ii
 func.func @contiguous_inner_most_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = array<i1: true, true>} : memref<16x1xf32>, vector<8x1xf32>
   return %v : vector<8x1xf32>
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_zero_idx_in_bounds(
@@ -128,7 +128,7 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:i
 // CHECK-SAME: %[[IDX:.*]]: index) -> vector<8x1xf32> {
 // CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
-// CHECK: %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32>
+// CHECK: %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = array<i1: true>} : memref<16xf32, strided<[1]>>, vector<8xf32>
 // CHECK: vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32>
 // The index to be dropped is == 0, so it's safe to collapse.
The "out of @@ -137,7 +137,7 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:i func.func @contiguous_inner_most_zero_idx_out_of_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) { %pad = arith.constant 0.0 : f32 %c0 = arith.constant 0 : index - %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32> + %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = array} : memref<16x1xf32>, vector<8x1xf32> return %v : vector<8x1xf32> } // CHECK-LABEL: func.func @contiguous_inner_most_zero_idx_out_of_bounds( @@ -145,14 +145,14 @@ func.func @contiguous_inner_most_zero_idx_out_of_bounds(%src: memref<16x1xf32>, // CHECK-SAME: %[[IDX:.*]]: index) -> vector<8x1xf32> { // CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = array} : memref<16xf32, strided<[1]>>, vector<8xf32> // CHECK: vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32> // The index to be dropped is unknown, but since it's "in bounds", it has to be // == 0. It's safe to collapse the corresponding dim. func.func @contiguous_inner_most_non_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) { %pad = arith.constant 0.0 : f32 - %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32> + %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = array} : memref<16x1xf32>, vector<8x1xf32> return %v : vector<8x1xf32> } // CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idx_in_bounds( @@ -160,7 +160,7 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds(%src: memref<16x1xf32>, // CHECK-SAME: %[[IDX:.*]]: index) -> vector<8x1xf32> { // CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = array} : memref<16xf32, strided<[1]>>, vector<8xf32> // CHECK: vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32> // Same as the top example within this split, but with the outer vector @@ -169,7 +169,7 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds(%src: memref<16x1xf32>, func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%src: memref<16x1xf32>, %i:index) -> (vector<[8]x1xf32>) { %pad = arith.constant 0.0 : f32 - %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<[8]x1xf32> + %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = array} : memref<16x1xf32>, vector<[8]x1xf32> return %v : vector<[8]x1xf32> } // CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable @@ -177,14 +177,14 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%src: memref<16 // CHECK-SAME: %[[IDX:.*]]: index) -> vector<[8]x1xf32> { // CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : 
-// CHECK: %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<[8]xf32>
+// CHECK: %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = array<i1: true>} : memref<16xf32, strided<[1]>>, vector<[8]xf32>
 // CHECK: vector.shape_cast %[[READ]] : vector<[8]xf32> to vector<[8]x1xf32>
 // The index to be dropped is unknown and "out of bounds" - not safe to
 // collapse.
 func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = array<i1: true, false>} : memref<16x1xf32>, vector<8x1xf32>
   return %v : vector<8x1xf32>
 }
 // CHECK-LABEL: func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(
@@ -198,14 +198,14 @@ func.func @contiguous_inner_most_dim_with_subview(%src: memref<1000x1xf32>, %i:i
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
   %sv = memref.subview %src[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
+  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = array<i1: true, true>} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
   return %v : vector<4x1xf32>
 }
 // CHECK: func @contiguous_inner_most_dim_with_subview(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
 // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
 // CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
 // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
-// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: {in_bounds = array<i1: true>}
 // CHECK-SAME: vector<4xf32>
 // Same as the top example within this split, but with the outer vector
@@ -216,14 +216,14 @@ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%src: memre
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
   %sv = memref.subview %src[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
+  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = array<i1: true, true>} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
   return %v : vector<[4]x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
 // CHECK-SAME: %[[SRC:.+]]: memref<1000x1xf32>
 // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
 // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_0]]
-// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: {in_bounds = array<i1: true>}
 // CHECK-SAME: vector<[4]xf32>

// -----

@@ -232,14 +232,14 @@ func.func @contiguous_inner_most_dim_with_subview_2d(%src: memref<1000x1x1xf32>,
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
   %sv = memref.subview %src[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
+  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = array<i1: true, true, true>} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
   return %v : vector<4x1x1xf32>
 }
 // CHECK: func @contiguous_inner_most_dim_with_subview_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
 // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
 // CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
 // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
-// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: {in_bounds = array<i1: true>}
 // CHECK-SAME: vector<4xf32>
 // Same as the top example within this split, but with the outer vector
@@ -250,7 +250,7 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%src: me
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
   %sv = memref.subview %src[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
+  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = array<i1: true, true, true>} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
   return %v : vector<[4]x1x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(
@@ -258,7 +258,7 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%src: me
 // CHECK: %[[SRC_0:.+]] = memref.subview %[[SRC]]
 // CHECK: %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
 // CHECK: %[[V:.+]] = vector.transfer_read %[[SRC_1]]
-// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: {in_bounds = array<i1: true>}
 // CHECK-SAME: vector<[4]xf32>
 // CHECK: vector.shape_cast %[[V]]
@@ -296,7 +296,7 @@ func.func @negative_non_unit_strides(%src: memref<512x16x1xf32, strided<[8192, 1
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.000000e+00 : f32
   %v = vector.transfer_read %src[%i, %c0, %c0], %pad
-    {in_bounds = [true, true, true]}
+    {in_bounds = array<i1: true, true, true>}
     : memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, vector<16x16x1xf32>
   return %v : vector<16x16x1xf32>
 }
@@ -313,7 +313,7 @@ func.func @negative_non_unit_strides(%src: memref<512x16x1xf32, strided<[8192, 1
 func.func @contiguous_inner_most(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x16x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
-    {in_bounds = [true, true, true, true, true]}
+    {in_bounds = array<i1: true, true, true, true, true>}
     : vector<1x16x16x1x1xf32>, memref<1x512x16x1x1xf32>
   return
 }
@@ -335,7 +335,7 @@ func.func @contiguous_inner_most(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x1
 func.func @contiguous_inner_most_scalable_inner_dim(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x[16]x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
-    {in_bounds = [true, true, true, true, true]}
+    {in_bounds = array<i1: true, true, true, true, true>}
     : vector<1x16x[16]x1x1xf32>, memref<1x512x16x1x1xf32>
   return
 }
@@ -356,7 +356,7 @@ func.func @contiguous_inner_most_scalable_inner_dim(%dest: memref<1x512x16x1x1xf
 func.func @negative_dynamic_trailing_dim(%dest: memref<1x512x16x1x?xf32>, %v: vector<1x16x16x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
-    {in_bounds = [true, true, true, true, true]}
+    {in_bounds = array<i1: true, true, true, true, true>}
     : vector<1x16x16x1x1xf32>, memref<1x512x16x1x?xf32>
   return
 }
@@ -370,7 +370,7 @@ func.func @negative_dynamic_trailing_dim(%dest: memref<1x512x16x1x?xf32>, %v: ve
 func.func @negative_scalable_one_trailing_dim(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x16x1x[1]xf32>, %i: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
-    {in_bounds = [true, true, true, true, true]}
+    {in_bounds = array<i1: true, true, true, true, true>}
     : vector<1x16x16x1x[1]xf32>, memref<1x512x16x1x1xf32>
   return
 }
@@ -383,7 +383,7 @@ func.func @negative_scalable_one_trailing_dim(%dest: memref<1x512x16x1x1xf32>, %
 func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %dest: memref<?x?x16x1xf32>, %v: vector<8x1xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %v, %dest[%i, %ii, %c0, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<?x?x16x1xf32>
+  vector.transfer_write %v, %dest[%i, %ii, %c0, %c0] {in_bounds = array<i1: true, true>} : vector<8x1xf32>, memref<?x?x16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_dynamic_outer(
@@ -396,7 +396,7 @@ func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %dest: mem
 // CHECK: %[[DIM1:.*]] = memref.dim %[[MEM]], %[[C1]] : memref<?x?x16x1xf32>
 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0, 0, 0] {{\[}}%[[DIM0]], %[[DIM1]], 16, 1] [1, 1, 1, 1] : memref<?x?x16x1xf32> to memref<?x?x16xf32, strided<[?, 16, 1]>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX_0]], %[[IDX_1]], %[[C0]]] {in_bounds = [true]} : vector<8xf32>, memref<?x?x16xf32, strided<[?, 16, 1]>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX_0]], %[[IDX_1]], %[[C0]]] {in_bounds = array<i1: true>} : vector<8xf32>, memref<?x?x16xf32, strided<[?, 16, 1]>>
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
@@ -404,7 +404,7 @@ func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %dest: mem
 func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%i: index, %ii: index, %dest: memref<?x?x16x1xf32>, %v: vector<[8]x1xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %v, %dest[%i, %ii, %c0, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<?x?x16x1xf32>
+  vector.transfer_write %v, %dest[%i, %ii, %c0, %c0] {in_bounds = array<i1: true, true>} : vector<[8]x1xf32>, memref<?x?x16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(
@@ -417,7 +417,7 @@ func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%i: index, %ii
 // CHECK: %[[DIM1:.*]] = memref.dim %[[MEM]], %[[C1]] : memref<?x?x16x1xf32>
 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0, 0, 0] {{\[}}%[[DIM0]], %[[DIM1]], 16, 1] [1, 1, 1, 1] : memref<?x?x16x1xf32> to memref<?x?x16xf32, strided<[?, 16, 1]>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX_0]], %[[IDX_1]], %[[C0]]] {in_bounds = [true]} : vector<[8]xf32>, memref<?x?x16xf32, strided<[?, 16, 1]>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX_0]], %[[IDX_1]], %[[C0]]] {in_bounds = array<i1: true>} : vector<[8]xf32>, memref<?x?x16xf32, strided<[?, 16, 1]>>

// -----

@@ -428,7 +428,7 @@ func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%i: index, %ii
 // should be preserved correctly.
 func.func @contiguous_inner_most_zero_idx_in_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %v, %dest[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %c0] {in_bounds = array<i1: true, true>} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_zero_idx_in_bounds(
@@ -437,14 +437,14 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%dest: memref<16x1xf32>, %v:
 // CHECK-SAME: %[[IDX:.*]]: index) {
 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = array<i1: true>} : vector<8xf32>, memref<16xf32, strided<[1]>>
 // The index to be dropped is == 0, so it's safe to collapse. The "out of
 // bounds" attribute is too conservative and will be folded to "in bounds"
 // before the pattern runs. The other index should be preserved correctly.
 func.func @contiguous_inner_most_zero_idx_out_of_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %v, %dest[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %c0] {in_bounds = array<i1: true, false>} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_zero_idx_out_of_bounds
@@ -453,12 +453,12 @@ func.func @contiguous_inner_most_zero_idx_out_of_bounds(%dest: memref<16x1xf32>,
 // CHECK-SAME: %[[IDX:.*]]: index) {
 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = array<i1: true>} : vector<8xf32>, memref<16xf32, strided<[1]>>
 // The index to be dropped is unknown, but since it's "in bounds", it has to be
 // == 0. It's safe to collapse the corresponding dim.
 func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
-  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = array<i1: true, true>} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idx_in_bounds
@@ -467,14 +467,14 @@ func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%dest: memref<16x1xf
 // CHECK-SAME: %[[IDX:.*]]: index) {
 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = array<i1: true>} : vector<8xf32>, memref<16xf32, strided<[1]>>
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
 // vscale = 1). This is assumed via the `in_bounds` attribute.
 func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%dest: memref<16x1xf32>, %v: vector<[8]x1xf32>, %i: index) {
-  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = array<i1: true, true>} : vector<[8]x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(
@@ -483,12 +483,12 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%dest: memref<1
 // CHECK-SAME: %[[IDX:.*]]: index) {
 // CHECK: %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<[8]xf32>, memref<16xf32, strided<[1]>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = array<i1: true>} : vector<[8]xf32>, memref<16xf32, strided<[1]>>
 // The index to be dropped is unknown and "out of bounds" - not safe to
 // collapse.
 func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
-  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = array<i1: true, false>} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds
@@ -504,7 +504,7 @@ func.func @contiguous_inner_most_dim_with_subview(%dest: memref<1000x1xf32>, %i:
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
   %0 = memref.subview %dest[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = array<i1: true, true>} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
   return
 }
@@ -515,7 +515,7 @@ func.func @contiguous_inner_most_dim_with_subview(%dest: memref<1000x1xf32>, %i:
 // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
 // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1xf32> to vector<4xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = array<i1: true>} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
@@ -525,7 +525,7 @@ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%dest: memr
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
   %0 = memref.subview %dest[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = array<i1: true, true>} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
   return
 }
@@ -536,7 +536,7 @@ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%dest: memr
 // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
 // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>

// -----

@@ -544,7 +544,7 @@ func.func @contiguous_inner_most_dim_with_subview_2d(%dest: memref<1000x1x1xf32>
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
   %0 = memref.subview %dest[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = array<i1: true, true, true>} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_2d(
@@ -554,7 +554,7 @@ func.func @contiguous_inner_most_dim_with_subview_2d(%dest: memref<1000x1x1xf32>
 // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
 // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1x1xf32> to vector<4xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = array<i1: true>} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
@@ -564,7 +564,7 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%dest: memref<1000
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
   %0 = memref.subview %dest[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = array<i1: true, true, true>} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_dim_with_subview_2d_scalable
@@ -574,7 +574,7 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%dest: memref<1000
 // CHECK: %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
 // CHECK: %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1x1xf32> to vector<[4]xf32>
-// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+// CHECK: vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = array<i1: true>} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>

// -----

@@ -607,7 +607,7 @@ func.func @negative_non_unit_inner_memref_dim(%dest: memref<4x8xf32>, %vec: vect
 func.func @negative_non_unit_strides(%dest: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %v: vector<16x16x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %v, %dest[%i, %c0, %c0]
-    {in_bounds = [true, true, true]}
+    {in_bounds = array<i1: true, true, true>}
     : vector<16x16x1xf32>, memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>
   return
 }
diff --git a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir
index e9d12b044e2c7..f7d39444322a5 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir
@@ -38,7 +38,7 @@ func.func @transfer_read_and_vector_rank_reducing(
 // CHECK-SAME: %[[ARG:.+]]: memref<1x1x3x2x1xf32>
 // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0, 0] [1, 1, 3, 2, 1] [1, 1, 1, 1, 1]
 // CHECK-SAME: memref<1x1x3x2x1xf32> to memref<3x2xf32>
-// CHECK: vector.transfer_read %[[SUBVIEW]]{{.*}} {in_bounds = [true, true]} : memref<3x2xf32>, vector<3x2xf32>
+// CHECK: vector.transfer_read %[[SUBVIEW]]{{.*}} {in_bounds = array<i1: true, true>} : memref<3x2xf32>, vector<3x2xf32>

 func.func @transfer_write_and_vector_rank_reducing(
     %arg : memref<1x1x3x2x1xf32>,
@@ -52,7 +52,7 @@ func.func @transfer_write_and_vector_rank_reducing(
 // CHECK-SAME: %[[ARG:.+]]: memref<1x1x3x2x1xf32>
 // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0, 0] [1, 1, 3, 2, 1] [1, 1, 1, 1, 1]
 // CHECK-SAME: memref<1x1x3x2x1xf32> to memref<3x2xf32>
-// CHECK: vector.transfer_write %{{.*}}, %[[SUBVIEW]]{{.*}} {in_bounds = [true, true]} : vector<3x2xf32>, memref<3x2xf32>
+// CHECK: vector.transfer_write %{{.*}}, %[[SUBVIEW]]{{.*}} {in_bounds = array<i1: true, true>} : vector<3x2xf32>, memref<3x2xf32>

 func.func @transfer_read_and_vector_rank_reducing_to_0d(
     %arg : memref<1x1x1x1x1xf32>) -> vector<1x1x1xf32> {
@@ -86,7 +86,7 @@
 func.func @transfer_read_dynamic_rank_reducing(
     %arg : memref<?x1xi8, strided<[?, ?], offset: ?>>) -> vector<[16]x1xi8> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i8
-  %v = vector.transfer_read %arg[%c0, %c0], %pad {in_bounds = [true, true]} :
+  %v = vector.transfer_read %arg[%c0, %c0], %pad {in_bounds = array<i1: true, true>} :
     memref<?x1xi8, strided<[?, ?], offset: ?>>, vector<[16]x1xi8>
   return %v : vector<[16]x1xi8>
 }
@@ -104,7 +104,7 @@ func.func @masked_transfer_read_dynamic_rank_reducing_1(
   %c1 = arith.constant 1 : index
   %pad = arith.constant 0 : i8
   %mask = vector.create_mask %mask_dim0, %c1 : vector<[16]x1xi1>
-  %v = vector.transfer_read %arg[%c0, %c0], %pad, %mask {in_bounds = [true, true]} :
+  %v = vector.transfer_read %arg[%c0, %c0], %pad, %mask {in_bounds = array<i1: true, true>} :
     memref<?x1xi8, strided<[?, ?], offset: ?>>, vector<[16]x1xi8>
   return %v : vector<[16]x1xi8>
 }
@@ -116,7 +116,7 @@ func.func @masked_transfer_read_dynamic_rank_reducing_1(
 // CHECK: %[[MASK:.+]] = vector.create_mask %[[MASK_DIM0]] : vector<[16]xi1>
 // CHECK: %[[DIM0:.+]] = memref.dim %[[ARG]], %[[C0]] : memref<?x1xi8, strided<[?, ?], offset: ?>>
 // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0] [%[[DIM0]], 1] [1, 1] : memref<?x1xi8, {{.*}}> to memref<?xi8, {{.*}}>
-// CHECK: vector.transfer_read %[[SUBVIEW]][{{.*}}], %[[PAD]], %[[MASK]] {in_bounds = [true]} : memref<?xi8, {{.*}}>, vector<[16]xi8>
+// CHECK: vector.transfer_read %[[SUBVIEW]][{{.*}}], %[[PAD]], %[[MASK]] {in_bounds = array<i1: true>} : memref<?xi8, {{.*}}>, vector<[16]xi8>

 func.func @masked_transfer_read_dynamic_rank_reducing_2(
     %arg : memref<1x?x3x1x?x1xi8, strided<[?, ?, ?, ?, ?, ?], offset: ?>>,
@@ -126,7 +126,7 @@ func.func @masked_transfer_read_dynamic_rank_reducing_2(
   %c2 = arith.constant 2 : index
   %pad = arith.constant 0 : i8
   %mask = vector.create_mask %c1, %mask_dim1, %c2, %c1, %mask_dim4, %c1 : vector<1x[1]x3x1x[16]x1xi1>
-  %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0, %c0, %c0], %pad, %mask {in_bounds = [true, true, true, true, true, true]} :
+  %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0, %c0, %c0], %pad, %mask {in_bounds = array<i1: true, true, true, true, true, true>} :
     memref<1x?x3x1x?x1xi8, strided<[?, ?, ?, ?, ?, ?], offset: ?>>, vector<1x[1]x3x1x[16]x1xi8>
   return %v : vector<1x[1]x3x1x[16]x1xi8>
 }
@@ -142,7 +142,7 @@ func.func @masked_transfer_read_dynamic_rank_reducing_2(
 // CHECK: %[[DIM1:.+]] = memref.dim %[[ARG]], %[[C1]] : memref<1x?x3x1x?x1xi8, strided<[?, ?, ?, ?, ?, ?], offset: ?>>
 // CHECK: %[[DIM4:.+]] = memref.dim %[[ARG]], %[[C4]] : memref<1x?x3x1x?x1xi8, strided<[?, ?, ?, ?, ?, ?], offset: ?>>
 // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0, 0, 0] [1, %[[DIM1]], 3, 1, %[[DIM4]], 1] [1, 1, 1, 1, 1, 1] : memref<1x?x3x1x?x1xi8, {{.*}}> to memref<?x3x?xi8, {{.*}}>
-// CHECK: vector.transfer_read %[[SUBVIEW]][{{.*}}], %[[PAD]], %[[MASK]] {in_bounds = [true, true, true]} : memref<?x3x?xi8, {{.*}}>, vector<[1]x3x[16]xi8>
+// CHECK: vector.transfer_read %[[SUBVIEW]][{{.*}}], %[[PAD]], %[[MASK]] {in_bounds = array<i1: true, true, true>} : memref<?x3x?xi8, {{.*}}>, vector<[1]x3x[16]xi8>

 func.func @masked_transfer_write_and_vector_rank_reducing(
     %arg : memref<1x1x3x1x16x1xf32>,
@@ -164,7 +164,7 @@ func.func @masked_transfer_write_and_vector_rank_reducing(
 // CHECK: %[[MASK:.+]] = vector.create_mask %[[MASKDIM1]], %[[MASKDIM2]] : vector<3x16xi1>
 // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0, 0, 0] [1, 1, 3, 1, 16, 1] [1, 1, 1, 1, 1, 1]
 // CHECK-SAME: memref<1x1x3x1x16x1xf32> to memref<3x16xf32>
-// CHECK: vector.transfer_write %{{.*}}, %[[SUBVIEW]]{{.*}}, %[[MASK]] {in_bounds = [true, true]} : vector<3x16xf32>, memref<3x16xf32>
+// CHECK: vector.transfer_write %{{.*}}, %[[SUBVIEW]]{{.*}}, %[[MASK]] {in_bounds = array<i1: true, true>} : vector<3x16xf32>, memref<3x16xf32>

 func.func @masked_transfer_write_dynamic_rank_reducing(
     %arg : memref<?x1xi8, strided<[?, ?], offset: ?>>,
@@ -174,7 +174,7 @@ func.func @masked_transfer_write_dynamic_rank_reducing(
   %c1 = arith.constant 1 : index
   %pad = arith.constant 0 : i8
   %mask = vector.create_mask %mask_dim0, %c1 : vector<[16]x1xi1>
-  vector.transfer_write %vec, %arg[%c0, %c0], %mask {in_bounds = [true, true]} :
+  vector.transfer_write %vec, %arg[%c0, %c0], %mask {in_bounds = array<i1: true, true>} :
     vector<[16]x1xi8>, memref<?x1xi8, strided<[?, ?], offset: ?>>
   return
 }
@@ -186,7 +186,7 @@ func.func @masked_transfer_write_dynamic_rank_reducing(
 // CHECK: %[[MASK:.+]] = vector.create_mask %[[MASK_DIM0]] : vector<[16]xi1>
 // CHECK: %[[DIM0:.+]] = memref.dim %[[ARG]], %[[C0]] : memref<?x1xi8, strided<[?, ?], offset: ?>>
 // CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0] [%[[DIM0]], 1] [1, 1] : memref<?x1xi8, {{.*}}> to memref<?xi8, {{.*}}>
-// CHECK: vector.transfer_write {{.*}}, %[[SUBVIEW]][%[[C0]]], %[[MASK]] {in_bounds = [true]} : vector<[16]xi8>, memref<?xi8, {{.*}}>
+// CHECK: vector.transfer_write {{.*}}, %[[SUBVIEW]][%[[C0]]], %[[MASK]] {in_bounds = array<i1: true>} : vector<[16]xi8>, memref<?xi8, {{.*}}>

 /// Only masks operands of vector.create_mask are currently supported.
 func.func @unsupported_masked_transfer_read_dynamic_rank_reducing_1(
@@ -194,7 +194,7 @@ func.func @unsupported_masked_transfer_read_dynamic_rank_reducing_1(
     %mask : vector<[16]x1xi1>) -> vector<[16]x1xi8> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0 : i8
-  %v = vector.transfer_read %arg[%c0, %c0], %pad, %mask {in_bounds = [true, true]} :
+  %v = vector.transfer_read %arg[%c0, %c0], %pad, %mask {in_bounds = array<i1: true, true>} :
     memref<?x1xi8, strided<[?, ?], offset: ?>>, vector<[16]x1xi8>
   return %v : vector<[16]x1xi8>
 }
@@ -212,7 +212,7 @@ func.func @unsupported_masked_transfer_read_dynamic_rank_reducing_2(
   %c1 = arith.constant 1 : index
   %pad = arith.constant 0 : i8
   %mask = vector.create_mask %mask_dim0, %mask_dim1 : vector<[16]x1xi1>
-  %v = vector.transfer_read %arg[%c0, %c0], %pad, %mask {in_bounds = [true, true]} :
+  %v = vector.transfer_read %arg[%c0, %c0], %pad, %mask {in_bounds = array<i1: true, true>} :
     memref<?x1xi8, strided<[?, ?], offset: ?>>, vector<[16]x1xi8>
   return %v : vector<[16]x1xi8>
 }
@@ -229,7 +229,7 @@ func.func @masked_transfer_read_dynamic_rank_reducing_scalable_unit_dim(
   %c1 = arith.constant 1 : index
   %pad = arith.constant 0 : i8
   %mask = vector.create_mask %mask_dim0, %c1 : vector<[16]x[1]xi1>
-  %v = vector.transfer_read %arg[%c0, %c0], %pad, %mask {in_bounds = [true, true]} :
+  %v = vector.transfer_read %arg[%c0, %c0], %pad, %mask {in_bounds = array<i1: true, true>} :
     memref<?x1xi8, strided<[?, ?], offset: ?>>, vector<[16]x[1]xi8>
   return %v : vector<[16]x[1]xi8>
 }
diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index e840dc6bbf224..618fb5916591d 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -88,7 +88,7 @@ func.func @transfer_read_dims_mismatch_contiguous(
 // CHECK: %[[VAL_1:.*]] = arith.constant 0 : i8
 // CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
 // CHECK: %[[VAL_3:.*]] = memref.collapse_shape %[[MEM]] {{\[\[}}0, 1, 2, 3]] : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>> into memref<120xi8, strided<[1], offset: ?>>
-// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_3]]{{\[}}%[[VAL_2]]], %[[VAL_1]] {in_bounds = [true]} : memref<120xi8, strided<[1], offset: ?>>, vector<4xi8>
+// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_3]]{{\[}}%[[VAL_2]]], %[[VAL_1]] {in_bounds = array<i1: true>} : memref<120xi8, strided<[1], offset: ?>>, vector<4xi8>
 // CHECK: %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<4xi8> to vector<1x1x2x2xi8>
 // CHECK: return %[[VAL_5]] : vector<1x1x2x2xi8>
@@ -105,7 +105,7 @@ func.func @transfer_read_dims_mismatch_non_zero_indices(
   %c0 = arith.constant 0 : index
   %c0_i32 = arith.constant 0 : i32
   %res = vector.transfer_read %mem[%c0, %idx_1, %idx_2, %c0], %c0_i32 {
-    in_bounds = [true, true, true]
+    in_bounds = array<i1: true, true, true>
   } : memref<1x43x4x6xi32>, vector<1x2x6xi32>
   return %res : vector<1x2x6xi32>
 }
@@ -119,7 +119,7 @@ func.func @transfer_read_dims_mismatch_non_zero_indices(
 // CHECK: %[[C_0_IDX:.*]] = arith.constant 0 : index
 // CHECK: %[[COLLAPSED_IN:.*]] = memref.collapse_shape %[[MEM]] {{\[}}[0], [1, 2, 3]] : memref<1x43x4x6xi32> into memref<1x1032xi32>
 // CHECK: %[[COLLAPSED_IDX:.*]] = affine.apply #[[$ATTR_0]]()[%[[IDX_1]], %[[IDX_2]]]
-// CHECK: %[[READ:.*]] = vector.transfer_read %[[COLLAPSED_IN]][%[[C_0_IDX]], %[[COLLAPSED_IDX]]], %[[C_0]] {in_bounds = [true]} : memref<1x1032xi32>, vector<12xi32>
+// CHECK: %[[READ:.*]] = vector.transfer_read %[[COLLAPSED_IN]][%[[C_0_IDX]], %[[COLLAPSED_IDX]]], %[[C_0]] {in_bounds = array<i1: true>} : memref<1x1032xi32>, vector<12xi32>

 // CHECK-128B-LABEL: func @transfer_read_dims_mismatch_non_zero_indices(
 // CHECK-128B-NOT: memref.collapse_shape
@@ -137,7 +137,7 @@ func.func @transfer_read_dims_mismatch_non_contiguous_non_zero_indices(
   %c0 = arith.constant 0 : index
   %cst_1 = arith.constant 0.000000e+00 : f32
   %res = vector.transfer_read %mem[%c0, %idx_1, %idx_2, %c0], %cst_1 {
-    in_bounds = [true, true]
+    in_bounds = array<i1: true, true>
   } : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>, vector<2x2xf32>
   return %res : vector<2x2xf32>
 }
@@ -165,7 +165,7 @@ func.func @transfer_read_leading_dynamic_dims(
   %c0_i8 = arith.constant 0 : i8
   %c0 = arith.constant 0 : index
   %res = vector.transfer_read %mem[%idx_1, %idx_2, %c0, %c0], %c0_i8 {
-    in_bounds = [true, true]
+    in_bounds = array<i1: true, true>
   } : memref<?x?x8x4xi8, strided<[?, 32, 4, 1], offset: ?>>, vector<8x4xi8>
   return %res : vector<8x4xi8>
 }
@@ -178,7 +178,7 @@ func.func @transfer_read_leading_dynamic_dims(
 // CHECK-SAME: : memref<?x?x8x4xi8, {{.*}}> into memref<?x?x32xi8, {{.*}}>
 // CHECK: %[[VEC1D:.+]] = vector.transfer_read %[[COLLAPSED]]
 // CHECK-SAME: [%[[IDX_1]], %[[IDX_2]], %[[C0]]], %[[C0_I8]]
-// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: {in_bounds = array<i1: true>}
 // CHECK-SAME: : memref<?x?x32xi8, {{.*}}>, vector<32xi8>
 // CHECK: %[[RES:.+]] = vector.shape_cast %[[VEC1D]] : vector<32xi8> to vector<8x4xi8>
 // CHECK: return %[[RES]] : vector<8x4xi8>
@@ -198,7 +198,7 @@ func.func @negative_transfer_read_dynamic_dim_to_flatten(
   %c0 = arith.constant 0 : index
   %c0_i32 = arith.constant 0 : i32
   %res = vector.transfer_read %mem[%c0, %idx_1, %idx_2, %c0], %c0_i32 {
-    in_bounds = [true, true, true]
+    in_bounds = array<i1: true, true, true>
   } : memref<1x?x4x6xi32>, vector<1x2x6xi32>
   return %res : vector<1x2x6xi32>
 }
@@ -357,7 +357,7 @@ func.func @transfer_write_dims_mismatch_contiguous(
 // CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
 // CHECK: %[[VAL_3:.*]] = memref.collapse_shape %[[MEM]] {{\[\[}}0, 1, 2, 3]] : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>> into memref<120xi8, strided<[1], offset: ?>>
 // CHECK: %[[VAL_4:.*]] = vector.shape_cast %[[VEC]] : vector<1x1x2x2xi8> to vector<4xi8>
-// CHECK: vector.transfer_write %[[VAL_4]], %[[VAL_3]]{{\[}}%[[VAL_2]]] {in_bounds = [true]} : vector<4xi8>, memref<120xi8, strided<[1], offset: ?>>
+// CHECK: vector.transfer_write %[[VAL_4]], %[[VAL_3]]{{\[}}%[[VAL_2]]] {in_bounds = array<i1: true>} : vector<4xi8>, memref<120xi8, strided<[1], offset: ?>>

 // CHECK-128B-LABEL: func @transfer_write_dims_mismatch_contiguous(
 // CHECK-128B: memref.collapse_shape
@@ -372,7 +372,7 @@ func.func @transfer_write_dims_mismatch_non_zero_indices(
   %c0 = arith.constant 0 : index
   %c0_i32 = arith.constant 0 : i32
-  vector.transfer_write %vec, %mem[%c0, %idx_1, %idx_2, %c0] {in_bounds = [true, true, true]} :
+  vector.transfer_write %vec, %mem[%c0, %idx_1, %idx_2, %c0] {in_bounds = array<i1: true, true, true>} :
     vector<1x2x6xi32>, memref<1x43x4x6xi32>
   return
 }
@@ -387,7 +387,7 @@ func.func @transfer_write_dims_mismatch_non_zero_indices(
 // CHECK-DAG: %[[IDX:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[IDX_1]], %[[IDX_2]]]
 // CHECK-DAG: %[[CS:.*]] = memref.collapse_shape %[[MEM]] {{\[\[}}0], [1, 2, 3]] : memref<1x43x4x6xi32> into memref<1x1032xi32>
 // CHECK: %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<1x2x6xi32> to vector<12xi32>
-// CHECK: vector.transfer_write %[[SC]], %[[CS]]{{\[}}%[[C0]], %[[IDX]]] {in_bounds = [true]} : vector<12xi32>, memref<1x1032xi32>
+// CHECK: vector.transfer_write %[[SC]], %[[CS]]{{\[}}%[[C0]], %[[IDX]]] {in_bounds = array<i1: true>} : vector<12xi32>, memref<1x1032xi32>

 // CHECK-128B-LABEL: func @transfer_write_dims_mismatch_non_zero_indices(
 // CHECK-128B-NOT: memref.collapse_shape
@@ -405,7 +405,7 @@ func.func @transfer_write_dims_mismatch_non_contiguous_non_zero_indices(
     %idx_2 : index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %mem[%c0, %idx_1, %idx_2, %c0] {in_bounds = [true, true]} : vector<2x2xf32>, memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>
+  vector.transfer_write %vec, %mem[%c0, %idx_1, %idx_2, %c0] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>
   return
 }
@@ -430,7 +430,7 @@ func.func @transfer_write_leading_dynamic_dims(
     %idx_2 : index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %mem[%idx_1, %idx_2, %c0, %c0] {in_bounds = [true, true]} :
+  vector.transfer_write %vec, %mem[%idx_1, %idx_2, %c0, %c0] {in_bounds = array<i1: true, true>} :
     vector<8x4xi8>, memref<?x?x8x4xi8, strided<[?, 32, 4, 1], offset: ?>>
   return
 }
@@ -443,7 +443,7 @@ func.func @transfer_write_leading_dynamic_dims(
 // CHECK: %[[VEC1D:.+]] = vector.shape_cast %[[VEC]] : vector<8x4xi8> to vector<32xi8>
 // CHECK: vector.transfer_write %[[VEC1D]], %[[COLLAPSED]]
 // CHECK-SAME: [%[[ARG2]], %[[ARG3]], %[[C0]]]
-// CHECK-SAME: {in_bounds = [true]}
+// CHECK-SAME: {in_bounds = array<i1: true>}
 // CHECK-SAME: : vector<32xi8>, memref<?x?x32xi8, {{.*}}>

 // CHECK-128B-LABEL: func @transfer_write_leading_dynamic_dims
@@ -461,7 +461,7 @@ func.func @negative_transfer_write_dynamic_to_flatten(
   %c0 = arith.constant 0 : index
   %c0_i32 = arith.constant 0 : i32
-  vector.transfer_write %vec, %mem[%c0, %idx_1, %idx_2, %c0] {in_bounds = [true, true, true]} :
+  vector.transfer_write %vec, %mem[%c0, %idx_1, %idx_2, %c0] {in_bounds = array<i1: true, true, true>} :
     vector<1x2x6xi32>, memref<1x?x4x6xi32>
   return
 }
@@ -542,7 +542,7 @@ func.func @negative_out_of_bound_transfer_read(
     %mem : memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<5x4x3x2xi8> {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0 : i8
-  %res = vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst {in_bounds = [false, true, true, true]} :
+  %res = vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst {in_bounds = array<i1: false, true, true, true>} :
     memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<5x4x3x2xi8>
   return %res : vector<5x4x3x2xi8>
 }
@@ -554,7 +554,7 @@ func.func @negative_out_of_bound_transfer_read(
 func.func @negative_out_of_bound_transfer_write(
     %mem : memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, %vec : vector<1x1x3x2xi8>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %mem [%c0, %c0, %c0, %c0] {in_bounds = [false, true, true, true]} :
+  vector.transfer_write %vec, %mem [%c0, %c0, %c0, %c0] {in_bounds = array<i1: false, true, true, true>} :
    vector<1x1x3x2xi8>, memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>
   return
 }
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split-copy-transform.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split-copy-transform.mlir
index 483147c6f6a40..a81696e3b5c4f 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split-copy-transform.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split-copy-transform.mlir
@@ -46,7 +46,7 @@ func.func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: ind
   // CHECK-SAME: memref<?x8xf32>, index, index
   // CHECK: }
   // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
-  // CHECK-SAME: {in_bounds = [true, true]} : memref<?x8xf32>, vector<4x8xf32>
+  // CHECK-SAME: {in_bounds = array<i1: true, true>} : memref<?x8xf32>, vector<4x8xf32>
   %1 = vector.transfer_read %A[%i, %j], %f0 : memref<?x8xf32>, vector<4x8xf32>

   // CHECK: return %[[res]] : vector<4x8xf32>
@@ -98,7 +98,7 @@ func.func @split_vector_transfer_read_strided_2d(
   // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
   // CHECK-SAME: memref<?x8xf32, strided<[?, 1], offset: ?>>, index, index
   // CHECK: }
-  // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true]} :
+  // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = array<i1: true, true>} :
   // CHECK-SAME: memref<?x8xf32, strided<[?, 1], offset: ?>>, vector<4x8xf32>
   %1 = vector.transfer_read %A[%i, %j], %f0 :
     memref<7x8xf32, strided<[?, 1], offset: ?>>, vector<4x8xf32>
@@ -154,7 +154,7 @@ func.func @split_vector_transfer_write_2d(%V: vector<4x8xf32>, %A: memref<?x8xf32>,
-// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true>} : vector<4x8xf32>, memref<?x8xf32>
// CHECK: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK: scf.if %[[OUT_BOUNDS]] {
// CHECK: %[[VAL_19:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
@@ -225,7 +225,7 @@ func.func @split_vector_transfer_write_strided_2d(
// CHECK: vector.transfer_write %[[VEC]],
// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0
// CHECK-SAME: [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
-// CHECK-SAME: {in_bounds = [true, true]}
+// CHECK-SAME: {in_bounds = array<i1: true, true>}
// CHECK-SAME: : vector<4x8xf32>, memref<?x8xf32, strided<[?, 1], offset: ?>>
// CHECK: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK: scf.if %[[OUT_BOUNDS]] {
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
index a9c7bf8e8b327..babc69d3745b9 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
@@ -42,7 +42,7 @@ func.func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: ind
   // CHECK-SAME: memref<?x8xf32>, index, index
   // CHECK: }
   // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
-  // CHECK-SAME: {in_bounds = [true, true]} : memref<?x8xf32>, vector<4x8xf32>
+  // CHECK-SAME: {in_bounds = array<i1: true, true>} : memref<?x8xf32>, vector<4x8xf32>
   %1 = vector.transfer_read %A[%i, %j], %f0 : memref<?x8xf32>, vector<4x8xf32>

@@ -92,7 +92,7 @@ func.func @split_vector_transfer_read_strided_2d(
   // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
   // CHECK-SAME: memref<?x8xf32, strided<[?, 1], offset: ?>>, index, index
   // CHECK: }
-  // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true]} :
+  // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = array<i1: true, true>} :
   // CHECK-SAME: memref<?x8xf32, strided<[?, 1], offset: ?>>, vector<4x8xf32>
   %1 = vector.transfer_read %A[%i, %j], %f0 :
     memref<7x8xf32, strided<[?, 1], offset: ?>>, vector<4x8xf32>
@@ -125,7 +125,7 @@ func.func @split_vector_transfer_read_mem_space(%A: memref, %i: inde
   // CHECK-SAME: memref>, index, index
   // CHECK: }
   // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
-  // CHECK-SAME: {in_bounds = [true, true]} : memref>, vector<4x8xf32>
+  // CHECK-SAME: {in_bounds = array<i1: true, true>} : memref>, vector<4x8xf32>
   %1 = vector.transfer_read %A[%i, %j], %f0 : memref, vector<4x8xf32>

@@ -178,7 +178,7 @@ func.func @split_vector_transfer_write_2d(%V: vector<4x8xf32>, %A: memref<?x8xf32>,
-// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
+// CHECK-SAME: {in_bounds = array<i1: true, true>} : vector<4x8xf32>, memref<?x8xf32>
// CHECK: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK: scf.if %[[OUT_BOUNDS]] {
// CHECK: %[[CASTED:.*]] = vector.type_cast %[[TEMP]]
@@ -245,7 +245,7 @@ func.func @split_vector_transfer_write_strided_2d(
// CHECK: vector.transfer_write %[[VEC]],
// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0
// CHECK-SAME: [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
-// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32, strided<[?, 1], offset: ?>>
+// CHECK-SAME: {in_bounds = array<i1: true, true>} : vector<4x8xf32>, memref<?x8xf32, strided<[?, 1], offset: ?>>
// CHECK: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK: scf.if %[[OUT_BOUNDS]] {
// CHECK: %[[VAL_19:.*]] = vector.type_cast %[[TEMP]]
@@ -291,7 +291,7 @@ func.func @split_vector_transfer_write_mem_space(%V: vector<4x8xf32>, %A: memref
// CHECK: }
// CHECK: vector.transfer_write %[[VEC]],
// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
-// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref>
+// CHECK-SAME: {in_bounds = array<i1: true, true>} : vector<4x8xf32>, memref>

module attributes {transform.with_named_sequence} {
diff --git a/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir b/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir
index 15000d706adfc..30c4a4a14f948 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir
@@ -17,14 +17,14 @@
 // CHECK: %[[TR:.*]] = vector.transpose %[[VEC]], [1, 0] : vector<4x8xi16> to vector<8x4xi16>
 // CHECK: vector.transfer_write
 // CHECK-NOT: permutation_map
-// CHECK-SAME: %[[TR]], %[[MEM]]{{.*}} {in_bounds = [true, true]} : vector<8x4xi16>, memref<2x2x8x4xi16>
+// CHECK-SAME: %[[TR]], %[[MEM]]{{.*}} {in_bounds = array<i1: true, true>} : vector<8x4xi16>, memref<2x2x8x4xi16>
 func.func @xfer_write_transposing_permutation_map(
     %vec: vector<4x8xi16>,
     %mem: memref<2x2x8x4xi16>) {

   %c0 = arith.constant 0 : index
   vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0] {
-    in_bounds = [true, true],
+    in_bounds = array<i1: true, true>,
     permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)>
   } : vector<4x8xi16>, memref<2x2x8x4xi16>

@@ -49,7 +49,7 @@ func.func @xfer_write_transposing_permutation_map_out_of_bounds(
   %c0 = arith.constant 0 : index
   vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0] {
-    in_bounds = [false, false],
+    in_bounds = array<i1: false, false>,
     permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)>
   } : vector<4x8xi16>, memref<2x2x?x?xi16>

@@ -63,7 +63,7 @@ func.func @xfer_write_transposing_permutation_map_out_of_bounds(
 // CHECK: %[[TR:.*]] = vector.transpose %[[VEC]], [1, 0] : vector<4x[8]xi16> to vector<[8]x4xi16>
 // CHECK: vector.transfer_write
 // CHECK-NOT: permutation_map
-// CHECK-SAME: %[[TR]], %[[MEM]]{{.*}}, %[[MASK]] {in_bounds = [true, true]} : vector<[8]x4xi16>, memref<2x2x?x4xi16>
+// CHECK-SAME: %[[TR]], %[[MEM]]{{.*}}, %[[MASK]] {in_bounds = array<i1: true, true>} : vector<[8]x4xi16>, memref<2x2x?x4xi16>
 func.func @xfer_write_transposing_permutation_map_with_mask_scalable(
     %vec: vector<4x[8]xi16>,
     %mem: memref<2x2x?x4xi16>,
@@ -71,7 +71,7 @@ func.func @xfer_write_transposing_permutation_map_with_mask_scalable(
   %c0 = arith.constant 0 : index
   vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0], %mask {
-    in_bounds = [true, true],
+    in_bounds = array<i1: true, true>,
     permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)>
   } : vector<4x[8]xi16>, memref<2x2x?x4xi16>

@@ -89,7 +89,7 @@ func.func @xfer_write_transposing_permutation_map_masked(
   %c0 = arith.constant 0 : index
   vector.mask %mask {
     vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0] {
-      in_bounds = [true, true],
+      in_bounds = array<i1: true, true>,
       permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)>
     } : vector<4x8xi16>, memref<2x2x8x4xi16>
   } : vector<8x4xi1>
@@ -114,7 +114,7 @@ func.func @xfer_write_transposing_permutation_map_masked(
 // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index) {
 // CHECK: %[[BC:.*]] = vector.broadcast %[[VEC]] : vector<7xf32> to vector<1x7xf32>
 // CHECK: %[[TR:.*]] = vector.transpose %[[BC]], [1, 0] : vector<1x7xf32> to vector<7x1xf32>
-// CHECK: vector.transfer_write %[[TR]], %[[MEM]]{{\[}}%[[IDX_1]], %[[IDX_2]]] {in_bounds = [false, true]} : vector<7x1xf32>, memref<?x?xf32>
+// CHECK: vector.transfer_write %[[TR]], %[[MEM]]{{\[}}%[[IDX_1]], %[[IDX_2]]] {in_bounds = array<i1: false, true>} : vector<7x1xf32>, memref<?x?xf32>
 func.func @xfer_write_non_transposing_permutation_map(
     %mem : memref<?x?xf32>,
     %vec : vector<7xf32>,
@@ -138,7 +138,7 @@ func.func @xfer_write_non_transposing_permutation_map(
 // CHECK: %[[BC_MASK:.*]] = vector.broadcast %[[MASK]] : vector<7xi1> to vector<1x7xi1>
 // CHECK: %[[TR_MASK:.*]] = vector.transpose %[[BC_MASK]], [1, 0] : vector<1x7xi1> to vector<7x1xi1>
 // CHECK: %[[TR_VEC:.*]] = vector.transpose %[[BC_VEC]], [1, 0] : vector<1x7xf32> to vector<7x1xf32>
-// CHECK: vector.transfer_write %[[TR_VEC]], %[[MEM]]{{\[}}%[[IDX_1]], %[[IDX_2]]], %[[TR_MASK]] {in_bounds = [false, true]} : vector<7x1xf32>, memref<?x?xf32>
+// CHECK: vector.transfer_write %[[TR_VEC]], %[[MEM]]{{\[}}%[[IDX_1]], %[[IDX_2]]], %[[TR_MASK]] {in_bounds = array<i1: false, true>} : vector<7x1xf32>, memref<?x?xf32>
 func.func @xfer_write_non_transposing_permutation_map_with_mask_out_of_bounds(
     %mem : memref<?x?xf32>,
     %vec : vector<7xf32>,
@@ -148,7 +148,7 @@ func.func @xfer_write_non_transposing_permutation_map_with_mask_out_of_bounds(
   vector.transfer_write %vec, %mem[%idx_1, %idx_2], %mask {
     permutation_map = affine_map<(d0, d1) -> (d0)>,
-    in_bounds = [false]
+    in_bounds = array<i1: false>
   } : vector<7xf32>, memref<?x?xf32>

   return
@@ -163,7 +163,7 @@ func.func @xfer_write_non_transposing_permutation_map_with_mask_out_of_bounds(
 // CHECK: %[[BC_2:.*]] = vector.broadcast %[[MASK]] : vector<4x[8]xi1> to vector<1x4x[8]xi1>
 // CHECK: %[[TRANSPOSE_1:.*]] = vector.transpose %[[BC_2]], [1, 2, 0] : vector<1x4x[8]xi1> to vector<4x[8]x1xi1>
 // CHECK: %[[TRANSPOSE_2:.*]] = vector.transpose %[[BC_1]], [1, 2, 0] : vector<1x4x[8]xi16> to vector<4x[8]x1xi16>
-// CHECK: vector.transfer_write %[[TRANSPOSE_2]], %[[MEM]]{{.*}}, %[[TRANSPOSE_1]] {in_bounds = [true, true, true]} : vector<4x[8]x1xi16>, memref<1x4x?x1xi16>
+// CHECK: vector.transfer_write %[[TRANSPOSE_2]], %[[MEM]]{{.*}}, %[[TRANSPOSE_1]] {in_bounds = array<i1: true, true, true>} : vector<4x[8]x1xi16>, memref<1x4x?x1xi16>
 func.func @permutation_with_mask_xfer_write_scalable(
     %vec: vector<4x[8]xi16>,
     %mem: memref<1x4x?x1xi16>,
@@ -171,7 +171,7 @@ func.func @permutation_with_mask_xfer_write_scalable(
   %c0 = arith.constant 0 : index
   vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0], %mask {
-    in_bounds = [true, true],
+    in_bounds = array<i1: true, true>,
     permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)>
   } : vector<4x[8]xi16>, memref<1x4x?x1xi16>

@@ -216,7 +216,7 @@ func.func @masked_permutation_xfer_write_scalable(
   %c0 = arith.constant 0 : index
   %res = vector.mask %mask {
     vector.transfer_write %vec, %dest[%c0, %c0, %c0, %c0] {
-      in_bounds = [true, true],
+      in_bounds = array<i1: true, true>,
       permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)>
     } : vector<4x[8]xi16>, tensor
   } : vector<4x[8]xi1> -> tensor

@@ -240,7 +240,7 @@ func.func @masked_non_permutation_xfer_write_fixed_width(
   %mask = vector.create_mask %dim, %dim, %dim : vector<14x8x16xi1>
   %res = vector.mask %mask {
     vector.transfer_write %vec, %dest[%c0, %c0, %c0, %c0] {
-      in_bounds = [false, false, true],
+      in_bounds = array<i1: false, false, true>,
       permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
     } : vector<14x8x16xf32>, tensor
   } : vector<14x8x16xi1> -> tensor

@@ -264,7 +264,7 @@ func.func @masked_non_permutation_xfer_write_fixed_width(
 // CHECK: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: %[[PASS_THROUGH:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK: %[[MASK:.*]] = vector.create_mask %[[IDX_2]], %[[IDX_1]] : vector<2x4xi1>
-// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[MEM]]{{\[}}%[[C0]], %[[C0]]], %[[PASS_THROUGH]], %[[MASK]] {in_bounds = [true, true]} : memref<?x?xf32>, vector<2x4xf32>
+// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[MEM]]{{\[}}%[[C0]], %[[C0]]], %[[PASS_THROUGH]], %[[MASK]] {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<2x4xf32>
 // CHECK: %[[BCAST:.*]] = vector.broadcast %[[T_READ]] : vector<2x4xf32> to vector<8x2x4xf32>
 // CHECK: %[[TRANSPOSE:.*]] = vector.transpose %[[BCAST]], [0, 2, 1] : vector<8x2x4xf32> to vector<8x4x2xf32>
 // CHECK: return %[[TRANSPOSE]] : vector<8x4x2xf32>
@@ -278,7 +278,7 @@ func.func @permutation_with_mask_xfer_read_fixed_width(
   %mask = vector.create_mask %dim_2, %dim_1 : vector<2x4xi1>
   %res = vector.transfer_read %mem[%c0, %c0], %cst_0, %mask {
-    in_bounds = [true, true, true],
+    in_bounds = array<i1: true, true, true>,
     permutation_map = affine_map<(d0, d1) -> (0, d1, d0)>
   } : memref<?x?xf32>, vector<8x4x2xf32>

@@ -292,7 +292,7 @@ func.func @permutation_with_mask_xfer_read_fixed_width(
 // CHECK: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: %[[PASS_THROUGH:.*]] = arith.constant 0.000000e+00 : f32
 // CHECK: %[[MASK:.*]] = vector.create_mask %[[IDX_2]], %[[IDX_1]] : vector<2x[4]xi1>
-// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[MEM]]{{\[}}%[[C0]], %[[C0]]], %[[PASS_THROUGH]], %[[MASK]] {in_bounds = [true, true]} : memref<?x?xf32>, vector<2x[4]xf32>
+// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[MEM]]{{\[}}%[[C0]], %[[C0]]], %[[PASS_THROUGH]], %[[MASK]] {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<2x[4]xf32>
 // CHECK: %[[BCAST:.*]] = vector.broadcast %[[T_READ]] : vector<2x[4]xf32> to vector<8x2x[4]xf32>
 // CHECK: %[[TRANSPOSE:.*]] = vector.transpose %[[BCAST]], [0, 2, 1] : vector<8x2x[4]xf32> to vector<8x[4]x2xf32>
 // CHECK: return %[[TRANSPOSE]] : vector<8x[4]x2xf32>
@@ -306,7 +306,7 @@ func.func @permutation_with_mask_xfer_read_scalable(
   %mask = vector.create_mask %dim_2, %dim_1 : vector<2x[4]xi1>
   %res = vector.transfer_read %mem[%c0, %c0], %cst_0, %mask {
-    in_bounds = [true, true, true],
+    in_bounds = array<i1: true, true, true>,
     permutation_map = affine_map<(d0, d1) -> (0, d1, d0)>
   } : memref<?x?xf32>, vector<8x[4]x2xf32>

@@ -327,7 +327,7 @@ func.func @masked_permutation_xfer_read_fixed_width(
   %c0 = arith.constant 0 : index
   %3 = vector.mask %mask {
     vector.transfer_read %dest[%c0, %c0], %cst {
-      in_bounds = [false, true, false],
+      in_bounds = array<i1: false, true, false>,
       permutation_map = affine_map<(d0, d1) -> (d1, 0, d0)>
     } : tensor, vector<1x4x4xf32>
   } : vector<4x1xi1> -> vector<1x4x4xf32>
@@ -351,7 +351,7 @@ func.func @masked_permutation_xfer_read_scalable(
   %res = vector.mask %mask {
     vector.transfer_read %dest[%c0, %c0], %cst_0 {
-      in_bounds = [true, true, true],
+      in_bounds = array<i1: true, true, true>,
       permutation_map = affine_map<(d0, d1) -> (0, d1, d0)>
affine_map<(d0, d1) -> (0, d1, d0)> } : tensor, vector<8x[4]x2xf32> } :vector<2x[4]xi1> -> vector<8x[4]x2xf32> @@ -391,7 +391,7 @@ func.func @transfer_read_reduce_rank_scalable( %cst_0 = arith.constant 0.000000e+00 : f32 %res = vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst_0 { - in_bounds = [true, true, true, true], + in_bounds = array<i1: true, true, true, true>, permutation_map = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)> } : memref, vector<8x[4]x2x3xf32> @@ -414,7 +414,7 @@ func.func @masked_transfer_read_reduce_rank( %res = vector.mask %mask { vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst_0 { - in_bounds = [true, true, true, true], + in_bounds = array<i1: true, true, true, true>, permutation_map = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)> } : memref, vector<8x[4]x2x3xf32> } : vector<[4]x3xi1> -> vector<8x[4]x2x3xf32> diff --git a/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir b/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir index 4d8e4a8296fb5..6544b1763dfce 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir @@ -28,7 +28,7 @@ func.func @vector_transfer_ops_0d_tensor(%src: tensor<f32>) -> vector<1xf32> { // CHECK-NEXT: %[[S:.*]] = tensor.extract %[[SRC]][] : tensor<f32> // CHECK-NEXT: %[[V:.*]] = vector.broadcast %[[S]] : f32 to vector<1xf32> - %res = vector.transfer_read %src[], %f0 {in_bounds = [true], permutation_map = affine_map<()->(0)>} : + %res = vector.transfer_read %src[], %f0 {in_bounds = array<i1: true>, permutation_map = affine_map<()->(0)>} : tensor<f32>, vector<1xf32> // CHECK-NEXT: return %[[V]] @@ -46,8 +46,8 @@ func.func @vector_transfer_ops_0d_tensor(%src: tensor<f32>) -> vector<1xf32> { func.func @transfer_to_load(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf32> { %cf0 = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref<8x8xf32>, vector<4xf32> - vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = [true]} : vector<4xf32>, memref<8x8xf32> + %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = array<i1: true>} : memref<8x8xf32>, vector<4xf32> + vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = array<i1: true>} : vector<4xf32>, memref<8x8xf32> return %res : vector<4xf32> } @@ -63,8 +63,8 @@ func.func @transfer_to_load(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf3 func.func @masked_transfer_to_load(%mem : memref<8x8xf32>, %idx : index, %mask : vector<4xi1>) -> memref<8x8xf32> { %cf0 = arith.constant 0.0 : f32 - %read = vector.mask %mask {vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref<8x8xf32>, vector<4xf32>} : vector<4xi1> -> vector<4xf32> - vector.mask %mask {vector.transfer_write %read, %mem[%idx, %idx] {in_bounds = [true]} : vector<4xf32>, memref<8x8xf32> } : vector<4xi1> + %read = vector.mask %mask {vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = array<i1: true>} : memref<8x8xf32>, vector<4xf32>} : vector<4xi1> -> vector<4xf32> + vector.mask %mask {vector.transfer_write %read, %mem[%idx, %idx] {in_bounds = array<i1: true>} : vector<4xf32>, memref<8x8xf32> } : vector<4xi1> return %mem : memref<8x8xf32> } @@ -79,8 +79,8 @@ func.func @masked_transfer_to_load(%mem : memref<8x8xf32>, %idx : index, %mask : func.func @transfer_2D(%mem : memref<8x8xf32>, %idx : index) -> vector<2x4xf32> { %cf0 = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true, true]} : memref<8x8xf32>, vector<2x4xf32> - vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = [true, true]} :
vector<2x4xf32>, memref<8x8xf32> + %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = array<i1: true, true>} : memref<8x8xf32>, vector<2x4xf32> + vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = array<i1: true, true>} : vector<2x4xf32>, memref<8x8xf32> return %res : vector<2x4xf32> } @@ -106,15 +106,15 @@ func.func @transfer_vector_element(%mem : memref<8x8xvector<2x4xf32>>, %idx : in // CHECK-SAME: %[[MEM:.*]]: memref<8x8xvector<2x4xf32>>, // CHECK-SAME: %[[IDX:.*]]: index) -> vector<1x2x4xf32> { // CHECK-NEXT: %[[CF0:.*]] = arith.constant dense<0.000000e+00> : vector<2x4xf32> -// CHECK-NEXT: %[[RES:.*]] = vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[CF0]] {in_bounds = [true]} : memref<8x8xvector<2x4xf32>>, vector<1x2x4xf32> -// CHECK-NEXT: vector.transfer_write %[[RES:.*]], %[[MEM]][%[[IDX]], %[[IDX]]] {in_bounds = [true]} : vector<1x2x4xf32>, memref<8x8xvector<2x4xf32>> +// CHECK-NEXT: %[[RES:.*]] = vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[CF0]] {in_bounds = array<i1: true>} : memref<8x8xvector<2x4xf32>>, vector<1x2x4xf32> +// CHECK-NEXT: vector.transfer_write %[[RES:.*]], %[[MEM]][%[[IDX]], %[[IDX]]] {in_bounds = array<i1: true>} : vector<1x2x4xf32>, memref<8x8xvector<2x4xf32>> // CHECK-NEXT: return %[[RES]] : vector<1x2x4xf32> // CHECK-NEXT: } func.func @transfer_vector_element_different_types(%mem : memref<8x8xvector<2x4xf32>>, %idx : index) -> vector<1x2x4xf32> { %cf0 = arith.constant dense<0.0> : vector<2x4xf32> - %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref<8x8xvector<2x4xf32>>, vector<1x2x4xf32> - vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = [true]} : vector<1x2x4xf32>, memref<8x8xvector<2x4xf32>> + %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = array<i1: true>} : memref<8x8xvector<2x4xf32>>, vector<1x2x4xf32> + vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = array<i1: true>} : vector<1x2x4xf32>, memref<8x8xvector<2x4xf32>> return %res : vector<1x2x4xf32> } @@ -124,15 +124,15 @@ func.func @transfer_vector_element_different_types(%mem : memref<8x8xvector<2x4x // CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>, // CHECK-SAME: %[[IDX:.*]]: index) -> vector<2x4xf32> { // CHECK-NEXT: %[[CF0:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK-NEXT: %[[RES:.*]] = vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[CF0]] {in_bounds = [true, false]} : memref<8x8xf32>, vector<2x4xf32> -// CHECK-NEXT: vector.transfer_write %[[RES]], %[[MEM]][%[[IDX]], %[[IDX]]] {in_bounds = [false, true]} : vector<2x4xf32>, memref<8x8xf32> +// CHECK-NEXT: %[[RES:.*]] = vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[CF0]] {in_bounds = array<i1: true, false>} : memref<8x8xf32>, vector<2x4xf32> +// CHECK-NEXT: vector.transfer_write %[[RES]], %[[MEM]][%[[IDX]], %[[IDX]]] {in_bounds = array<i1: false, true>} : vector<2x4xf32>, memref<8x8xf32> // CHECK-NEXT: return %[[RES]] : vector<2x4xf32> // CHECK-NEXT: } func.func @transfer_2D_not_inbounds(%mem : memref<8x8xf32>, %idx : index) -> vector<2x4xf32> { %cf0 = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true, false]} : memref<8x8xf32>, vector<2x4xf32> - vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = [false, true]} : vector<2x4xf32>, memref<8x8xf32> + %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = array<i1: true, false>} : memref<8x8xf32>, vector<2x4xf32> + vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = array<i1: false, true>} : vector<2x4xf32>, memref<8x8xf32> return %res : vector<2x4xf32> } @@ -165,8 +165,8 @@ func.func @transfer_not_inbounds(%mem : memref<8x8xf32>, %idx : index) -> vector #layout
= affine_map<(d0, d1) -> (d0*16 + d1)> func.func @transfer_nondefault_layout(%mem : memref<8x8xf32, #layout>, %idx : index) -> vector<4xf32> { %cf0 = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref<8x8xf32, #layout>, vector<4xf32> - vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = [true]} : vector<4xf32>, memref<8x8xf32, #layout> + %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = array<i1: true>} : memref<8x8xf32, #layout>, vector<4xf32> + vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = array<i1: true>} : vector<4xf32>, memref<8x8xf32, #layout> return %res : vector<4xf32> } @@ -176,13 +176,13 @@ func.func @transfer_nondefault_layout(%mem : memref<8x8xf32, #layout>, %idx : in // CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>, // CHECK-SAME: %[[IDX:.*]]: index) -> vector<4xf32> { // CHECK-NEXT: %[[CF0:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK-NEXT: %[[RES:.*]] = vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[CF0]] {in_bounds = [true], permutation_map = #{{.*}}} : memref<8x8xf32>, vector<4xf32> +// CHECK-NEXT: %[[RES:.*]] = vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[CF0]] {in_bounds = array<i1: true>, permutation_map = #{{.*}}} : memref<8x8xf32>, vector<4xf32> // CHECK-NEXT: return %[[RES]] : vector<4xf32> // CHECK-NEXT: } func.func @transfer_perm_map(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf32> { %cf0 = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true], permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<8x8xf32>, vector<4xf32> + %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = array<i1: true>, permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<8x8xf32>, vector<4xf32> return %res : vector<4xf32> } @@ -200,7 +200,7 @@ func.func @transfer_perm_map(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf func.func @transfer_broadcasting(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf32> { %cf0 = arith.constant 0.0 : f32 %res = vector.transfer_read %mem[%idx, %idx], %cf0 - {in_bounds = [true], permutation_map = #broadcast_1d} + {in_bounds = array<i1: true>, permutation_map = #broadcast_1d} : memref<8x8xf32>, vector<4xf32> return %res : vector<4xf32> } @@ -214,7 +214,7 @@ func.func @transfer_broadcasting(%mem : memref<8x8xf32>, %idx : index) -> vector // CHECK-NEXT: } func.func @transfer_scalar(%mem : memref, %idx : index) -> vector<1xf32> { %cf0 = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref, vector<1xf32> + %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = array<i1: true>} : memref, vector<1xf32> return %res : vector<1xf32> } @@ -231,7 +231,7 @@ func.func @transfer_scalar(%mem : memref, %idx : index) -> vector<1xf32 func.func @transfer_broadcasting_2D(%mem : memref<8x8xf32>, %idx : index) -> vector<4x4xf32> { %cf0 = arith.constant 0.0 : f32 %res = vector.transfer_read %mem[%idx, %idx], %cf0 - {in_bounds = [true, true], permutation_map = #broadcast_2d} + {in_bounds = array<i1: true, true>, permutation_map = #broadcast_2d} : memref<8x8xf32>, vector<4x4xf32> return %res : vector<4x4xf32> } @@ -249,7 +249,7 @@ func.func @transfer_broadcasting_2D(%mem : memref<8x8xf32>, %idx : index) -> vec func.func @transfer_broadcasting_complex(%mem : memref<10x20x30x8x8xf32>, %idx : index) -> vector<3x2x4x5xf32> { %cf0 = arith.constant 0.0 : f32 %res = vector.transfer_read %mem[%idx, %idx, %idx, %idx, %idx], %cf0 - {in_bounds = [true, true, true, true], permutation_map = #broadcast_2d_in_4d} + {in_bounds = array<i1: true, true, true, true>,
permutation_map = #broadcast_2d_in_4d} : memref<10x20x30x8x8xf32>, vector<3x2x4x5xf32> return %res : vector<3x2x4x5xf32> } @@ -290,29 +290,29 @@ func.func @transfer_read_permutations(%mem_0 : memref, %mem_1 : memref< // CHECK: %[[MASK0:.*]] = vector.splat %{{.*}} : vector<14x7xi1> %mask0 = vector.splat %m : vector<14x7xi1> - %0 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask0 {in_bounds = [true, false, true, true], permutation_map = #map0} : memref, vector<7x14x8x16xf32> -// CHECK: vector.transfer_read {{.*}} %[[MASK0]] {in_bounds = [false, true, true, true], permutation_map = #[[$MAP0]]} : memref, vector<14x7x8x16xf32> + %0 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask0 {in_bounds = array<i1: true, false, true, true>, permutation_map = #map0} : memref, vector<7x14x8x16xf32> +// CHECK: vector.transfer_read {{.*}} %[[MASK0]] {in_bounds = array<i1: false, true, true, true>, permutation_map = #[[$MAP0]]} : memref, vector<14x7x8x16xf32> // CHECK: vector.transpose %{{.*}}, [1, 0, 2, 3] : vector<14x7x8x16xf32> to vector<7x14x8x16xf32> // CHECK: %[[MASK1:.*]] = vector.splat %{{.*}} : vector<16x14xi1> %mask1 = vector.splat %m : vector<16x14xi1> - %1 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask1 {in_bounds = [true, false, true, false], permutation_map = #map1} : memref, vector<7x14x8x16xf32> -// CHECK: vector.transfer_read {{.*}} %[[MASK1]] {in_bounds = [false, false, true, true], permutation_map = #[[$MAP0]]} : memref, vector<16x14x7x8xf32> + %1 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask1 {in_bounds = array<i1: true, false, true, false>, permutation_map = #map1} : memref, vector<7x14x8x16xf32> +// CHECK: vector.transfer_read {{.*}} %[[MASK1]] {in_bounds = array<i1: false, false, true, true>, permutation_map = #[[$MAP0]]} : memref, vector<16x14x7x8xf32> // CHECK: vector.transpose %{{.*}}, [2, 1, 3, 0] : vector<16x14x7x8xf32> to vector<7x14x8x16xf32> // CHECK: %[[MASK3:.*]] = vector.splat %{{.*}} : vector<14x7xi1> %mask2 = vector.splat %m : vector<14x7xi1> - %2 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask2 {in_bounds = [true, false, true, true], permutation_map = #map2} : memref, vector<7x14x8x16xf32> -// CHECK: vector.transfer_read {{.*}} %[[MASK3]] {in_bounds = [false, true, true], permutation_map = #[[$MAP1]]} : memref, vector<14x16x7xf32> + %2 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask2 {in_bounds = array<i1: true, false, true, true>, permutation_map = #map2} : memref, vector<7x14x8x16xf32> +// CHECK: vector.transfer_read {{.*}} %[[MASK3]] {in_bounds = array<i1: false, true, true>, permutation_map = #[[$MAP1]]} : memref, vector<14x16x7xf32> // CHECK: vector.broadcast %{{.*}} : vector<14x16x7xf32> to vector<8x14x16x7xf32> // CHECK: vector.transpose %{{.*}}, [3, 1, 0, 2] : vector<8x14x16x7xf32> to vector<7x14x8x16xf32> - %3 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = [false, false, true, true], permutation_map = #map3} : memref, vector<7x14x8x16xf32> + %3 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = array<i1: false, false, true, true>, permutation_map = #map3} : memref, vector<7x14x8x16xf32> // CHECK: vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF0]] : memref, vector<14x7xf32> // CHECK: vector.broadcast %{{.*}} : vector<14x7xf32> to vector<8x16x14x7xf32> // CHECK: vector.transpose %{{.*}}, [3, 2, 0, 1] : vector<8x16x14x7xf32> to vector<7x14x8x16xf32> - %4 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = [true, false, true, false], permutation_map = #map4} : memref, vector<7x14x8x16xf32> + %4 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = array<i1: true, false, true, false>, permutation_map = #map4} : memref, vector<7x14x8x16xf32> // CHECK: vector.transfer_read
%{{.*}}[%[[C0]], %[[C0]]], %[[CF0]] : memref, vector<16x14xf32> // CHECK: vector.broadcast %{{.*}} : vector<16x14xf32> to vector<7x8x16x14xf32> // CHECK: vector.transpose %{{.*}}, [0, 3, 1, 2] : vector<7x8x16x14xf32> to vector<7x14x8x16xf32> @@ -321,7 +321,7 @@ func.func @transfer_read_permutations(%mem_0 : memref, %mem_1 : memref< // CHECK: vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[CF0]] : memref, vector<16x14x7x8xf32> // CHECK: vector.transpose %{{.*}}, [2, 1, 3, 0] : vector<16x14x7x8xf32> to vector<7x14x8x16xf32> - %6 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = [true], permutation_map = #map6} : memref, vector<8xf32> + %6 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = array<i1: true>, permutation_map = #map6} : memref, vector<8xf32> // CHECK: memref.load %{{.*}}[%[[C0]], %[[C0]]] : memref // CHECK: vector.broadcast %{{.*}} : f32 to vector<8xf32> @@ -342,9 +342,9 @@ func.func @transfer_write_permutations_tensor_masked( // CHECK: %[[MASK:.*]] = vector.splat %[[M]] : vector<16x14x7x8xi1> %mask0 = vector.splat %m : vector<16x14x7x8xi1> - %res = vector.transfer_write %vec, %dst[%c0, %c0, %c0, %c0], %mask0 {in_bounds = [true, false, false, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d1, d3, d0)>} : vector<7x14x8x16xf32>, tensor + %res = vector.transfer_write %vec, %dst[%c0, %c0, %c0, %c0], %mask0 {in_bounds = array<i1: true, false, false, true>, permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d1, d3, d0)>} : vector<7x14x8x16xf32>, tensor // CHECK: %[[NEW_VEC0:.*]] = vector.transpose %{{.*}} [3, 1, 0, 2] : vector<7x14x8x16xf32> to vector<16x14x7x8xf32> - // CHECK: %[[NEW_RES0:.*]] = vector.transfer_write %[[NEW_VEC0]], %[[DST]][%c0, %c0, %c0, %c0], %[[MASK]] {in_bounds = [true, false, true, false]} : vector<16x14x7x8xf32>, tensor + // CHECK: %[[NEW_RES0:.*]] = vector.transfer_write %[[NEW_VEC0]], %[[DST]][%c0, %c0, %c0, %c0], %[[MASK]] {in_bounds = array<i1: true, false, true, false>} : vector<16x14x7x8xf32>, tensor return %res : tensor } @@ -372,10 +372,10 @@ func.func @transfer_write_broadcast_unit_dim_tensor( // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index %c0 = arith.constant 0 : index - %res = vector.transfer_write %vec_0, %dst_0[%c0, %c0, %c0, %c0] {in_bounds = [false, false, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>} : vector<14x8x16xf32>, tensor + %res = vector.transfer_write %vec_0, %dst_0[%c0, %c0, %c0, %c0] {in_bounds = array<i1: false, false, true>, permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>} : vector<14x8x16xf32>, tensor // CHECK: %[[NEW_VEC0:.*]] = vector.broadcast %{{.*}} : vector<14x8x16xf32> to vector<1x14x8x16xf32> // CHECK: %[[NEW_VEC1:.*]] = vector.transpose %[[NEW_VEC0]], [1, 2, 0, 3] : vector<1x14x8x16xf32> to vector<14x8x1x16xf32> - // CHECK: %[[NEW_RES0:.*]] = vector.transfer_write %[[NEW_VEC1]], %[[DST0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [false, false, true, true]} : vector<14x8x1x16xf32>, tensor + // CHECK: %[[NEW_RES0:.*]] = vector.transfer_write %[[NEW_VEC1]], %[[DST0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: false, false, true, true>} : vector<14x8x1x16xf32>, tensor return %res : tensor } @@ -391,7 +391,7 @@ func.func @transfer_write_broadcast_unit_dim_memref( vector.transfer_write %vec_0, %mem_0[%c0, %c0, %c0, %c0] {permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)>} : vector<8x16xf32>, memref // CHECK: %[[NEW_VEC0:.*]] = vector.broadcast %{{.*}} : vector<8x16xf32> to vector<1x8x16xf32> // CHECK: %[[NEW_VEC1:.*]] = vector.transpose %[[NEW_VEC0]], [1, 2, 0] : vector<1x8x16xf32> to vector<8x16x1xf32> - // CHECK:
vector.transfer_write %[[NEW_VEC1]], %[[MEM0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [false, false, true]} : vector<8x16x1xf32>, memref + // CHECK: vector.transfer_write %[[NEW_VEC1]], %[[MEM0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: false, false, true>} : vector<8x16x1xf32>, memref return } @@ -420,8 +420,8 @@ module attributes {transform.with_named_sequence} { func.func @transfer_2D_masked(%mem : memref, %mask : vector<2x4xi1>) -> vector<2x4xf32> { %c0 = arith.constant 0 : index %pad = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%c0, %c0], %pad, %mask {in_bounds = [true, true]} : memref, vector<2x4xf32> - vector.transfer_write %res, %mem[%c0, %c0], %mask {in_bounds = [true, true]} : vector<2x4xf32>, memref + %res = vector.transfer_read %mem[%c0, %c0], %pad, %mask {in_bounds = array<i1: true, true>} : memref, vector<2x4xf32> + vector.transfer_write %res, %mem[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<2x4xf32>, memref return %res : vector<2x4xf32> } diff --git a/mlir/test/Dialect/Vector/vector-transfer-unroll.mlir b/mlir/test/Dialect/Vector/vector-transfer-unroll.mlir index 75c5ad26fcf23..a290f3fcc32d8 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-unroll.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-unroll.mlir @@ -207,7 +207,7 @@ func.func @transfer_read_unroll_permutation(%mem : memref<6x4xf32>) -> vector<4x func.func @transfer_read_unroll_broadcast(%mem : memref<6x4xf32>) -> vector<6x4xf32> { %c0 = arith.constant 0 : index %cf0 = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%c0, %c0], %cf0 {in_bounds = [true, false], permutation_map = #map0} : memref<6x4xf32>, vector<6x4xf32> + %res = vector.transfer_read %mem[%c0, %c0], %cf0 {in_bounds = array<i1: true, false>, permutation_map = #map0} : memref<6x4xf32>, vector<6x4xf32> return %res : vector<6x4xf32> } @@ -234,7 +234,7 @@ func.func @transfer_read_unroll_broadcast(%mem : memref<6x4xf32>) -> vector<6x4x func.func @transfer_read_unroll_broadcast_permuation(%mem : memref<6x4xf32>) -> vector<4x6xf32> { %c0 = arith.constant 0 : index %cf0 = arith.constant 0.0 : f32 - %res = vector.transfer_read %mem[%c0, %c0], %cf0 {in_bounds = [true, false], permutation_map = #map0} : memref<6x4xf32>, vector<4x6xf32> + %res = vector.transfer_read %mem[%c0, %c0], %cf0 {in_bounds = array<i1: true, false>, permutation_map = #map0} : memref<6x4xf32>, vector<4x6xf32> return %res : vector<4x6xf32> } diff --git a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir index 07e6647533c6f..18840bcc1c1f6 100644 --- a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir +++ b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir @@ -13,16 +13,16 @@ func.func @forward_dead_store(%arg0: i1, %arg1 : memref<4x4xf32>, %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index %cf0 = arith.constant 0.0 : f32 - vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> - %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} : + %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = array<i1: true, true>} : memref<4x4xf32>, vector<1x4xf32> %x = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<1x4xf32>) { %1 = arith.addf %acc, %acc : vector<1x4xf32> scf.yield %1 : vector<1x4xf32> } - vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> return } @@ -40,18 +40,18 @@
func.func @forward_nested(%arg0: i1, %arg1 : memref<4x4xf32>, %v0 : vector<1x4xf %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %cf0 = arith.constant 0.0 : f32 - vector.transfer_write %v1, %arg1[%i, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v1, %arg1[%i, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> - vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> %x = scf.if %arg0 -> (vector<1x4xf32>) { - %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} : + %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = array<i1: true, true>} : memref<4x4xf32>, vector<1x4xf32> scf.yield %0 : vector<1x4xf32> } else { scf.yield %v1 : vector<1x4xf32> } - vector.transfer_write %x, %arg1[%c0, %c0] {in_bounds = [true, true]} : + vector.transfer_write %x, %arg1[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> return } @@ -73,18 +73,18 @@ func.func @forward_nested_negative(%arg0: i1, %arg1 : memref<4x4xf32>, %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %cf0 = arith.constant 0.0 : f32 - vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> %x = scf.if %arg0 -> (vector<1x4xf32>) { - %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} : + %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = array<i1: true, true>} : memref<4x4xf32>, vector<1x4xf32> scf.yield %0 : vector<1x4xf32> } else { - vector.transfer_write %v1, %arg1[%i, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v1, %arg1[%i, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> scf.yield %v1 : vector<1x4xf32> } - vector.transfer_write %x, %arg1[%c0, %i] {in_bounds = [true, true]} : + vector.transfer_write %x, %arg1[%c0, %i] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> return } @@ -108,24 +108,24 @@ func.func @dead_store_region(%arg0: i1, %arg1 : memref<4x4xf32>, %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %cf0 = arith.constant 0.0 : f32 - vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> %x = scf.if %arg0 -> (vector<1x4xf32>) { scf.yield %v1 : vector<1x4xf32> } else { - %0 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} : + %0 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = array<i1: true, true>} : memref<4x4xf32>, vector<1x4xf32> scf.yield %0 : vector<1x4xf32> } scf.if %arg0 { - vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> } - vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> - vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> - %1 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} : + %1 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = array<i1: true, true>} : memref<4x4xf32>, vector<1x4xf32> return %1 : vector<1x4xf32> } @@ -144,15 +144,15 @@ func.func @dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>, %c1 = arith.constant 1 : index %cf0 = arith.constant 0.0
: f32 %x = scf.if %arg0 -> (vector<1x4xf32>) { - vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> - %0 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} : + %0 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = array<i1: true, true>} : memref<4x4xf32>, vector<1x4xf32> scf.yield %0 : vector<1x4xf32> } else { scf.yield %v1 : vector<1x4xf32> } - vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> return } @@ -172,13 +172,13 @@ func.func @dead_store_nested_region(%arg0: i1, %arg1: i1, %arg2 : memref<4x4xf32 %c1 = arith.constant 1 : index %cf0 = arith.constant 0.0 : f32 scf.if %arg0 { - %0 = vector.transfer_read %arg2[%i, %c0], %cf0 {in_bounds = [true, true]} : + %0 = vector.transfer_read %arg2[%i, %c0], %cf0 {in_bounds = array<i1: true, true>} : memref<4x4xf32>, vector<1x4xf32> scf.if %arg1 { - vector.transfer_write %v1, %arg2[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v1, %arg2[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> } - vector.transfer_write %v0, %arg2[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v0, %arg2[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> } return @@ -200,19 +200,19 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>, %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index %cf0 = arith.constant 0.0 : f32 - vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> // blocking write. - vector.transfer_write %v1, %alias[%c0, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v1, %alias[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<1x1xf32>, memref<2x2xf32, strided<[4, 1]>> - vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> // blocking write.
- vector.transfer_write %v1, %alias[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v1, %alias[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x1xf32>, memref<2x2xf32, strided<[4, 1]>> - %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} : + %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = array<i1: true, true>} : memref<4x4xf32>, vector<1x4xf32> - vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} : + vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = array<i1: true, true>} : vector<1x4xf32>, memref<4x4xf32> return %0 : vector<1x4xf32> } @@ -247,12 +247,12 @@ func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) { %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32> scf.for %arg0 = %c0 to %c20 step %c4 { %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>> - %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32> + %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = array<i1: true, true, true>} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32> // $alloca and $collapse_shape alias - vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32> - vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32> - %2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32> - vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>> + vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>} : vector<1x4x1xi32>, memref<1x4x1xi32> + vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = array<i1: true>} : vector<4xi32>, memref<4xi32> + %2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = array<i1: true, true, true>} : memref<1x4x1xi32>, vector<1x4x1xi32> + vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = array<i1: true, true, true>} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>> } return } @@ -270,14 +270,14 @@ func.func @forward_dead_store_dynamic_same_index( %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index %cf0 = arith.constant 0.0 : f32 - vector.transfer_write %v0, %buffer[%i, %i] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %v0, %buffer[%i, %i] {in_bounds = array<i1: true>} : vector<4xf32>, memref // The following transfer op reads/writes to the same address so that we can forward.
- %0 = vector.transfer_read %buffer[%i, %i], %cf0 {in_bounds = [true]} : memref, vector<4xf32> + %0 = vector.transfer_read %buffer[%i, %i], %cf0 {in_bounds = array<i1: true>} : memref, vector<4xf32> %x = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) { %1 = arith.addf %acc, %acc : vector<4xf32> scf.yield %1 : vector<4xf32> } - vector.transfer_write %x, %buffer[%i, %i] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %x, %buffer[%i, %i] {in_bounds = array<i1: true>} : vector<4xf32>, memref return } @@ -295,15 +295,15 @@ func.func @dont_forward_dead_store_dynamic_overlap( %c0 = arith.constant 0 : index %cf0 = arith.constant 0.0 : f32 %i1 = affine.apply affine_map<(d0) -> (d0 + 3)>(%i0) - vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = array<i1: true>} : vector<4xf32>, memref // The following transfer op writes to an overlapping range so we cannot forward. - vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = [true]} : vector<4xf32>, memref - %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref, vector<4xf32> + vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = array<i1: true>} : vector<4xf32>, memref + %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = array<i1: true>} : memref, vector<4xf32> %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) { %1 = arith.addf %acc, %acc : vector<4xf32> scf.yield %1 : vector<4xf32> } - vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = array<i1: true>} : vector<4xf32>, memref return } @@ -322,15 +322,15 @@ func.func @forward_dead_store_dynamic_non_overlap_leading_dim( %c0 = arith.constant 0 : index %cf0 = arith.constant 0.0 : f32 %i1 = affine.apply affine_map<(d0) -> (d0 + 1)>(%i0) - vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = array<i1: true>} : vector<4xf32>, memref // The following transfer op writes to an non-overlapping range so we can forward. - vector.transfer_write %v0, %buffer[%i1, %i0] {in_bounds = [true]} : vector<4xf32>, memref - %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref, vector<4xf32> + vector.transfer_write %v0, %buffer[%i1, %i0] {in_bounds = array<i1: true>} : vector<4xf32>, memref + %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = array<i1: true>} : memref, vector<4xf32> %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) { %1 = arith.addf %acc, %acc : vector<4xf32> scf.yield %1 : vector<4xf32> } - vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = array<i1: true>} : vector<4xf32>, memref return } @@ -349,15 +349,15 @@ func.func @forward_dead_store_dynamic_non_overlap_trailing_dim( %c0 = arith.constant 0 : index %cf0 = arith.constant 0.0 : f32 %i1 = affine.apply affine_map<(d0) -> (d0 + 4)>(%i0) - vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = array<i1: true>} : vector<4xf32>, memref // The following transfer op writes to an non-overlapping range so we can forward.
- vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = [true]} : vector<4xf32>, memref - %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref, vector<4xf32> + vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = array<i1: true>} : vector<4xf32>, memref + %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = array<i1: true>} : memref, vector<4xf32> %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) { %1 = arith.addf %acc, %acc : vector<4xf32> scf.yield %1 : vector<4xf32> } - vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = array<i1: true>} : vector<4xf32>, memref return } @@ -376,13 +376,13 @@ func.func @forward_dead_constant_splat_store_with_masking(%buffer : memref - vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref - %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref, vector<[8]x[8]xf32> + vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref + %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = array<i1: true, true>} : memref, vector<[8]x[8]xf32> %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) { %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32> scf.yield %1 : vector<[8]x[8]xf32> } - vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref + vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref return } @@ -401,13 +401,13 @@ func.func @forward_dead_constant_splat_store_with_masking_unmasked_write(%buffer %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index %c512 = arith.constant 512 : index - vector.transfer_write %zero_splat, %buffer[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref - %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref, vector<[8]x[8]xf32> + vector.transfer_write %zero_splat, %buffer[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref + %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = array<i1: true, true>} : memref, vector<[8]x[8]xf32> %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) { %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32> scf.yield %1 : vector<[8]x[8]xf32> } - vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref + vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref return } @@ -426,13 +426,13 @@ func.func @forward_dead_constant_splat_store_with_masking_negative_0(%buffer : m %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index %c512 = arith.constant 512 : index - vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref - %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref, vector<[8]x[8]xf32> + vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref + %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = array<i1: true, true>} : memref, vector<[8]x[8]xf32> %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) { %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32> scf.yield %1 : vector<[8]x[8]xf32> } - vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds
= [true, true]} : vector<[8]x[8]xf32>, memref + vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref return } @@ -451,13 +451,13 @@ func.func @forward_dead_constant_splat_store_with_masking_negative_1(%buffer : m %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index %c512 = arith.constant 512 : index - vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask_a {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref - %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask_b {in_bounds = [true, true]} : memref, vector<[8]x[8]xf32> + vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask_a {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref + %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask_b {in_bounds = array<i1: true, true>} : memref, vector<[8]x[8]xf32> %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) { %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32> scf.yield %1 : vector<[8]x[8]xf32> } - vector.transfer_write %x, %buffer[%c0, %c0], %mask_a {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref + vector.transfer_write %x, %buffer[%c0, %c0], %mask_a {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref return } @@ -476,13 +476,13 @@ func.func @forward_dead_constant_splat_store_with_masking_negative_3(%buffer : m %c1 = arith.constant 1 : index %c0 = arith.constant 0 : index %c512 = arith.constant 512 : index - vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref - %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding {in_bounds = [true, true]} : memref, vector<[8]x[8]xf32> + vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref + %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding {in_bounds = array<i1: true, true>} : memref, vector<[8]x[8]xf32> %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) { %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32> scf.yield %1 : vector<[8]x[8]xf32> } - vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref + vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref return } @@ -503,15 +503,15 @@ func.func @forward_and_eliminate_stores_through_trivial_aliases( %c1 = arith.constant 1 : index %c32 = arith.constant 32 : index %cst = arith.constant 0.0 : f32 - vector.transfer_write %vec, %buffer[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref + vector.transfer_write %vec, %buffer[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[8]x[8]xf32>, memref %direct_subview = memref.subview %buffer[0, 0] [%a_size, %a_size] [1, 1] : memref to memref> %cast = memref.cast %direct_subview : memref> to memref %subview_of_cast = memref.subview %cast[0, 0] [%another_size, %another_size] [1, 1] : memref to memref> - %21 = vector.transfer_read %direct_subview[%c0, %c0], %cst {in_bounds = [true, true]} : memref>, vector<[8]x[8]xf32> + %21 = vector.transfer_read %direct_subview[%c0, %c0], %cst {in_bounds = array<i1: true, true>} : memref>, vector<[8]x[8]xf32> %23 = scf.for %arg2 = %c0 to %c32 step %c1 iter_args(%arg3 = %21) -> (vector<[8]x[8]xf32>) { %24 = arith.addf %arg3, %arg3 : vector<[8]x[8]xf32> scf.yield %24 : vector<[8]x[8]xf32> } - vector.transfer_write %23, %subview_of_cast[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref> + vector.transfer_write %23, %subview_of_cast[%c0, %c0] {in_bounds = array<i1: true, true>} :
vector<[8]x[8]xf32>, memref> return } diff --git a/mlir/test/Dialect/Vector/vector-transforms.mlir b/mlir/test/Dialect/Vector/vector-transforms.mlir index eda6a5cc40d99..2682c3c02a3bc 100644 --- a/mlir/test/Dialect/Vector/vector-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-transforms.mlir @@ -117,10 +117,10 @@ func.func @add4x4(%0: vector<4x4xf32>, %1: vector<4x4xf32>) -> vector<4x4xf32> { // CHECK-NEXT: %[[R2:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR2]], %[[VTR6]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> // CHECK-NEXT: %[[R3:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR3]], %[[VTR7]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> -// CHECK-NEXT: vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R1]], %{{.*}}[%[[C0]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R2]], %{{.*}}[%[[C2]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32> -// CHECK-NEXT: vector.transfer_write %[[R3]], %{{.*}}[%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R1]], %{{.*}}[%[[C0]], %[[C2]]] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R2]], %{{.*}}[%[[C2]], %[[C0]]] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, memref<4x4xf32> +// CHECK-NEXT: vector.transfer_write %[[R3]], %{{.*}}[%[[C2]], %[[C2]]] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, memref<4x4xf32> // CHECK-NEXT: return #contraction_accesses1 = [ @@ -256,10 +256,10 @@ func.func @elementwise_unroll(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>) // CHECK-NEXT: %[[R2:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR2]], %[[VTR6]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> // CHECK-NEXT: %[[R3:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR3]], %[[VTR7]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> -// CHECK-NEXT: %[[VTW0:.*]] = vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32> -// CHECK-NEXT: %[[VTW1:.*]] = vector.transfer_write %[[R1]], %[[VTW0]][%[[C0]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32> -// CHECK-NEXT: %[[VTW2:.*]] = vector.transfer_write %[[R2]], %[[VTW1]][%[[C2]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32> -// CHECK-NEXT: %[[VTW3:.*]] = vector.transfer_write %[[R3]], %[[VTW2]][%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32> +// CHECK-NEXT: %[[VTW0:.*]] = vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, tensor<4x4xf32> +// CHECK-NEXT: %[[VTW1:.*]] = vector.transfer_write %[[R1]], %[[VTW0]][%[[C0]], %[[C2]]] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, tensor<4x4xf32>
+// CHECK-NEXT: %[[VTW2:.*]] = vector.transfer_write %[[R2]], %[[VTW1]][%[[C2]], %[[C0]]] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, tensor<4x4xf32> +// CHECK-NEXT: %[[VTW3:.*]] = vector.transfer_write %[[R3]], %[[VTW2]][%[[C2]], %[[C2]]] {in_bounds = array<i1: true, true>} : vector<2x2xf32>, tensor<4x4xf32> // CHECK-NEXT: return %[[VTW3]] : tensor<4x4xf32> func.func @contraction4x4_ikj_xfer_read_tensor(%arg0 : tensor<4x2xf32>, diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir index 0544cef3e3828..9bf26b2ed8b44 100644 --- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir +++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir @@ -47,8 +47,8 @@ func.func @rewrite_warp_op_to_scf_if(%laneid: index, %r:2 = vector.warp_execute_on_lane_0(%laneid)[32] args(%v0, %v1 : vector<4xf32>, vector<8xf32>) -> (vector<1xf32>, vector<2xf32>) { ^bb0(%arg0: vector<128xf32>, %arg1: vector<256xf32>): -// CHECK-SCF-IF: %[[arg1:.*]] = vector.transfer_read %[[buffer_v1]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<256xf32, 3>, vector<256xf32> -// CHECK-SCF-IF: %[[arg0:.*]] = vector.transfer_read %[[buffer_v0]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<128xf32, 3>, vector<128xf32> +// CHECK-SCF-IF: %[[arg1:.*]] = vector.transfer_read %[[buffer_v1]][%[[c0]]], %{{.*}} {in_bounds = array<i1: true>} : memref<256xf32, 3>, vector<256xf32> +// CHECK-SCF-IF: %[[arg0:.*]] = vector.transfer_read %[[buffer_v0]][%[[c0]]], %{{.*}} {in_bounds = array<i1: true>} : memref<128xf32, 3>, vector<128xf32> // CHECK-SCF-IF: %[[def_0:.*]] = "some_def"(%[[arg0]]) : (vector<128xf32>) -> vector<32xf32> // CHECK-SCF-IF: %[[def_1:.*]] = "some_def"(%[[arg1]]) : (vector<256xf32>) -> vector<64xf32> %2 = "some_def"(%arg0) : (vector<128xf32>) -> vector<32xf32> @@ -60,8 +60,8 @@ func.func @rewrite_warp_op_to_scf_if(%laneid: index, // CHECK-SCF-IF: } // CHECK-SCF-IF: gpu.barrier // CHECK-SCF-IF: %[[o1:.*]] = affine.apply #[[$TIMES2]]()[%[[laneid]]] -// CHECK-SCF-IF: %[[r1:.*]] = vector.transfer_read %[[buffer_def_1]][%[[o1]]], %{{.*}} {in_bounds = [true]} : memref<64xf32, 3>, vector<2xf32> -// CHECK-SCF-IF: %[[r0:.*]] = vector.transfer_read %[[buffer_def_0]][%[[laneid]]], %{{.*}} {in_bounds = [true]} : memref<32xf32, 3>, vector<1xf32> +// CHECK-SCF-IF: %[[r1:.*]] = vector.transfer_read %[[buffer_def_1]][%[[o1]]], %{{.*}} {in_bounds = array<i1: true>} : memref<64xf32, 3>, vector<2xf32> +// CHECK-SCF-IF: %[[r0:.*]] = vector.transfer_read %[[buffer_def_0]][%[[laneid]]], %{{.*}} {in_bounds = array<i1: true>} : memref<32xf32, 3>, vector<1xf32> // CHECK-SCF-IF: "some_use"(%[[r0]]) : (vector<1xf32>) -> () // CHECK-SCF-IF: "some_use"(%[[r1]]) : (vector<2xf32>) -> () "some_use"(%r#0) : (vector<1xf32>) -> () @@ -83,9 +83,9 @@ func.func @rewrite_warp_op_to_scf_if(%laneid: index, // CHECK-D: arith.addf {{.*}} : vector<32xf32> // CHECK-D: arith.addf {{.*}} : vector<64xf32> // CHECK-D: vector.yield %{{.*}}, %{{.*}} : vector<64xf32>, vector<32xf32> -// CHECK-D-DAG: vector.transfer_write %[[R]]#1, %{{.*}}[%{{.*}}] {in_bounds = [true]} : vector<1xf32>, memref<128xf32 +// CHECK-D-DAG: vector.transfer_write %[[R]]#1, %{{.*}}[%{{.*}}] {in_bounds = array<i1: true>} : vector<1xf32>, memref<128xf32 // CHECK-D-DAG: %[[ID1:.*]] = affine.apply #[[MAP1]]()[%{{.*}}] -// CHECK-D-DAG: vector.transfer_write %[[R]]#0, %{{.*}}[%[[ID1]]] {in_bounds = [true]} : vector<2xf32>, memref<128xf32 +// CHECK-D-DAG: vector.transfer_write %[[R]]#0, %{{.*}}[%[[ID1]]] {in_bounds = array<i1: true>} : vector<2xf32>, memref<128xf32 // CHECK-DIST-AND-PROP-NOT: vector.warp_execute_on_lane_0 //
CHECK-DIST-AND-PROP: vector.transfer_read {{.*}} vector<1xf32> @@ -562,14 +562,14 @@ func.func @warp_scf_for_multiple_yield(%arg0: index, %arg1: memref, %arg2 %0:3 = vector.warp_execute_on_lane_0(%arg0)[32] -> (vector<1xf32>, vector<4xf32>, vector<4xf32>) { %def = "some_def"() : () -> (vector<32xf32>) - %r1 = vector.transfer_read %arg2[%c0], %cst {in_bounds = [true]} : memref, vector<128xf32> - %r2 = vector.transfer_read %arg2[%c128], %cst {in_bounds = [true]} : memref, vector<128xf32> + %r1 = vector.transfer_read %arg2[%c0], %cst {in_bounds = array<i1: true>} : memref, vector<128xf32> + %r2 = vector.transfer_read %arg2[%c128], %cst {in_bounds = array<i1: true>} : memref, vector<128xf32> %3:2 = scf.for %arg3 = %c0 to %c128 step %c1 iter_args(%arg4 = %r1, %arg5 = %r2) -> (vector<128xf32>, vector<128xf32>) { %o1 = affine.apply #map1()[%arg3] %o2 = affine.apply #map2()[%arg3] - %4 = vector.transfer_read %arg1[%o1], %cst {in_bounds = [true]} : memref, vector<128xf32> - %5 = vector.transfer_read %arg1[%o2], %cst {in_bounds = [true]} : memref, vector<128xf32> + %4 = vector.transfer_read %arg1[%o1], %cst {in_bounds = array<i1: true>} : memref, vector<128xf32> + %5 = vector.transfer_read %arg1[%o2], %cst {in_bounds = array<i1: true>} : memref, vector<128xf32> %6 = arith.addf %4, %arg4 : vector<128xf32> %7 = arith.addf %5, %arg5 : vector<128xf32> scf.yield %6, %7 : vector<128xf32>, vector<128xf32> @@ -577,9 +577,9 @@ func.func @warp_scf_for_multiple_yield(%arg0: index, %arg1: memref, %arg2 vector.yield %def, %3#0, %3#1 : vector<32xf32>, vector<128xf32>, vector<128xf32> } %1 = affine.apply #map()[%arg0] - vector.transfer_write %0#1, %arg2[%1] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %0#1, %arg2[%1] {in_bounds = array<i1: true>} : vector<4xf32>, memref %2 = affine.apply #map2()[%arg0] - vector.transfer_write %0#2, %arg2[%2] {in_bounds = [true]} : vector<4xf32>, memref + vector.transfer_write %0#2, %arg2[%2] {in_bounds = array<i1: true>} : vector<4xf32>, memref "some_use"(%0#0) : (vector<1xf32>) -> () return } @@ -644,7 +644,7 @@ func.func @warp_distribute(%arg0: index, %src: memref<128xf32>, %dest: memref<12 %2 = vector.broadcast %1 : f32 to vector<1xf32> %3 = arith.divf %2, %cst_1 : vector<1xf32> scf.for %arg1 = %c0 to %c128 step %c1 { - %4 = vector.transfer_read %src[%arg1], %f0 {in_bounds = [true]} : memref<128xf32>, vector<1xf32> + %4 = vector.transfer_read %src[%arg1], %f0 {in_bounds = array<i1: true>} : memref<128xf32>, vector<1xf32> %5 = arith.subf %4, %3 : vector<1xf32> vector.transfer_write %5, %dest[%arg1] : vector<1xf32>, memref<128xf32> } @@ -661,7 +661,7 @@ func.func @vector_reduction(%laneid: index, %m0: memref<4x2x32xf32>, %m1: memref // CHECK-D: vector.warp_execute_on_lane_0(%{{.*}})[32] { // CHECK-D: vector.transfer_write %[[R]], %{{.*}}[] : vector<f32>, memref<f32> vector.warp_execute_on_lane_0(%laneid)[32] { - %0 = vector.transfer_read %m0[%c0, %c0, %c0], %f0 {in_bounds = [true]} : memref<4x2x32xf32>, vector<32xf32> + %0 = vector.transfer_read %m0[%c0, %c0, %c0], %f0 {in_bounds = array<i1: true>} : memref<4x2x32xf32>, vector<32xf32> %1 = vector.transfer_read %m1[], %f0 : memref<f32>, vector<f32> %2 = vector.extractelement %1[] : vector<f32> %3 = vector.reduction <add>, %0 : vector<32xf32> into f32 @@ -999,7 +999,7 @@ func.func @warp_propagate_read_broadcast(%laneid: index, %src: memref<32x1xf32>) %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %r = vector.warp_execute_on_lane_0(%laneid)[512] -> (vector<1x4xf32>) { - %2 = vector.transfer_read %src[%c0, %c0], %cst {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d0, 0)>}
: memref<32x1xf32>, vector<32x64xf32> + %2 = vector.transfer_read %src[%c0, %c0], %cst {in_bounds = array<i1: true, true>, permutation_map = affine_map<(d0, d1) -> (d0, 0)>} : memref<32x1xf32>, vector<32x64xf32> vector.yield %2 : vector<32x64xf32> } return %r : vector<1x4xf32> @@ -1011,7 +1011,7 @@ func.func @warp_propagate_read_broadcast(%laneid: index, %src: memref<32x1xf32>) // CHECK-PROP-SAME: (%[[LANE:.+]]: index, %[[SRC:.+]]: memref<32x1xf32>) // CHECK-PROP: %[[C0:.+]] = arith.constant 0 : index // CHECK-PROP: %[[ID:.+]] = affine.apply #[[$MAP]]()[%[[LANE]]] -// CHECK-PROP: %[[READ:.+]] = vector.transfer_read %[[SRC]][%[[ID]], %[[C0]]], %{{.+}} {in_bounds = [true, true], permutation_map = #[[$READMAP]]} : memref<32x1xf32>, vector<1x4xf32> +// CHECK-PROP: %[[READ:.+]] = vector.transfer_read %[[SRC]][%[[ID]], %[[C0]]], %{{.+}} {in_bounds = array<i1: true, true>, permutation_map = #[[$READMAP]]} : memref<32x1xf32>, vector<1x4xf32> // CHECK-PROP: return %[[READ]] : vector<1x4xf32> // ----- @@ -1067,8 +1067,8 @@ func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: v args(%s0, %v0, %v1, %v2 : f32, vector<f32>, vector<1xf32>, vector<1x1xf32>) -> (f32, vector<f32>, vector<1xf32>, vector<1x1xf32>) { ^bb0(%bs0: f32, %bv0: vector<f32>, %bv1: vector<1xf32>, %bv2: vector<1x1xf32>): - // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, 3>, vector<1x1xf32> - // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, 3>, vector<1xf32> + // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = array<i1: true, true>} : memref<1x1xf32, 3>, vector<1x1xf32> + // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = array<i1: true>} : memref<1xf32, 3>, vector<1xf32> // CHECK-SCF-IF: vector.transfer_read {{.*}}[]{{.*}} : memref<f32, 3>, vector<f32> // CHECK-SCF-IF: memref.load {{.*}}[%[[C0]]] : memref<1xf32, 3> // CHECK-SCF-IF: "some_def_0"(%{{.*}}) : (f32) -> f32 @@ -1077,8 +1077,8 @@ func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: v // CHECK-SCF-IF: "some_def_1"(%{{.*}}) : (vector<1x1xf32>) -> vector<1x1xf32> // CHECK-SCF-IF: memref.store {{.*}}[%[[C0]]] : memref<1xf32, 3> // CHECK-SCF-IF: vector.transfer_write {{.*}}[] : vector<f32>, memref<f32, 3> - // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]]] {in_bounds = [true]} : vector<1xf32>, memref<1xf32, 3> - // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<1x1xf32>, memref<1x1xf32, 3> + // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]]] {in_bounds = array<i1: true>} : vector<1xf32>, memref<1xf32, 3> + // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {in_bounds = array<i1: true, true>} : vector<1x1xf32>, memref<1x1xf32, 3> %rs0 = "some_def_0"(%bs0) : (f32) -> f32 %rv0 = "some_def_1"(%bv0) : (vector<f32>) -> vector<f32> @@ -1090,8 +1090,8 @@ func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: v } // CHECK-SCF-IF: gpu.barrier - // CHECK-SCF-IF: %[[RV2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, 3>, vector<1x1xf32> - // CHECK-SCF-IF: %[[RV1:.*]] = vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, 3>, vector<1xf32> + // CHECK-SCF-IF: %[[RV2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = array<i1: true, true>} : memref<1x1xf32, 3>, vector<1x1xf32> + // CHECK-SCF-IF: %[[RV1:.*]] = vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = array<i1: true>} : memref<1xf32, 3>, vector<1xf32> // CHECK-SCF-IF: %[[RV0:.*]]
= vector.transfer_read {{.*}}[]{{.*}} : memref<f32, 3>, vector<f32> // CHECK-SCF-IF: %[[RS0:.*]] = memref.load {{.*}}[%[[C0]]] : memref<1xf32, 3> // CHECK-SCF-IF: return %[[RS0]], %[[RV0]], %[[RV1]], %[[RV2]] : f32, vector<f32>, vector<1xf32>, vector<1x1xf32> @@ -1108,9 +1108,9 @@ func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, % -> (vector<1x64x1xf32>, vector<1x2x128xf32>) { // CHECK-SCF-IF-DAG: %[[C0:.*]] = arith.constant 0 : index - // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[LANEID]], %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x1xf32>, memref<32x64x1xf32, 3> + // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[LANEID]], %c0, %c0] {in_bounds = array<i1: true, true, true>} : vector<1x64x1xf32>, memref<32x64x1xf32, 3> // CHECK-SCF-IF: %[[RID:.*]] = affine.apply #[[$TIMES2]]()[%[[LANEID]]] - // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[C0]], %[[RID]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x128xf32>, memref<1x64x128xf32, 3> + // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[C0]], %[[RID]], %[[C0]]] {in_bounds = array<i1: true, true, true>} : vector<1x2x128xf32>, memref<1x64x128xf32, 3> // CHECK-SCF-IF: gpu.barrier // CHECK-SCF-IF: scf.if{{.*}}{ @@ -1118,12 +1118,12 @@ func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, % args(%v0, %v1 : vector<1x64x1xf32>, vector<1x2x128xf32>) -> (vector<1x64x1xf32>, vector<1x2x128xf32>) { ^bb0(%arg0: vector<32x64x1xf32>, %arg1: vector<1x64x128xf32>): - // CHECK-SCF-IF-DAG: %[[SR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<32x64x1xf32, 3>, vector<32x64x1xf32> - // CHECK-SCF-IF-DAG: %[[SR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<1x64x128xf32, 3>, vector<1x64x128xf32> + // CHECK-SCF-IF-DAG: %[[SR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = array<i1: true, true, true>} : memref<32x64x1xf32, 3>, vector<32x64x1xf32> + // CHECK-SCF-IF-DAG: %[[SR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = array<i1: true, true, true>} : memref<1x64x128xf32, 3>, vector<1x64x128xf32> // CHECK-SCF-IF: %[[W0:.*]] = "some_def_0"(%[[SR0]]) : (vector<32x64x1xf32>) -> vector<32x64x1xf32> // CHECK-SCF-IF: %[[W1:.*]] = "some_def_1"(%[[SR1]]) : (vector<1x64x128xf32>) -> vector<1x64x128xf32> - // CHECK-SCF-IF-DAG: vector.transfer_write %[[W0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<32x64x1xf32>, memref<32x64x1xf32, 3> - // CHECK-SCF-IF-DAG: vector.transfer_write %[[W1]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x64x128xf32>, memref<1x64x128xf32, 3> + // CHECK-SCF-IF-DAG: vector.transfer_write %[[W0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: true, true, true>} : vector<32x64x1xf32>, memref<32x64x1xf32, 3> + // CHECK-SCF-IF-DAG: vector.transfer_write %[[W1]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = array<i1: true, true, true>} : vector<1x64x128xf32>, memref<1x64x128xf32, 3> %r0 = "some_def_0"(%arg0) : (vector<32x64x1xf32>) -> vector<32x64x1xf32> %r1 = "some_def_1"(%arg1) : (vector<1x64x128xf32>) -> vector<1x64x128xf32> @@ -1134,8 +1134,8 @@ func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, % // CHECK-SCF-IF: gpu.barrier // CHECK-SCF-IF: %[[WID:.*]] = affine.apply #[[$TIMES2]]()[%[[LANEID]]] - // CHECK-SCF-IF-DAG: %[[R0:.*]] = vector.transfer_read %{{.*}}[%[[LANEID]], %[[C0]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<32x64x1xf32, 3>,
-  // CHECK-SCF-IF-DAG: %[[R1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[WID]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<1x64x128xf32, 3>, vector<1x2x128xf32>
+  // CHECK-SCF-IF-DAG: %[[R0:.*]] = vector.transfer_read %{{.*}}[%[[LANEID]], %[[C0]], %[[C0]]], %cst {in_bounds = array<i1: true, true, true>} : memref<32x64x1xf32, 3>, vector<1x64x1xf32>
+  // CHECK-SCF-IF-DAG: %[[R1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[WID]], %[[C0]]], %cst {in_bounds = array<i1: true, true, true>} : memref<1x64x128xf32, 3>, vector<1x2x128xf32>
   // CHECK-SCF-IF: return %[[R0]], %[[R1]] : vector<1x64x1xf32>, vector<1x2x128xf32>

   return %r#0, %r#1 : vector<1x64x1xf32>, vector<1x2x128xf32>
 }
@@ -1337,7 +1337,7 @@ func.func @transfer_read_prop_operands(%in2: vector<1x2xindex>, %ar1 : memref<1
     %29 = vector.extract %28[0] : vector<64xi32> from vector<1x64xi32>
     %30 = arith.index_cast %29 : vector<64xi32> to vector<64xindex>
     %36 = vector.extractelement %30[%c0_i32 : i32] : vector<64xindex>
-    %37 = vector.transfer_read %ar2[%c0, %36, %c0], %cst_6 {in_bounds = [true]} : memref<1x4x1024xf32>, vector<64xf32>
+    %37 = vector.transfer_read %ar2[%c0, %36, %c0], %cst_6 {in_bounds = array<i1: true>} : memref<1x4x1024xf32>, vector<64xf32>
     vector.yield %37 : vector<64xf32>
   }
   return %18 : vector<2xf32>
@@ -1387,7 +1387,7 @@ func.func @warp_propagate_shape_cast(%laneid: index, %src: memref<32x4x32xf32>)

func.func @warp_propagate_uniform_transfer_read(%laneid: index, %src: memref<4096xf32>, %index: index) -> vector<1xf32> {
  %f0 = arith.constant 0.000000e+00 : f32
  %r = vector.warp_execute_on_lane_0(%laneid)[64] -> (vector<1xf32>) {
-    %1 = vector.transfer_read %src[%index], %f0 {in_bounds = [true]} : memref<4096xf32>, vector<1xf32>
+    %1 = vector.transfer_read %src[%index], %f0 {in_bounds = array<i1: true>} : memref<4096xf32>, vector<1xf32>
    vector.yield %1 : vector<1xf32>
  }
  return %r : vector<1xf32>
@@ -1395,7 +1395,7 @@ func.func @warp_propagate_uniform_transfer_read(%laneid: index, %src: memref<409

// CHECK-PROP-LABEL: func.func @warp_propagate_uniform_transfer_read
// CHECK-PROP-SAME:  (%{{.+}}: index, %[[SRC:.+]]: memref<4096xf32>, %[[INDEX:.+]]: index)
-// CHECK-PROP:      %[[READ:.+]] = vector.transfer_read %[[SRC]][%[[INDEX]]], %cst {in_bounds = [true]} : memref<4096xf32>, vector<1xf32>
+// CHECK-PROP:      %[[READ:.+]] = vector.transfer_read %[[SRC]][%[[INDEX]]], %cst {in_bounds = array<i1: true>} : memref<4096xf32>, vector<1xf32>
// CHECK-PROP:      return %[[READ]] : vector<1xf32>

// -----

@@ -1403,9 +1403,9 @@ func.func @warp_propagate_uniform_transfer_read(%laneid: index, %src: memref<409

func.func @warp_propagate_multi_transfer_read(%laneid: index, %src: memref<4096xf32>, %index: index, %index1: index) -> (vector<1xf32>, vector<1xf32>) {
  %f0 = arith.constant 0.000000e+00 : f32
  %r:2 = vector.warp_execute_on_lane_0(%laneid)[64] -> (vector<1xf32>, vector<1xf32>) {
-    %0 = vector.transfer_read %src[%index], %f0 {in_bounds = [true]} : memref<4096xf32>, vector<1xf32>
+    %0 = vector.transfer_read %src[%index], %f0 {in_bounds = array<i1: true>} : memref<4096xf32>, vector<1xf32>
    "some_use"(%0) : (vector<1xf32>) -> ()
-    %1 = vector.transfer_read %src[%index1], %f0 {in_bounds = [true]} : memref<4096xf32>, vector<1xf32>
+    %1 = vector.transfer_read %src[%index1], %f0 {in_bounds = array<i1: true>} : memref<4096xf32>, vector<1xf32>
    vector.yield %0, %1 : vector<1xf32>, vector<1xf32>
  }
  return %r#0, %r#1 : vector<1xf32>, vector<1xf32>
@@ -1423,8 +1423,8 @@ func.func @warp_propagate_multi_transfer_read(%laneid: index, %src: memref<4096x

func.func @warp_propagate_dead_user_multi_read(%laneid: index, %src:
memref<4096xf32>, %index: index, %index1: index) -> (vector<1xf32>) {
  %f0 = arith.constant 0.000000e+00 : f32
  %r = vector.warp_execute_on_lane_0(%laneid)[64] -> (vector<1xf32>) {
-    %0 = vector.transfer_read %src[%index], %f0 {in_bounds = [true]} : memref<4096xf32>, vector<64xf32>
-    %1 = vector.transfer_read %src[%index1], %f0 {in_bounds = [true]} : memref<4096xf32>, vector<64xf32>
+    %0 = vector.transfer_read %src[%index], %f0 {in_bounds = array<i1: true>} : memref<4096xf32>, vector<64xf32>
+    %1 = vector.transfer_read %src[%index1], %f0 {in_bounds = array<i1: true>} : memref<4096xf32>, vector<64xf32>
    %max = arith.maximumf %0, %1 : vector<64xf32>
    vector.yield %max : vector<64xf32>
  }
@@ -1460,8 +1460,8 @@ func.func @warp_propagate_masked_write(%laneid: index, %dest: memref<4096xf32>)
// CHECK-DIST-AND-PROP:   vector.yield %[[V1]], %[[M1]], %[[V0]], %[[M0]]
// CHECK-DIST-AND-PROP-SAME:   vector<32xf32>, vector<32xi1>, vector<4096xf32>, vector<4096xi1>
// CHECK-DIST-AND-PROP: }
-// CHECK-DIST-AND-PROP: vector.transfer_write %[[W]]#2, {{.*}}, %[[W]]#3 {in_bounds = [true]} : vector<128xf32>, memref<4096xf32>
-// CHECK-DIST-AND-PROP: vector.transfer_write %[[W]]#0, {{.*}}, %[[W]]#1 {in_bounds = [true]} : vector<1xf32>, memref<4096xf32>
+// CHECK-DIST-AND-PROP: vector.transfer_write %[[W]]#2, {{.*}}, %[[W]]#3 {in_bounds = array<i1: true>} : vector<128xf32>, memref<4096xf32>
+// CHECK-DIST-AND-PROP: vector.transfer_write %[[W]]#0, {{.*}}, %[[W]]#1 {in_bounds = array<i1: true>} : vector<1xf32>, memref<4096xf32>

// -----

@@ -1470,9 +1470,9 @@ func.func @warp_propagate_masked_transfer_read(%laneid: index, %src: memref<4096
  %c0 = arith.constant 0 : index
  %r:2 = vector.warp_execute_on_lane_0(%laneid)[64] -> (vector<2xf32>, vector<2x2xf32>) {
    %mask = "mask_def_0"() : () -> (vector<128xi1>)
-    %0 = vector.transfer_read %src[%c0, %index], %f0, %mask {in_bounds = [true]} : memref<4096x4096xf32>, vector<128xf32>
+    %0 = vector.transfer_read %src[%c0, %index], %f0, %mask {in_bounds = array<i1: true>} : memref<4096x4096xf32>, vector<128xf32>
    %mask2 = "mask_def_1"() : () -> (vector<128x2xi1>)
-    %1 = vector.transfer_read %src[%c0, %index], %f0, %mask2 {in_bounds = [true, true]} : memref<4096x4096xf32>, vector<128x2xf32>
+    %1 = vector.transfer_read %src[%c0, %index], %f0, %mask2 {in_bounds = array<i1: true, true>} : memref<4096x4096xf32>, vector<128x2xf32>
    vector.yield %0, %1 : vector<128xf32>, vector<128x2xf32>
  }
  return %r#0, %r#1 : vector<2xf32>, vector<2x2xf32>
@@ -1500,7 +1500,7 @@ func.func @warp_propagate_nontrivial_map_masked_transfer_read(%laneid: index, %s
  %c0 = arith.constant 0 : index
  %r = vector.warp_execute_on_lane_0(%laneid)[64] -> (vector<2xf32>) {
    %mask = "mask_def_0"() : () -> (vector<128xi1>)
-    %0 = vector.transfer_read %src[%index, %c0], %f0, %mask {in_bounds = [true], permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<4096x4096xf32>, vector<128xf32>
+    %0 = vector.transfer_read %src[%index, %c0], %f0, %mask {in_bounds = array<i1: true>, permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<4096x4096xf32>, vector<128xf32>
    vector.yield %0 : vector<128xf32>
  }
  return %r : vector<2xf32>
@@ -1526,8 +1526,8 @@ func.func @warp_propagate_masked_transfer_read_shared_mask(%laneid: index, %src:
  %c0 = arith.constant 0 : index
  %r:2 = vector.warp_execute_on_lane_0(%laneid)[64] -> (vector<2xf32>, vector<2xf32>) {
    %mask = vector.create_mask %mask_ub: vector<128xi1>
-    %0 = vector.transfer_read %src[%c0, %index], %f0, %mask {in_bounds = [true]} : memref<4096x4096xf32>, vector<128xf32>
-    %1 = vector.transfer_read %src[%c0, %index2], %f0, %mask {in_bounds = [true]} : memref<4096x4096xf32>, vector<128xf32>
+    %0 = vector.transfer_read %src[%c0, %index], %f0, %mask {in_bounds = array<i1: true>} : memref<4096x4096xf32>, vector<128xf32>
+    %1 = vector.transfer_read %src[%c0, %index2], %f0, %mask {in_bounds = array<i1: true>} : memref<4096x4096xf32>, vector<128xf32>
    vector.yield %0, %1 : vector<128xf32>, vector<128xf32>
  }
  return %r#0, %r#1 : vector<2xf32>, vector<2xf32>
@@ -1546,7 +1546,7 @@ func.func @warp_propagate_unconnected_read_write(%laneid: index, %buffer: memref
  %c0 = arith.constant 0 : index
  %r:2 = vector.warp_execute_on_lane_0(%laneid)[32] -> (vector<2xf32>, vector<4xf32>) {
    %cst = arith.constant dense<2.0> : vector<128xf32>
-    %0 = vector.transfer_read %buffer[%c0], %f0 {in_bounds = [true]} : memref<128xf32>, vector<128xf32>
+    %0 = vector.transfer_read %buffer[%c0], %f0 {in_bounds = array<i1: true>} : memref<128xf32>, vector<128xf32>
    vector.transfer_write %cst, %buffer[%c0] : vector<128xf32>, memref<128xf32>
    %1 = vector.broadcast %f1 : f32 to vector<64xf32>
    vector.yield %1, %0 : vector<64xf32>, vector<128xf32>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
index 5a624e6434297..23e4b3fd9054e 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
@@ -129,18 +129,18 @@ module attributes {gpu.container_module} {
      %quad_col = affine.apply affine_map<()[s0]->(s0 * 2)>()[%col_8x4] // account for 2xf16/col

      // Load quad (0, 0)
-      %A_quad00 = vector.transfer_read %argA[%quad_row, %quad_col], %f0 {in_bounds = [true]} : memref<16x16xf16>, vector<2xf16>
+      %A_quad00 = vector.transfer_read %argA[%quad_row, %quad_col], %f0 {in_bounds = array<i1: true>} : memref<16x16xf16>, vector<2xf16>

      // Load quad (1, 0). Just shift row down 8.
      %quad_row_plus_8 = affine.apply affine_map<(d0)[]->(d0+8)>(%quad_row)[]
-      %A_quad10 = vector.transfer_read %argA[%quad_row_plus_8, %quad_col], %f0 {in_bounds = [true]} : memref<16x16xf16>, vector<2xf16>
+      %A_quad10 = vector.transfer_read %argA[%quad_row_plus_8, %quad_col], %f0 {in_bounds = array<i1: true>} : memref<16x16xf16>, vector<2xf16>

      // Load quad (0, 1). Just shift col right 8 (4 2xf16 values)
      %quad_col_plus_8 = affine.apply affine_map<(d0)[]->(d0+8)>(%quad_col)[]
-      %A_quad01 = vector.transfer_read %argA[%quad_row, %quad_col_plus_8], %f0 {in_bounds = [true]} : memref<16x16xf16>, vector<2xf16>
+      %A_quad01 = vector.transfer_read %argA[%quad_row, %quad_col_plus_8], %f0 {in_bounds = array<i1: true>} : memref<16x16xf16>, vector<2xf16>

      // Load quad (1, 1)
-      %A_quad11 = vector.transfer_read %argA[%quad_row_plus_8, %quad_col_plus_8], %f0 {in_bounds = [true]} : memref<16x16xf16>, vector<2xf16>
+      %A_quad11 = vector.transfer_read %argA[%quad_row_plus_8, %quad_col_plus_8], %f0 {in_bounds = array<i1: true>} : memref<16x16xf16>, vector<2xf16>

      // Assemble the elements into a vector
      %A_init0 = arith.constant dense<0.0> : vector<4x2xf16>
@@ -165,18 +165,18 @@ module attributes {gpu.container_module} {
      // (t) -> (t/4, t % 4). So we can re-use some of the calculation from A.
      // Load quad (0, 0)
-      %B_quad0 = vector.transfer_read %argB[%quad_row, %quad_col], %f0 {in_bounds = [true]} : memref<8x32xf16>, vector<2xf16>
+      %B_quad0 = vector.transfer_read %argB[%quad_row, %quad_col], %f0 {in_bounds = array<i1: true>} : memref<8x32xf16>, vector<2xf16>

      // Load quad (0, 1)
-      %B_quad1 = vector.transfer_read %argB[%quad_row, %quad_col_plus_8], %f0 {in_bounds = [true]} : memref<8x32xf16>, vector<2xf16>
+      %B_quad1 = vector.transfer_read %argB[%quad_row, %quad_col_plus_8], %f0 {in_bounds = array<i1: true>} : memref<8x32xf16>, vector<2xf16>

      // Load quad (0, 2)
      %quad_col_plus_16 = affine.apply affine_map<()[s0]->(s0 + 16)>()[%quad_col]
-      %B_quad2 = vector.transfer_read %argB[%quad_row, %quad_col_plus_16], %f0 {in_bounds = [true]} : memref<8x32xf16>, vector<2xf16>
+      %B_quad2 = vector.transfer_read %argB[%quad_row, %quad_col_plus_16], %f0 {in_bounds = array<i1: true>} : memref<8x32xf16>, vector<2xf16>

      // Load quad (0, 3)
      %quad_col_plus_24 = affine.apply affine_map<()[s0]->(s0 + 24)>()[%quad_col]
-      %B_quad3 = vector.transfer_read %argB[%quad_row, %quad_col_plus_24], %f0 {in_bounds = [true]} : memref<8x32xf16>, vector<2xf16>
+      %B_quad3 = vector.transfer_read %argB[%quad_row, %quad_col_plus_24], %f0 {in_bounds = array<i1: true>} : memref<8x32xf16>, vector<2xf16>

      // Assemble into vector
      %B_init0 = arith.constant dense<0.0> : vector<4x2xf16>
@@ -207,8 +207,8 @@ module attributes {gpu.container_module} {
      // vector1: (tid) -> (tid / 4 + 8, tid %4)
      %C_0 = vector.extract %d[0] : vector<2xf16> from vector<2x2xf16>
      %C_1 = vector.extract %d[1] : vector<2xf16> from vector<2x2xf16>
-      vector.transfer_write %C_0, %argC[%quad_row, %quad_col] {in_bounds = [true]} : vector<2xf16>, memref<16x8xf16>
-      vector.transfer_write %C_1, %argC[%quad_row_plus_8, %quad_col] {in_bounds = [true]} : vector<2xf16>, memref<16x8xf16>
+      vector.transfer_write %C_0, %argC[%quad_row, %quad_col] {in_bounds = array<i1: true>} : vector<2xf16>, memref<16x8xf16>
+      vector.transfer_write %C_1, %argC[%quad_row_plus_8, %quad_col] {in_bounds = array<i1: true>} : vector<2xf16>, memref<16x8xf16>

      gpu.return
    }
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/multi-tile-transpose.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/multi-tile-transpose.mlir
index 7f1566d635cbb..2840febebb210 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/multi-tile-transpose.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/multi-tile-transpose.mlir
@@ -40,10 +40,10 @@ func.func @testTransposedReadWithMask(%maskRows: index, %maskCols: index) {
  /// A vector.transfer_read with a transpose permutation map. So the input data
  /// (and mask) have a [4]x[16] shape, but the output is [16]x[4].
  %readTransposed = vector.transfer_read %inDyn[%c0, %c0], %pad, %mask
-    {permutation_map = #transpose, in_bounds = [true, true]} : memref<?x?xf32>, vector<[16]x[4]xf32>
+    {permutation_map = #transpose, in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[16]x[4]xf32>

  /// Write the vector back to a memref (that also has a transposed shape).
-  vector.transfer_write %readTransposed, %outDyn[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[4]xf32>, memref<?x?xf32>
+  vector.transfer_write %readTransposed, %outDyn[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[16]x[4]xf32>, memref<?x?xf32>

  /// Print the input memref.
  vector.print str "Input memref:\n"
@@ -72,7 +72,7 @@ func.func @testTransposedWriteWithMask(%maskRows: index, %maskCols: index) {

  /// A regular read.
  %c0 = arith.constant 0 : index
-  %read = vector.transfer_read %inDyn[%c0, %c0], %c0_f32 {in_bounds = [true, true]}
+  %read = vector.transfer_read %inDyn[%c0, %c0], %c0_f32 {in_bounds = array<i1: true, true>}
    : memref<?x?xf32>, vector<[16]x[4]xf32>

  /// A mask so we only write the first maskRows x maskCols portion of transpose(%in).
@@ -80,7 +80,7 @@ func.func @testTransposedWriteWithMask(%maskRows: index, %maskCols: index) {

  /// Write out the data with a transpose. Here (like the read test) the mask
  /// matches the shape of the memory, not the vector.
-  vector.transfer_write %read, %outDyn[%c0, %c0], %mask {permutation_map = #transpose, in_bounds = [true, true]}
+  vector.transfer_write %read, %outDyn[%c0, %c0], %mask {permutation_map = #transpose, in_bounds = array<i1: true, true>}
    : vector<[16]x[4]xf32>, memref<?x?xf32>

  /// Print the input memref.
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/transfer-read-2d.mlir
index 77f5a325728b3..4ac506312a879 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/transfer-read-2d.mlir
@@ -40,7 +40,7 @@ func.func @transfer_read_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: in
  %pad = arith.constant 0.0 : f32
  %mask = vector.create_mask %c2, %c3 : vector<[4]x[4]xi1>
  %0 = vector.transfer_read %A[%base1, %base2], %pad, %mask
-    {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+    {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>

  vector.print str "TILE BEGIN:\n"
  vector.print %0: vector<[4]x[4]xf32>
@@ -71,7 +71,7 @@ func.func @transfer_read_2d_mask_non_zero_pad(%A : memref<?x?xf32>, %base1: inde
  %pad = arith.constant -42.0 : f32
  %mask = vector.create_mask %c2, %c3 : vector<[4]x[4]xi1>
  %0 = vector.transfer_read %A[%base1, %base2], %pad, %mask
-    {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+    {in_bounds = array<i1: true, true>} : memref<?x?xf32>, vector<[4]x[4]xf32>

  vector.print str "TILE BEGIN:\n"
  vector.print %0: vector<[4]x[4]xf32>
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
index 697fb90f63154..ea36450559c7a 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
@@ -88,7 +88,7 @@ func.func @entry() -> i32 {
  // ...
  //
  %cst_0 = arith.constant dense<0> : vector<[16]x[16]xi8>
-  vector.transfer_write %cst_0, %za_b[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[16]xi8>, memref<?x?xi8>
+  vector.transfer_write %cst_0, %za_b[%c0, %c0] {in_bounds = array<i1: true, true>} : vector<[16]x[16]xi8>, memref<?x?xi8>

  // Verify memory is zeroed by doing an add reduction with initial value of
  // zero.
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir
index 4ce6281d9fd45..208b83adf5140 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir
@@ -14,7 +14,7 @@ func.func @read_and_print_2d_vector(%memref: memref<3x?xf32>) {
  %c2 = arith.constant 2 : index
  %dim = memref.dim %memref, %c1 : memref<3x?xf32>
  %mask = vector.create_mask %c2, %dim : vector<3x[8]xi1>
-  %vector = vector.transfer_read %memref[%c0,%c0], %cst, %mask {in_bounds = [true, true]} : memref<3x?xf32>, vector<3x[8]xf32>
+  %vector = vector.transfer_read %memref[%c0,%c0], %cst, %mask {in_bounds = array<i1: true, true>} : memref<3x?xf32>, vector<3x[8]xf32>

  /// TODO: Support vector.print for arrays of scalable vectors.
  %row0 = vector.extract %vector[0] : vector<[8]xf32> from vector<3x[8]xf32>
@@ -63,14 +63,14 @@ func.func @add_arrays_of_scalable_vectors(%a: memref<1x2x?xf32>, %b: memref<1x2x
  // CHECK-NEXT: ( 5, 5, 5, 5
  // CHECK-NEXT: ( 5, 5, 5, 5
  vector.print str "\nVector A\n"
-  %vector_a = vector.transfer_read %a[%c0, %c0, %c0], %cst, %mask_a {in_bounds = [true, true, true]} : memref<1x2x?xf32>, vector<1x2x[4]xf32>
+  %vector_a = vector.transfer_read %a[%c0, %c0, %c0], %cst, %mask_a {in_bounds = array<i1: true, true, true>} : memref<1x2x?xf32>, vector<1x2x[4]xf32>
  func.call @print_1x2xVSCALExf32(%vector_a) : (vector<1x2x[4]xf32>) -> ()

  // CHECK-LABEL: Vector B
  // CHECK-NEXT: ( 4, 4, 4, 4
  // CHECK-NEXT: ( 4, 4, 4, 4
  vector.print str "\nVector B\n"
-  %vector_b = vector.transfer_read %b[%c0, %c0, %c0], %cst, %mask_b {in_bounds = [true, true, true]} : memref<1x2x?xf32>, vector<1x2x[4]xf32>
+  %vector_b = vector.transfer_read %b[%c0, %c0, %c0], %cst, %mask_b {in_bounds = array<i1: true, true, true>} : memref<1x2x?xf32>, vector<1x2x[4]xf32>
  func.call @print_1x2xVSCALExf32(%vector_b) : (vector<1x2x[4]xf32>) -> ()

  // CHECK-LABEL: Sum
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-1d.mlir
index 12b0511d486ea..bde1d856e35ee 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-1d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-1d.mlir
@@ -41,7 +41,7 @@ func.func @transfer_read_1d_unit_stride(%A : memref<?x?xf32>) {
    scf.for %arg3 = %c0 to %c6 step %c3 {
      %0 = memref.subview %A[%arg2, %arg3] [1, 2] [1, 1]
          : memref<?x?xf32> to memref<1x2xf32, strided<[?, 1], offset: ?>>
-      %1 = vector.transfer_read %0[%c0, %c0], %fm42 {in_bounds=[true]}
+      %1 = vector.transfer_read %0[%c0, %c0], %fm42 {in_bounds = array<i1: true>}
          : memref<1x2xf32, strided<[?, 1], offset: ?>>, vector<2xf32>
      vector.print %1 : vector<2xf32>
    }
@@ -59,7 +59,7 @@ func.func @transfer_read_1d_non_static_unit_stride(%A : memref<?x?xf32>) {
  %fm42 = arith.constant -42.0: f32
  %1 = memref.reinterpret_cast %A to offset: [%c6], sizes: [%c4, %c6], strides: [%c6, %c1]
      : memref<?x?xf32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
-  %2 = vector.transfer_read %1[%c2, %c1], %fm42 {in_bounds=[true]}
+  %2 = vector.transfer_read %1[%c2, %c1], %fm42 {in_bounds = array<i1: true>}
      : memref<?x?xf32, strided<[?, ?], offset: ?>>, vector<4xf32>
  vector.print %2 : vector<4xf32>
  return
@@ -72,7 +72,7 @@ func.func @transfer_read_1d_non_unit_stride(%A : memref<?x?xf32>) {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %fm42 = arith.constant -42.0: f32
-  %vec = vector.transfer_read %B[%c2, %c1], %fm42 {in_bounds=[false]} : memref<4x3xf32, strided<[6, 2]>>, vector<3xf32>
+  %vec = vector.transfer_read %B[%c2, %c1], %fm42 {in_bounds = array<i1: false>} : memref<4x3xf32, strided<[6, 2]>>, vector<3xf32>
  vector.print %vec : vector<3xf32>
  return
}

@@ -82,7 +82,7 @@ func.func @transfer_read_1d_broadcast(
    %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
  %fm42 = arith.constant -42.0: f32
  %f = vector.transfer_read %A[%base1, %base2], %fm42
-      {in_bounds = [true], permutation_map = affine_map<(d0, d1) -> (0)>}
+      {in_bounds = array<i1: true>, permutation_map = affine_map<(d0, d1) -> (0)>}
      : memref<?x?xf32>, vector<9xf32>
  vector.print %f: vector<9xf32>
  return
@@ -93,7 +93,7 @@ func.func @transfer_read_1d_in_bounds(
    %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
  %fm42 = arith.constant -42.0: f32
  %f = vector.transfer_read %A[%base1, %base2], %fm42
-      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
+      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = array<i1: true>}
      : memref<?x?xf32>, vector<3xf32>
  vector.print %f: vector<3xf32>
  return
@@ -116,7 +116,7 @@ func.func @transfer_read_1d_out_of_bounds(
    %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
  %fm42 = arith.constant -42.0: f32
  %f = vector.transfer_read %A[%base1, %base2], %fm42
-      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [false]}
+      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = array<i1: false>}
      : memref<?x?xf32>, vector<3xf32>
  vector.print %f: vector<3xf32>
  return
@@ -128,7 +128,7 @@ func.func @transfer_read_1d_mask_in_bounds(
  %fm42 = arith.constant -42.0: f32
  %mask = arith.constant dense<[1, 0, 1]> : vector<3xi1>
  %f = vector.transfer_read %A[%base1, %base2], %fm42, %mask
-      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
+      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = array<i1: true>}
      : memref<?x?xf32>, vector<3xf32>
  vector.print %f: vector<3xf32>
  return
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-2d.mlir
index 9f8849fa9a148..56e37681c9261 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-2d.mlir
@@ -57,7 +57,7 @@ func.func @transfer_read_2d_mask_broadcast(
  %fm42 = arith.constant -42.0: f32
  %mask = arith.constant dense<[1, 0, 1, 0, 1, 1, 1, 0, 1]> : vector<9xi1>
  %f = vector.transfer_read %A[%base1, %base2], %fm42, %mask
-      {in_bounds = [true, false], permutation_map = affine_map<(d0, d1) -> (0, d1)>} :
+      {in_bounds = array<i1: true, false>, permutation_map = affine_map<(d0, d1) -> (0, d1)>} :
      memref<?x?xf32>, vector<4x9xf32>
  vector.print %f: vector<4x9xf32>
  return
@@ -69,7 +69,7 @@ func.func @transfer_read_2d_mask_transpose_broadcast_last_dim(
  %fm42 = arith.constant -42.0: f32
  %mask = arith.constant dense<[1, 0, 1, 1]> : vector<4xi1>
  %f = vector.transfer_read %A[%base1, %base2], %fm42, %mask
-      {in_bounds = [false, true], permutation_map = affine_map<(d0, d1) -> (d1, 0)>} :
+      {in_bounds = array<i1: false, true>, permutation_map = affine_map<(d0, d1) -> (d1, 0)>} :
      memref<?x?xf32>, vector<4x9xf32>
  vector.print %f: vector<4x9xf32>
  return
@@ -91,7 +91,7 @@ func.func @transfer_read_2d_broadcast(
    %A : memref<?x?xf32>, %base1: index, %base2: index) {
  %fm42 = arith.constant -42.0: f32
  %f = vector.transfer_read %A[%base1, %base2], %fm42
-      {in_bounds = [false, true], permutation_map = affine_map<(d0, d1) -> (d1, 0)>} :
+      {in_bounds = array<i1: false, true>, permutation_map = affine_map<(d0, d1) -> (d1, 0)>} :
      memref<?x?xf32>, vector<4x9xf32>
  vector.print %f: vector<4x9xf32>
  return
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-3d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-3d.mlir
index 466afeec459b4..cff8fcce11057 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-3d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-3d.mlir
@@ -21,7 +21,7 @@ func.func @transfer_read_3d_and_extract(%A : memref<?x?x?x?xf32>,
    %o: index, %a: index, %b: index, %c: index) {
  %fm42 = arith.constant -42.0: f32
  %f = vector.transfer_read %A[%o, %a, %b, %c], %fm42
-      {in_bounds = [true, true, true]}
+      {in_bounds = array<i1: true, true, true>}
      : memref<?x?x?x?xf32>, vector<2x5x3xf32>
  %sub = vector.extract %f[0] : vector<5x3xf32> from vector<2x5x3xf32>
  vector.print %sub: vector<5x3xf32>
@@ -32,7 +32,7 @@ func.func @transfer_read_3d_broadcast(%A : memref<?x?x?x?xf32>,
    %o: index, %a: index, %b: index, %c: index) {
  %fm42 = arith.constant -42.0: f32
  %f = vector.transfer_read %A[%o, %a, %b, %c], %fm42
-      {in_bounds = [false, true, false], permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, 0, d3)>}
+      {in_bounds = array<i1: false, true, false>, permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, 0, d3)>}
      : memref<?x?x?x?xf32>, vector<2x5x3xf32>
  vector.print %f: vector<2x5x3xf32>
  return
@@ -43,7 +43,7 @@ func.func @transfer_read_3d_mask_broadcast(
  %fm42 = arith.constant -42.0: f32
  %mask = arith.constant dense<[0, 1]> : vector<2xi1>
  %f = vector.transfer_read %A[%o, %a, %b, %c], %fm42, %mask
-      {in_bounds = [false, true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, 0, 0)>}
+      {in_bounds = array<i1: false, true, true>, permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, 0, 0)>}
      : memref<?x?x?x?xf32>, vector<2x5x3xf32>
  vector.print %f: vector<2x5x3xf32>
  return
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read.mlir b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read.mlir
index 91dc945cd3432..41fc111619575 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read.mlir
@@ -28,7 +28,7 @@ func.func @transfer_read_mask_1d(%A : memref<?xf32>, %base: index) {
func.func @transfer_read_inbounds_4(%A : memref<?xf32>, %base: index) {
  %fm42 = arith.constant -42.0: f32
  %f = vector.transfer_read %A[%base], %fm42
-      {permutation_map = affine_map<(d0) -> (d0)>, in_bounds = [true]} :
+      {permutation_map = affine_map<(d0) -> (d0)>, in_bounds = array<i1: true>} :
      memref<?xf32>, vector<4xf32>
  vector.print %f: vector<4xf32>
  return
@@ -37,7 +37,7 @@ func.func @transfer_read_inbounds_4(%A : memref<?xf32>, %base: index) {
func.func @transfer_read_mask_inbounds_4(%A : memref<?xf32>, %base: index) {
  %fm42 = arith.constant -42.0: f32
  %m = arith.constant dense<[0, 1, 0, 1]> : vector<4xi1>
-  %f = vector.transfer_read %A[%base], %fm42, %m {in_bounds = [true]}
+  %f = vector.transfer_read %A[%base], %fm42, %m {in_bounds = array<i1: true>}
      : memref<?xf32>, vector<4xf32>
  vector.print %f: vector<4xf32>
  return
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/transfer-write.mlir b/mlir/test/Integration/Dialect/Vector/CPU/transfer-write.mlir
index cc6763e54c1cb..820238c98fc2b 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/transfer-write.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/transfer-write.mlir
@@ -7,7 +7,7 @@ func.func @transfer_write16_inbounds_1d(%A : memref<?xf32>, %base: index) {
  %f = arith.constant 16.0 : f32
  %v = vector.splat %f : vector<16xf32>
  vector.transfer_write %v, %A[%base]
-    {permutation_map = affine_map<(d0) -> (d0)>, in_bounds = [true]}
+    {permutation_map = affine_map<(d0) -> (d0)>, in_bounds = array<i1: true>}
    : vector<16xf32>, memref<?xf32>
  return
}
@@ -62,7 +62,7 @@ func.func @transfer_write_inbounds_3d(%A : memref<4x4x4xf32>) {
  %v8 = vector.insert %f8, %v7[1, 2, 3] : f32 into vector<2x3x4xf32>
  vector.transfer_write %v8, %A[%c0, %c0, %c0]
    {permutation_map = affine_map<(d0, d1, d2) -> (d2, d0, d1)>,
-    in_bounds = [true, true, true]}
+    in_bounds = array<i1: true, true, true>}
    : vector<2x3x4xf32>, memref<4x4x4xf32>
  return
}
diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
index 378e5b39415b5..35f43af050d8f 100644
--- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir
@@ -21,16 +21,16 @@ func.func @gpu_func(%in: memref<1024xf32>, %out: memref<1xf32>) {
     in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) threads(%arg6, %arg7, %arg8)
     in (%arg12 = %c32, %arg13 = %c1, %arg14 = %c1) {
    vector.warp_execute_on_lane_0(%arg6)[32] {
-      %init = vector.transfer_read %out[%c0], %cst_0 {in_bounds = [true]} : memref<1xf32>, vector<1xf32>
+      %init = vector.transfer_read %out[%c0], %cst_0 {in_bounds = array<i1: true>} : memref<1xf32>, vector<1xf32>
      %13 = scf.for %arg0 = %c0 to %c1024 step %c32 iter_args(%arg1 = %init) -> (vector<1xf32>) {
-        %20 = vector.transfer_read %in[%arg0], %cst_0 {in_bounds = [true]} : memref<1024xf32>, vector<32xf32>
+        %20 = vector.transfer_read %in[%arg0], %cst_0 {in_bounds = array<i1: true>} : memref<1024xf32>, vector<32xf32>
        %21 = vector.reduction <add>, %20 : vector<32xf32> into f32
        %22 = vector.broadcast %21 : f32 to vector<1xf32>
        %23 = arith.addf %22, %arg1 : vector<1xf32>
        scf.yield %23 : vector<1xf32>
      }
      %14 = arith.divf %13, %cst : vector<1xf32>
-      vector.transfer_write %14, %out[%c0] {in_bounds = [true]} : vector<1xf32>, memref<1xf32>
+      vector.transfer_write %14, %out[%c0] {in_bounds = array<i1: true>} : vector<1xf32>, memref<1xf32>
    }
    gpu.terminator
  }
@@ -51,9 +51,9 @@ func.func @main() {
  %cst_2 = arith.constant dense<2.000000e+00> : vector<1xf32>
  // init the buffers.
  scf.for %i = %c0 to %c1024 step %c32 {
-    vector.transfer_write %cst_1, %0[%i] {in_bounds = [true]} : vector<32xf32>, memref<1024xf32>
+    vector.transfer_write %cst_1, %0[%i] {in_bounds = array<i1: true>} : vector<32xf32>, memref<1024xf32>
  }
-  vector.transfer_write %cst_2, %1[%c0] {in_bounds = [true]} : vector<1xf32>, memref<1xf32>
+  vector.transfer_write %cst_2, %1[%c0] {in_bounds = array<i1: true>} : vector<1xf32>, memref<1xf32>
  %3 = memref.cast %0 : memref<1024xf32> to memref<*xf32>
  gpu.host_register %3 : memref<*xf32>
  %5 = memref.cast %1 : memref<1xf32> to memref<*xf32>
diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
index 7e9234901ffa1..9db34eeaba89f 100644
--- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir
@@ -39,10 +39,10 @@ func.func @gpu_func(%arg1: memref<32xf32>, %arg2: memref<32xf32>) {
     in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) threads(%arg6, %arg7, %arg8)
     in (%arg12 = %c32, %arg13 = %c1, %arg14 = %c1) {
    vector.warp_execute_on_lane_0(%arg6)[32] {
-      %0 = vector.transfer_read %arg1[%c0], %cst {in_bounds = [true]} : memref<32xf32>, vector<32xf32>
+      %0 = vector.transfer_read %arg1[%c0], %cst {in_bounds = array<i1: true>} : memref<32xf32>, vector<32xf32>
      %1 = vector.transfer_read %arg2[%c0], %cst {in_bound = [true]} : memref<32xf32>, vector<32xf32>
      %2 = arith.addf %0, %1 : vector<32xf32>
-      vector.transfer_write %2, %arg1[%c0] {in_bounds = [true]} : vector<32xf32>, memref<32xf32>
+      vector.transfer_write %2, %arg1[%c0] {in_bounds = array<i1: true>} : vector<32xf32>, memref<32xf32>
    }
    gpu.terminator
  }
@@ -60,8 +60,8 @@ func.func @main() {
      24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0]>
      : vector<32xf32>
  %cst_2 = arith.constant dense<2.000000e+00> : vector<32xf32>
  // init the buffers.
-  vector.transfer_write %cst_1, %0[%c0] {in_bounds = [true]} : vector<32xf32>, memref<32xf32>
-  vector.transfer_write %cst_2, %1[%c0] {in_bounds = [true]} : vector<32xf32>, memref<32xf32>
+  vector.transfer_write %cst_1, %0[%c0] {in_bounds = array<i1: true>} : vector<32xf32>, memref<32xf32>
+  vector.transfer_write %cst_2, %1[%c0] {in_bounds = array<i1: true>} : vector<32xf32>, memref<32xf32>
  %3 = memref.cast %0 : memref<32xf32> to memref<*xf32>
  gpu.host_register %3 : memref<*xf32>
  %5 = memref.cast %1 : memref<32xf32> to memref<*xf32>