@@ -403,9 +403,6 @@ namespace {
403403struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
404404 using OpConversionPattern::OpConversionPattern;
405405
406- ConvertVectorStore (MLIRContext *context, bool useAtomicWrites)
407- : OpConversionPattern<vector::StoreOp>(context) {}
408-
409406 LogicalResult
410407 matchAndRewrite (vector::StoreOp op, OpAdaptor adaptor,
411408 ConversionPatternRewriter &rewriter) const override {
@@ -416,10 +413,10 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
416413 " only 1-D vectors are supported ATM" );
417414
418415 auto loc = op.getLoc ();
419- auto convertedType = cast<MemRefType>(adaptor.getBase ().getType ());
420416 auto valueToStore = cast<VectorValue>(op.getValueToStore ());
421417 auto oldElementType = valueToStore.getType ().getElementType ();
422- auto newElementType = convertedType.getElementType ();
418+ auto newElementType =
419+ cast<MemRefType>(adaptor.getBase ().getType ()).getElementType ();
423420 int srcBits = oldElementType.getIntOrFloatBitWidth ();
424421 int dstBits = newElementType.getIntOrFloatBitWidth ();
425422
@@ -464,21 +461,24 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
464461 : getConstantIntValue (linearizedInfo.intraDataOffset );
465462
466463 if (!foldedNumFrontPadElems) {
467- return failure (" subbyte store emulation: dynamic front padding size is "
468- " not yet implemented" );
464+ return rewriter.notifyMatchFailure (
465+ op, " subbyte store emulation: dynamic front padding size is "
466+ " not yet implemented" );
469467 }
470468
471469 auto memrefBase = cast<MemRefValue>(adaptor.getBase ());
472470
473- // Shortcut: conditions when subbyte emulated store at the front is not
474- // needed:
471+ // Conditions when subbyte emulated store is not needed:
475472 // 1. The source vector size (in bits) is a multiple of byte size.
476473 // 2. The address of the store is aligned to the emulated width boundary.
477474 //
478475 // For example, to store a vector<4xi2> to <13xi2> at offset 4, does not
479476 // need unaligned emulation because the store address is aligned and the
480477 // source is a whole byte.
481- if (isAlignedEmulation && *foldedNumFrontPadElems == 0 ) {
478+ bool emulationRequiresPartialStores =
479+ !isAlignedEmulation || *foldedNumFrontPadElems != 0 ;
480+ if (!emulationRequiresPartialStores) {
481+ // Basic case: storing full bytes.
482482 auto numElements = origElements / numSrcElemsPerDest;
483483 auto bitCast = rewriter.create <vector::BitCastOp>(
484484 loc, VectorType::get (numElements, newElementType),
0 commit comments