@@ -990,18 +990,18 @@ tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag,
990990 // from the bbArg instead. This allows reusing the container's output tensor
991991 // (instead of creating a new one) when both producer and container write
992992 // to the same output.
993+ bool cloned = false ;
993994 if (LoopLikeOpInterface containerLoop =
994- dyn_cast<LoopLikeOpInterface>(sliceOpToTile->getParentOp ())) {
995+ dyn_cast<LoopLikeOpInterface>(sliceOpToTile->getParentOp ());
996+ containerLoop && dyn_cast<DestinationStyleOpInterface>(producerOp)) {
995997 Operation *clone = rewriter.clone (*producerOp);
998+ cloned = true ;
996999 rewriter.modifyOpInPlace (clone, [&]() {
9971000 // Iterate over the outputs of the producer and over the loop bbArgs and
9981001 // check if any bbArg points to the same value as the producer output. In
9991002 // such a case, make the producer output point to the bbArg directly.
1000- auto dpsInterface = dyn_cast<DestinationStyleOpInterface>(clone);
1001- if (!dpsInterface)
1002- return ;
1003-
1004- for (OpOperand &initOperandPtr : dpsInterface.getDpsInitsMutable ()) {
1003+ for (OpOperand &initOperandPtr :
1004+ cast<DestinationStyleOpInterface>(clone).getDpsInitsMutable ()) {
10051005 Value producerOperand =
10061006 clone->getOperand (initOperandPtr.getOperandNumber ());
10071007 for (BlockArgument containerIterArg :
@@ -1063,7 +1063,7 @@ tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag,
10631063 resultNumber, offsets, sizes);
10641064
10651065 // Cleanup clone.
1066- if (dyn_cast<LoopLikeOpInterface>(containingOp))
1066+ if (dyn_cast<LoopLikeOpInterface>(containingOp) && cloned )
10671067 rewriter.eraseOp (tileableProducer);
10681068
10691069 return std::make_tuple (tileAndFuseResult->tiledOps , newContainingOp);
0 commit comments