|
21 | 21 | #include "mlir/IR/PatternMatch.h" |
22 | 22 | #include "mlir/Interfaces/DestinationStyleOpInterface.h" |
23 | 23 | #include "mlir/Interfaces/TilingInterface.h" |
| 24 | +#include "mlir/Transforms/RegionUtils.h" |
24 | 25 | #include "llvm/ADT/TypeSwitch.h" |
25 | 26 | #include "llvm/Support/Debug.h" |
26 | 27 | #include <optional> |
@@ -255,6 +256,28 @@ SmallVector<LoopLikeOpInterface> mlir::scfX::getOuterNestLoopsWhile( |
255 | 256 | return {nestLoops.rbegin(), nestLoops.rend()}; |
256 | 257 | } |
257 | 258 |
|
| 259 | +/// A rewriter listener that records every operation reported as erased while it is installed, forwarding each erase notification to the previously installed listener (if any). |
| 260 | +struct ErasedOpListener : public RewriterBase::Listener { |
| 261 | +private: |
| 262 | + /// Pointers to the operations reported through `notifyOperationErased`; only operations are recorded here. Stored as `void *` and used purely for identity lookup in `isErased`. |
| 263 | + DenseSet<void *> erased; |
| 264 | + // Listener that was installed before this one, to which erase notifications are forwarded; null when there was none. |
| 265 | + OpBuilder::Listener *oldListenerHook = nullptr; |
| 266 | + |
| 267 | +public: |
| 268 | + ErasedOpListener() = default; |
| 269 | + ErasedOpListener(OpBuilder::Listener *oldListener) |
| 270 | + : oldListenerHook(oldListener) {} |
| 271 | + void notifyOperationErased(Operation *op) override { |
| 272 | + // Forward to the old listener first (only when it is non-null and is a RewriterBase::Listener), then record `op` as erased. NOTE(review): only this notification is overridden here, so other events presumably do not reach the old listener while this one is installed - confirm. |
| 273 | + if (auto *oldListener = |
| 274 | + dyn_cast_if_present<RewriterBase::Listener>(oldListenerHook)) |
| 275 | + oldListener->notifyOperationErased(op); |
| 276 | + erased.insert(op); |
| 277 | + } |
| 278 | + bool isErased(Operation *op) { return erased.count(op); } |
| 279 | +}; |
| 280 | + |
258 | 281 | /// Enhanced version of `tileAndFuseProducerOfSliceImpl`, which can deal with |
259 | 282 | /// multi-level `extractSliceOp`. E.g. |
260 | 283 | /// |
@@ -296,6 +319,55 @@ mlir::scfX::tileAndFuseProducerOfSlice(RewriterBase &rewriter, |
296 | 319 | tileAndFuseProducerOfSliceImpl(rewriter, sliceOp, outerLoops); |
297 | 320 | if (!fuseProducerResult) |
298 | 321 | return std::nullopt; |
| 322 | + |
| 323 | + // Cache old listener. |
| 324 | + OpBuilder::Listener *oldListener = rewriter.getListener(); |
| 325 | + // Set new listener. |
| 326 | + ErasedOpListener newListener = ErasedOpListener(oldListener); |
| 327 | + rewriter.setListener(&newListener); |
| 328 | + |
| 329 | + auto producerOp = |
| 330 | + cast<TilingInterface>(fuseProducerResult->origProducer.getDefiningOp()); |
| 331 | + unsigned resultNumber = fuseProducerResult->origProducer.getResultNumber(); |
| 332 | + // Cache the candidate slice's offsets, sizes, and strides before running DCE. |
| 333 | + auto extractSliceOp = cast<tensor::ExtractSliceOp>(candidateSliceOp); |
| 334 | + SmallVector<OpFoldResult> offsets = extractSliceOp.getMixedOffsets(), |
| 335 | + sizes = extractSliceOp.getMixedSizes(), |
| 336 | + strides = extractSliceOp.getMixedStrides(); |
| 337 | + // Explicitly execute DCE. |
| 338 | + (void)mlir::simplifyRegions(rewriter, {*producerOp->getParentRegion()}); |
| 339 | + // The fused producer needs a yielded replacement only if DCE did not erase it. |
| 340 | + bool yieldReplacement = !newListener.isErased(producerOp); |
| 341 | + // Reset to old listener. |
| 342 | + rewriter.setListener(oldListener); |
| 343 | + |
| 344 | + if (yieldReplacement) { |
| 345 | + OpBuilder::InsertionGuard g(rewriter); |
| 346 | + // Set insertPoint right before tiled op. |
| 347 | + rewriter.setInsertionPoint(fuseProducerResult->tiledOps[0]); |
| 348 | + // Manually clone new candidate slice. |
| 349 | + auto clonedExtractSliceOp = rewriter.create<tensor::ExtractSliceOp>( |
| 350 | + producerOp->getLoc(), producerOp->getResult(resultNumber), offsets, |
| 351 | + sizes, strides); |
| 352 | + // Yield a replacement for the fused producer to avoid repeated computation. |
| 353 | + if (failed(scf::yieldReplacementForFusedProducer( |
| 354 | + rewriter, clonedExtractSliceOp, fuseProducerResult.value(), |
| 355 | + outerLoops))) |
| 356 | + return std::nullopt; |
| 357 | + // Erase cloned candidate slice. |
| 358 | + rewriter.eraseOp(clonedExtractSliceOp); |
| 359 | + |
| 360 | + unsigned loopNumResults = outerLoops.front()->getNumResults(), |
| 361 | + producerNumResults = producerOp->getNumResults(); |
| 362 | + // Replace other users of fused producer with new loop results. |
| 363 | + for (auto &&[index, result] : llvm::enumerate(producerOp->getResults())) { |
| 364 | + rewriter.replaceAllUsesWith( |
| 365 | + result, outerLoops.front()->getResult(loopNumResults - |
| 366 | + producerNumResults + index)); |
| 367 | + } |
| 368 | + // Erase fused producer op. |
| 369 | + rewriter.eraseOp(producerOp); |
| 370 | + } |
299 | 371 | } |
300 | 372 | return fuseProducerResult; |
301 | 373 | } |
|
0 commit comments