Skip to content

Commit e231d46

Browse files
committed
Rebase, and move pass back to where it was.
1 parent cabb6f2 commit e231d46

File tree

1 file changed

+5
-3
lines changed
  • compiler/src/iree/compiler/Codegen/LLVMGPU

1 file changed

+5
-3
lines changed

compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -482,6 +482,11 @@ void addGPUTileAndFusePassPipeline(OpPassManager &funcPassManager,
482 482
funcPassManager.addPass(createCSEPass());
483 483
}
484 484

485+
// Convert global load DMAs after reduction tiling but before pack
486+
// decomposition. DecomposePackUnPackOps introduces linalg.transpose which
487+
// breaks the source tracing in the coalesced DMA conversion.
488+
funcPassManager.addPass(createGPUConvertToCoalescedDMAPass());
489+
485 490
// Step 3. Decompose pack and unpack ops and propagate the resulting reshapes.
486 491
funcPassManager.addPass(createDecomposePackUnPackOpsPass(
487 492
DecomposePackUnPackOpsPassOptions{/*tileOuterToOne=*/false,
@@ -502,9 +507,6 @@ void addGPUTileAndFusePassPipeline(OpPassManager &funcPassManager,
502 507
funcPassManager.addPass(createConfigTrackingCanonicalizerPass());
503 508
funcPassManager.addPass(createCSEPass());
504 509

505-
// Convert global load DMAs after pack decomposition but before thread tiling.
506-
funcPassManager.addPass(createGPUConvertToCoalescedDMAPass());
507-
508 510
// Step 4. Tile and fuse tileable ops to subgroups/threads.
509 511
{
510 512
GPUApplyTilingLevelPassOptions options;

0 commit comments

Comments
 (0)