File tree: 1 file changed, +5 -3 lines changed
compiler/src/iree/compiler/Codegen/LLVMGPU: 1 file changed, +5 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -482,6 +482,11 @@ void addGPUTileAndFusePassPipeline(OpPassManager &funcPassManager,
482482 funcPassManager.addPass (createCSEPass ());
483483 }
484484
485+ // Convert global load DMAs after reduction tiling but before pack
486+ // decomposition. DecomposePackUnPackOps introduces linalg.transpose, which
487+ // breaks source tracing in the coalesced DMA conversion.
488+ funcPassManager.addPass (createGPUConvertToCoalescedDMAPass ());
489+
485490 // Step 3. Decompose pack and unpack ops and propagate the resulting reshapes.
486491 funcPassManager.addPass (createDecomposePackUnPackOpsPass (
487492 DecomposePackUnPackOpsPassOptions{/* tileOuterToOne=*/ false ,
@@ -502,9 +507,6 @@ void addGPUTileAndFusePassPipeline(OpPassManager &funcPassManager,
502507 funcPassManager.addPass (createConfigTrackingCanonicalizerPass ());
503508 funcPassManager.addPass (createCSEPass ());
504509
505- // Convert global load DMAs after pack decomposition but before thread tiling.
506- funcPassManager.addPass (createGPUConvertToCoalescedDMAPass ());
507-
508510 // Step 4. Tile and fuse tileable ops to subgroups/threads.
509511 {
510512 GPUApplyTilingLevelPassOptions options;
You can’t perform that action at this time.
0 commit comments