File tree Expand file tree Collapse file tree 1 file changed +3
-2
lines changed
third_party/amd/lib/TritonAMDGPUTransforms Expand file tree Collapse file tree 1 file changed +3
-2
lines changed Original file line number Diff line number Diff line change @@ -765,14 +765,15 @@ struct PipelinePass : impl::TritonAMDGPUStreamPipelineBase<PipelinePass> {
765
765
useAsyncCopy, waitAtTail);
766
766
}
767
767
768
- if (useAsyncCopy) {
768
+ if (useAsyncCopy && numStages != 3 ) {
769
769
llvm::SmallSetVector<ttg::AsyncWaitOp, 8 > waitOps;
770
770
moduleOp.walk ([&](ttg::AsyncWaitOp waitOp) {
771
771
if (auto maybeForOp = dyn_cast<scf::ForOp>(waitOp->getParentOp ()))
772
772
// FIXME: There's a potential bug in combineRedundantWaitOps(), it
773
773
// generates incorrect IR order when numStages==3.
774
774
if (tt::getNumStagesOrDefault (maybeForOp, numStages) == 3 )
775
- waitOps.insert (waitOp);
775
+ return ;
776
+ waitOps.insert (waitOp);
776
777
});
777
778
tt::combineRedundantWaitOps (waitOps);
778
779
}
You can’t perform that action at this time.
0 commit comments