File tree Expand file tree Collapse file tree 1 file changed +5
-7
lines changed
mlir/lib/Conversion/SCFToGPU Expand file tree Collapse file tree 1 file changed +5
-7
lines changed Original file line number Diff line number Diff line change @@ -633,16 +633,14 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
633633 Operation *op = worklist.pop_back_val ();
634634 // Now walk over the body and clone it.
635635 // TODO: This is only correct if there either is no further scf.parallel
636- // nested or this code is side-effect free. Otherwise we might need
637- // predication. We are overly conservative for now and only allow
638- // side-effects in the innermost scope .
636+ // nested or this code has side effects but the buffers it writes do not
637+ // alias the buffers accessed by the inner loop. Otherwise we might need
638+ // predication.
639639 if (auto nestedParallel = dyn_cast<ParallelOp>(op)) {
640640 // Before entering a nested scope, make sure there have been no
641- // sideeffects until now.
641+ // side effects until now, or that the nested operations do not access the
642+ // buffers written by the outer scope.
642643 if (seenSideeffects) {
643- // Go through all operations in the nested parallel and check if any
644- // of the side-effecting operations access buffers that have been
645- // written to in the outer scope.
646644 bool accessesWrittenBuffer = false ;
647645 nestedParallel.walk ([&](Operation *nestedOp) {
648646 if (accessesWrittenBuffer)
You can’t perform that action at this time.
0 commit comments