Skip to content

Commit f724540

Browse files
authored
[flang][cuda] Fix retrieval of nested evaluation in cuf kernel (#91298)
`loopEval` was declared inside the for loop that iterates over the nested loops, so the same loop control was redeclared for each level of the loop nest. Make sure we iterate over all the loops by moving the `loopEval` declaration outside of the for loop.
1 parent d5cabf8 commit f724540

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

flang/lib/Lower/Bridge.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2585,11 +2585,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
25852585
llvm::SmallVector<mlir::Type> ivTypes;
25862586
llvm::SmallVector<mlir::Location> ivLocs;
25872587
llvm::SmallVector<mlir::Value> ivValues;
2588+
Fortran::lower::pft::Evaluation *loopEval =
2589+
&getEval().getFirstNestedEvaluation();
25882590
for (unsigned i = 0; i < nestedLoops; ++i) {
25892591
const Fortran::parser::LoopControl *loopControl;
2590-
Fortran::lower::pft::Evaluation *loopEval =
2591-
&getEval().getFirstNestedEvaluation();
2592-
25932592
mlir::Location crtLoc = loc;
25942593
if (i == 0) {
25952594
loopControl = &*outerDoConstruct->GetLoopControl();

flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ subroutine sub1()
1111

1212
! CHECK-LABEL: func.func @_QPsub1()
1313
! CHECK: %[[IV:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
14-
14+
! CHECK: %[[IV_J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
1515
!$cuf kernel do <<< 1, 2 >>>
1616
do i = 1, n
1717
a(i) = a(i) * b(i)
@@ -41,7 +41,11 @@ subroutine sub1()
4141
end do
4242
end do
4343

44-
! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
44+
! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
45+
! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
46+
! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
47+
! CHECK: %[[ARG1_I32:.*]] = fir.convert %[[ARG1]] : (index) -> i32
48+
! CHECK: fir.store %[[ARG1_I32]] to %[[IV_J]]#1 : !fir.ref<i32>
4549
! CHECK: {n = 2 : i64}
4650

4751
!$cuf kernel do(2) <<< (1,*), (256,1) >>>

0 commit comments

Comments
 (0)