Skip to content

Commit eef4b5a

Browse files
authored
[flang] [cuda] Fix CUDA implicit data transfer entity creation (llvm#139414)
Fixed an issue in `genCUDAImplicitDataTransfer` where creating an `hlfir::Entity` from a symbol address could fail when the address comes from a `hlfir.declare` operation. The fix checks whether the address is defined by a `hlfir.declare` operation and, if so, uses the base value from the declare op when available, falling back to the original address otherwise.
1 parent bfd4af8 commit eef4b5a

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

flang/lib/Lower/Bridge.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4778,7 +4778,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
47784778
nbDeviceResidentObject <= 1 &&
47794779
"Only one reference to the device resident object is supported");
47804780
auto addr = getSymbolAddress(sym);
4781-
hlfir::Entity entity{addr};
4781+
mlir::Value baseValue;
4782+
if (auto declareOp =
4783+
llvm::dyn_cast<hlfir::DeclareOp>(addr.getDefiningOp()))
4784+
baseValue = declareOp.getBase();
4785+
else
4786+
baseValue = addr;
4787+
4788+
hlfir::Entity entity{baseValue};
47824789
auto [temp, cleanup] =
47834790
hlfir::createTempFromMold(loc, builder, entity);
47844791
auto needCleanup = fir::getIntIfConstant(cleanup);
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
2+
3+
subroutine testr2(N1,N2)
4+
real(4), managed :: ai4(N1,N2)
5+
real(4), allocatable :: bRefi4(:)
6+
7+
integer :: i1, i2
8+
9+
do i2 = 1, N2
10+
do i1 = 1, N1
11+
ai4(i1,i2) = i1 + N1*(i2-1)
12+
enddo
13+
enddo
14+
15+
allocate(bRefi4 (N1))
16+
do i1 = 1, N1
17+
bRefi4(i1) = (ai4(i1,1)+ai4(i1,N2))*N2/2
18+
enddo
19+
deallocate(bRefi4)
20+
21+
end subroutine
22+
23+
!CHECK-LABEL: func.func @_QPtestr2
24+
!CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.array<?x?xf32>, %{{.*}}, %{{.*}} : index, index {bindc_name = "ai4", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} -> !fir.ref<!fir.array<?x?xf32>>
25+
!CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOC]](%{{.*}}) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
26+
!CHECK: %[[DEST:.*]] = hlfir.designate %[[DECLARE]]#0 (%{{.*}}, %{{.*}}) : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
27+
!CHECK: cuf.data_transfer %{{.*}}#0 to %[[DEST]] {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<f32>, !fir.ref<f32>

0 commit comments

Comments
 (0)