-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[WIP] [flang] Always lower ALLOCATE/DEALLOCATE to runtime calls. #133238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-flang-fir-hlfir Author: Chaitanya (skc7) Changes: This is still a WIP patch. More changes incoming if required. Patch is 362.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133238.diff 23 Files Affected:
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 9938bd573d1fa..932bb0aafe8fe 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -470,21 +470,21 @@ class AllocateStmtHelper {
void genSimpleAllocation(const Allocation &alloc,
const fir::MutableBoxValue &box) {
bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
- bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
- bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
- !alloc.type.IsPolymorphic() &&
- !alloc.hasCoarraySpec() && !useAllocateRuntime &&
- !box.isPointer();
+
unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
- if (inlineAllocation &&
- ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
- // Pointers must use PointerAllocate so that their deallocations
- // can be validated.
- genInlinedAllocation(alloc, box);
- postAllocationAction(alloc);
- setPinnedToFalse();
- return;
+ if (isCudaSymbol) {
+ bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
+ !alloc.type.IsPolymorphic() &&
+ !alloc.hasCoarraySpec() && !useAllocateRuntime &&
+ !box.isPointer();
+ bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
+ if (inlineAllocation && isCudaDeviceContext) {
+ genInlinedAllocation(alloc, box);
+ postAllocationAction(alloc);
+ setPinnedToFalse();
+ return;
+ }
}
// Generate a sequence of runtime calls.
@@ -863,29 +863,27 @@ genDeallocate(fir::FirOpBuilder &builder,
const Fortran::semantics::Symbol *symbol = nullptr) {
bool isCudaSymbol = symbol && Fortran::semantics::HasCUDAAttr(*symbol);
bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
- bool inlineDeallocation =
- !box.isDerived() && !box.isPolymorphic() && !box.hasAssumedRank() &&
- !box.isUnlimitedPolymorphic() && !errorManager.hasStatSpec() &&
- !useAllocateRuntime && !box.isPointer();
- // Deallocate intrinsic types inline.
- if (inlineDeallocation &&
- ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
- // Pointers must use PointerDeallocate so that their deallocations
- // can be validated.
- mlir::Value ret = fir::factory::genFreemem(builder, loc, box);
- if (symbol)
- postDeallocationAction(converter, builder, *symbol);
- return ret;
- }
- // Use runtime calls to deallocate descriptor cases. Sync MutableBoxValue
- // with its descriptor before and after calls if needed.
- errorManager.genStatCheck(builder, loc);
- mlir::Value stat;
- if (!isCudaSymbol)
+ mlir::Value stat = nullptr;
+ if (!isCudaSymbol) {
+ // For non-CUDA symbols, always use runtime deallocation.
+ errorManager.genStatCheck(builder, loc);
stat =
genRuntimeDeallocate(builder, loc, box, errorManager, declaredTypeDesc);
- else
- stat = genCudaDeallocate(builder, loc, box, errorManager, *symbol);
+ } else {
+ bool inlineDeallocation =
+ !box.isDerived() && !box.isPolymorphic() && !box.hasAssumedRank() &&
+ !box.isUnlimitedPolymorphic() && !errorManager.hasStatSpec() &&
+ !useAllocateRuntime && !box.isPointer();
+
+ if (inlineDeallocation && isCudaDeviceContext) {
+ // Inline deallocation for CUDA when conditions hold.
+ stat = fir::factory::genFreemem(builder, loc, box);
+ } else {
+ // Otherwise, use the CUDA-specific runtime deallocation.
+ errorManager.genStatCheck(builder, loc);
+ stat = genCudaDeallocate(builder, loc, box, errorManager, *symbol);
+ }
+ }
fir::factory::syncMutableBoxFromIRBox(builder, loc, box);
if (symbol)
postDeallocationAction(converter, builder, *symbol);
diff --git a/flang/test/HLFIR/dummy_deallocation.f90 b/flang/test/HLFIR/dummy_deallocation.f90
index 9d3c51c843bcc..a28ee7f4b00a6 100644
--- a/flang/test/HLFIR/dummy_deallocation.f90
+++ b/flang/test/HLFIR/dummy_deallocation.f90
@@ -5,10 +5,10 @@
! is not deallocated in entry SUB_B.
! CHECK-LABEL: func.func @_QPsub_a
-! CHECK: fir.freemem
+! CHECK: fir.call @_FortranAAllocatableDeallocate
! CHECK-LABEL: func.func @_QPsub_b
-! CHECK-NOT: fir.freemem
+! CHECK-NOT: fir.call @_FortranAAllocatableDeallocate
SUBROUTINE SUB_A(A)
INTEGER, INTENT(out), ALLOCATABLE, DIMENSION (:) :: A
RETURN
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 4aa86165252a7..f9c60aa9fcb19 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -124,7 +124,7 @@ end subroutine
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
! CHECK: fir.if
-! CHECK: fir.freemem
+! CHECK: fir.call @_FortranAAllocatableDeallocate
! CHECK: fir.if %{{.*}} {
! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
@@ -206,6 +206,7 @@ end
! CHECK-LABEL: func.func @_QPsetpinned()
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsetpinnedEplog"}
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %[[PLOG]] {uniq_name = "_QFsetpinnedEplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+! CHECK: fir.call @_FortranAAllocatableAllocate
! CHECK: %[[FALSE:.*]] = arith.constant false
! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>
diff --git a/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90 b/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90
index 328fb778eaf88..4285edc329292 100644
--- a/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90
+++ b/flang/test/Lower/HLFIR/allocatable-and-pointer-status-change.f90
@@ -4,108 +4,158 @@
subroutine allocation(x)
character(*), allocatable :: x(:)
-! CHECK-LABEL: func.func @_QPallocation(
-! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0:[a-z0-9]*]] typeparams %[[VAL_2:[a-z0-9]*]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<allocatable>, {{.*}}Ex
deallocate(x)
-! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>
-! CHECK: %[[VAL_5:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>) -> !fir.heap<!fir.array<?x!fir.char<1,?>>>
-! CHECK: fir.freemem %[[VAL_5]] : !fir.heap<!fir.array<?x!fir.char<1,?>>>
-! CHECK: %[[VAL_6:.*]] = fir.zero_bits !fir.heap<!fir.array<?x!fir.char<1,?>>>
-! CHECK: %[[VAL_7:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
-! CHECK: %[[VAL_9:.*]] = fir.embox %[[VAL_6]](%[[VAL_8]]) typeparams %[[VAL_2]] : (!fir.heap<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>
allocate(x(100))
-! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32
-! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index
-! CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : index
-! CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : index
-! CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_2]] : index), %[[VAL_14]] {fir.must_be_heap = true, uniq_name = "_QFallocationEx.alloc"}
-! CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
-! CHECK: %[[VAL_17:.*]] = fir.embox %[[VAL_15]](%[[VAL_16]]) typeparams %[[VAL_2]] : (!fir.heap<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>
-! CHECK: fir.store %[[VAL_17]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>
end subroutine
+! CHECK-LABEL: func.func @_QPallocation(
+! CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>
+! CHECK: %[[VAL_3:.*]] = fir.box_elesize %[[VAL_2]] : (!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>) -> index
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] typeparams %[[VAL_3]] dummy_scope %[[VAL_1]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFallocationEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>, index, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>)
+! CHECK: %[[VAL_5:.*]] = arith.constant false
+! CHECK: %[[VAL_6:.*]] = fir.absent !fir.box<none>
+! CHECK: %[[VAL_7:.*]] = fir.address_of(@_QQclXca783e65b88d3c02cf95fcee70c426bc) : !fir.ref<!fir.char<1,96>>
+! CHECK: %[[VAL_8:.*]] = arith.constant 7 : i32
+! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_7]] : (!fir.ref<!fir.char<1,96>>) -> !fir.ref<i8>
+! CHECK: %[[VAL_11:.*]] = fir.call @_FortranAAllocatableDeallocate(%[[VAL_9]], %[[VAL_5]], %[[VAL_6]], %[[VAL_10]], %[[VAL_8]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: %[[VAL_12:.*]] = arith.constant false
+! CHECK: %[[VAL_13:.*]] = fir.absent !fir.box<none>
+! CHECK: %[[VAL_14:.*]] = fir.address_of(@_QQclXca783e65b88d3c02cf95fcee70c426bc) : !fir.ref<!fir.char<1,96>>
+! CHECK: %[[VAL_15:.*]] = arith.constant 8 : i32
+! CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32
+! CHECK: %[[VAL_18:.*]] = arith.constant 0 : i32
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_16]] : (index) -> i64
+! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_17]] : (i32) -> i64
+! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[VAL_19]], %[[VAL_18]], %[[VAL_20]], %[[VAL_21]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
+! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_14]] : (!fir.ref<!fir.char<1,96>>) -> !fir.ref<i8>
+! CHECK: %[[VAL_24:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_22]], %[[VAL_12]], %[[VAL_13]], %[[VAL_23]], %[[VAL_15]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
+! CHECK: return
+! CHECK: }
subroutine pointer_assignment(p, ziel)
real, pointer :: p(:)
real, target :: ziel(42:)
-! CHECK-LABEL: func.func @_QPpointer_assignment(
-! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0:[a-z0-9]*]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<pointer>, {{.*}}Ep
-! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1:[a-z0-9]*]](%[[VAL_5:[a-z0-9]*]]) dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<target>, {{.*}}Eziel
p => ziel
-! CHECK: %[[VAL_7:.*]] = fir.shift %[[VAL_4:.*]] : (index) -> !fir.shift<1>
-! CHECK: %[[VAL_8:.*]] = fir.rebox %[[VAL_6]]#1(%[[VAL_7]]) : (!fir.box<!fir.array<?xf32>>, !fir.shift<1>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
-! CHECK: fir.store %[[VAL_8]] to %[[VAL_2]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
p => ziel(42:77:3)
-! CHECK: %[[VAL_14:.*]] = hlfir.designate %{{.*}}#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<12xf32>>
-! CHECK: %[[VAL_15:.*]] = fir.rebox %[[VAL_14]] : (!fir.box<!fir.array<12xf32>>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
-! CHECK: fir.store %[[VAL_15]] to %[[VAL_2]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
end subroutine
+! CHECK-LABEL: func.func @_QPpointer_assignment(
+! CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {fir.bindc_name = "p"},
+! CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "ziel", fir.target}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {fortran_attrs = {{.*}}<pointer>, uniq_name = "_QFpointer_assignmentEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 42 : i64
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> index
+! CHECK: %[[VAL_6:.*]] = fir.shift %[[VAL_5]] : (index) -> !fir.shift<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_6]]) dummy_scope %[[VAL_2]] {fortran_attrs = {{.*}}<target>, uniq_name = "_QFpointer_assignmentEziel"} : (!fir.box<!fir.array<?xf32>>, !fir.shift<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK: %[[VAL_8:.*]] = fir.shift %[[VAL_5]] : (index) -> !fir.shift<1>
+! CHECK: %[[VAL_9:.*]] = fir.rebox %[[VAL_7]]#1(%[[VAL_8]]) : (!fir.box<!fir.array<?xf32>>, !fir.shift<1>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
+! CHECK: fir.store %[[VAL_9]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+! CHECK: %[[VAL_10:.*]] = arith.constant 42 : index
+! CHECK: %[[VAL_11:.*]] = arith.constant 77 : index
+! CHECK: %[[VAL_12:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_13:.*]] = arith.constant 12 : index
+! CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_10]]:%[[VAL_11]]:%[[VAL_12]]) shape %[[VAL_14]] : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<12xf32>>
+! CHECK: %[[VAL_16:.*]] = fir.rebox %[[VAL_15]] : (!fir.box<!fir.array<12xf32>>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
+! CHECK: fir.store %[[VAL_16]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+! CHECK: return
+! CHECK: }
subroutine pointer_remapping(p, ziel)
real, pointer :: p(:, :)
real, target :: ziel(10, 20, 30)
-! CHECK-LABEL: func.func @_QPpointer_remapping(
-! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0:[a-z0-9]*]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<pointer>, {{.*}}Ep
-! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_1:[a-z0-9]*]](%[[VAL_6:[a-z0-9]*]]) dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<target>, {{.*}}Eziel
p(2:7, 3:102) => ziel
-! CHECK: %[[VAL_8:.*]] = arith.constant 2 : i64
-! CHECK: %[[VAL_9:.*]] = arith.constant 7 : i64
-! CHECK: %[[VAL_10:.*]] = arith.constant 3 : i64
-! CHECK: %[[VAL_11:.*]] = arith.constant 102 : i64
-! CHECK: %[[VAL_12:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_8]] : (i64) -> index
-! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (i64) -> index
-! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_14]], %[[VAL_13]] : index
-! CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_15]], %[[VAL_12]] : index
-! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_10]] : (i64) -> index
-! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (i64) -> index
-! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] : index
-! CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_12]] : index
-! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.ref<!fir.array<10x20x30xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
-! CHECK: %[[VAL_22:.*]] = fir.shape_shift %[[VAL_8]], %[[VAL_16]], %[[VAL_10]], %[[VAL_20]] : (i64, index, i64, index) -> !fir.shapeshift<2>
-! CHECK: %[[VAL_23:.*]] = fir.embox %[[VAL_21]](%[[VAL_22]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>>
-! CHECK: fir.store %[[VAL_23]] to %[[VAL_2]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
end subroutine
+! CHECK-LABEL: func.func @_QPpointer_remapping(
+! CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> {fir.bindc_name = "p"},
+! CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref<!fir.array<10x20x30xf32>> {fir.bindc_name = "ziel", fir.target}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {fortran_attrs = {{.*}}<pointer>, uniq_name = "_QFpointer_remappingEp"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>)
+! CHECK: %[[VAL_4:.*]] = arith.constant 10 : index
+! CHECK: %[[VAL_5:.*]] = arith.constant 20 : index
+! CHECK: %[[VAL_6:.*]] = arith.constant 30 : index
+! CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_4]], %[[VAL_5]], %[[VAL_6]] : (index, index, index) -> !fir.shape<3>
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_7]]) dummy_scope %[[VAL_2]] {fortran_attrs = {{.*}}<target>, uniq_name = "_QFpointer_remappingEziel"} : (!fir.ref<!fir.array<10x20x30xf32>>, !fir.shape<3>, !fir.dscope) -> (!fir.ref<!fir.array<10x20x30xf32>>, !fir.ref<!fir.array<10x20x30xf32>>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 2 : i64
+! CHECK: %[[VAL_10:.*]] = arith.constant 7 : i64
+! CHECK: %[[VAL_11:.*]] = arith.constant 3 : i64
+! CHECK: %[[VAL_12:.*]] = arith.constant 102 : i64
+! CHECK: %[[VAL_13:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (i64) -> index
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_10]] : (i64) -> index
+! CHECK: %[[VAL_16:.*]] = arith.subi %[[VAL_15]], %[[VAL_14]] : index
+! CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_16]], %[[VAL_13]] : index
+! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (i64) -> index
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_12]] : (i64) -> index
+! CHECK: %[[VAL_20:.*]] = arith.subi %[[VAL_19]], %[[VAL_18]] : index
+! CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_13]] : index
+! CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_8]]#1 : (!fir.ref<!fir.array<10x20x30xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
+! CHECK: %[[VAL_23:.*]] = fir.shape_shift %[[VAL_9]], %[[VAL_17]], %[[VAL_11]], %[[VAL_21]] : (i64, index, i64, index) -> !fir.shapeshift<2>
+! CHECK: %[[VAL_24:.*]] = fir.embox %[[VAL_22]](%[[VAL_23]]) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.box<!fir.ptr<!fir.array<?x?xf32>>>
+! CHECK: fir.store %[[VAL_24]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+! CHECK: return
+! CHECK: }
subroutine alloc_comp(x)
type t
real, allocatable :: a(:)
end type
type(t) :: x(10)
-! CHECK-LABEL: func.func @_QPalloc_comp(
-! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0:[a-z0-9]*]](%[[VAL_2:[a-z0-9]*]]) {{.*}}Ex
allocate(x(10_8)%a(100_8))
-! CHECK: %[[VAL_4:.*]] = arith....
[truncated]
|
clementval
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this supposed to change? We already support ALLOCATE/DEALLOCATE as expected.
428479c to
b0594a2
Compare
| postAllocationAction(alloc); | ||
| setPinnedToFalse(); | ||
| return; | ||
| if (isCudaSymbol) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want inline allocation for non-CUDA symbols as well.
Why? What is the rationale? What is the use case? Going through the runtime is not good for simple cases. |
| use acc_declare_allocatable_test | ||
| allocate(data1(100)) | ||
| ! CHECK: fir.store %{{.*}} to %{{.*}} {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_alloc>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> | ||
| ! CHECK: fir.call @_FortranAAllocatableAllocate |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test is checking for the declare action attached to the allocate operation so changing it this way makes no sense.
The current pattern makes it rather hard to replace the default |
Everything is already in place in the descriptor and allocation of the allocatables to use a different allocator. The descriptor holds an allocator id that references a position in the allocator registry. The allocation from the runtime uses this allocator id to fetch the correct allocator/deallocator to perform the allocation and free actions.
Depending on your use case, you can register a new allocator and you can override the default allocator if you want. There is a |
Ah, right. Thanks for pointing me to that! However, I do not see how such an allocator would be invoked, when Flang emits direct calls to |
They don't get called that's why you can force the allocation via runtime with |
I'm sorry, but I do not see that:
module alloc
contains
subroutine alloc_array(array, n)
implicit none
real, dimension(:), allocatable :: array
integer :: n
allocate(array(n))
end subroutine alloc_array
subroutine dealloc_array(array)
implicit none
real, dimension(:), allocatable :: array
deallocate(array)
end subroutine dealloc_array
end module alloc
gives me these pieces of LLVM IR: This seems to indicate to me that the compiler indeed uses the default |
Have you used the flag to force runtime allocation? What's your command line? I see this:
|
I'm using: It seems that Flang does not have that flag. But: because it's in |
Yeah the option is not wired all the way to the driver. I guess it can be done if needed. |
ALLOCATE/DEALLOCATE statements in flang are lowered to malloc/free in simpler cases and to runtime calls in complex cases. This PR always lowers ALLOCATE/DEALLOCATE to runtime calls.