Skip to content

Commit 478974c

Browse files
authored
[Flang][AMD] Skip setting alloc idx for allocate in target (llvm#2550)
This patch skips emitting the _FortranAAMDAllocatableSetAllocIdx call for ALLOCATE statements that are inside an "omp target" region or inside functions marked "declare target".
1 parent 4aea8db commit 478974c

File tree

4 files changed

+97
-0
lines changed

4 files changed

+97
-0
lines changed

flang/lib/Lower/Allocatable.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "flang/Runtime/pointer.h"
3838
#include "flang/Semantics/tools.h"
3939
#include "flang/Semantics/type.h"
40+
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
4041
#include "llvm/Support/CommandLine.h"
4142

4243
/// By default fir memory operation fir::AllocMemOp/fir::FreeMemOp are used.
@@ -174,11 +175,34 @@ static void genRuntimeInitCharacter(fir::FirOpBuilder &builder,
174175
builder.create<fir::CallOp>(loc, callee, convertedArgs);
175176
}
176177

178+
/// Check if region is nested in omp.target or
179+
/// region nested in function with declare target
180+
bool isRegionNestedInOmpTarget(mlir::Region &region) {
181+
mlir::Operation *parentOp = region.getParentOp();
182+
while (parentOp) {
183+
if (auto declareTargetOp =
184+
llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(parentOp)) {
185+
if (declareTargetOp.isDeclareTarget())
186+
return true;
187+
}
188+
if (llvm::isa<mlir::omp::TargetOp>(parentOp))
189+
return true;
190+
mlir::Region *parentRegion = parentOp->getParentRegion();
191+
if (!parentRegion)
192+
break;
193+
parentOp = parentRegion->getParentOp();
194+
}
195+
196+
return false;
197+
}
198+
177199
/// Generate a runtime call to set allocator idx of descriptor for target amd.
178200
static void genAMDRuntimeDescriptorSetAllocIdx(fir::FirOpBuilder &builder,
179201
mlir::Location loc,
180202
const fir::MutableBoxValue &box,
181203
int allocatorId) {
204+
if (isRegionNestedInOmpTarget(builder.getRegion()))
205+
return;
182206
auto *context = builder.getContext();
183207
mlir::Type descriptorTy = box.getAddr().getType();
184208
mlir::IntegerType posTy = builder.getI32Type();
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
! RUN: %flang -ffast-amd-memory-allocator -S -emit-llvm -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx90a -o - %s | FileCheck %s --check-prefix=CHECK-OMP
2+
! RUN: %flang -ffast-amd-memory-allocator -S -emit-llvm -target amdgcn-- -o - %s | FileCheck %s --check-prefix=CHECK
3+
4+
! Test routine marked as an OpenMP declare-target procedure with
! device_type(nohost). It allocates and then deallocates a scalar integer
! allocatable so the generated IR can be inspected for runtime calls.
subroutine func_t_device()
5+
!$omp declare target enter(func_t_device) device_type(nohost)
6+
integer, ALLOCATABLE :: poly
7+
8+
! Expectations for the ALLOCATE below: with the OMP prefix (first run line,
! OpenMP offload enabled) no _FortranAAMDAllocatableSetAllocIdx call may
! precede the allocate call; with the default prefix (second run line, plain
! amdgcn target) the call must be present.
! CHECK-OMP-NOT: call void @_FortranAAMDAllocatableSetAllocIdx({{.*}}, i32 1)
9+
! CHECK: call void @_FortranAAMDAllocatableSetAllocIdx({{.*}}, i32 1)
10+
! CHECK-OMP: call i32 @_FortranAAllocatableAllocate
11+
! CHECK: call i32 @_FortranAAllocatableAllocate
12+
ALLOCATE(poly)
13+
14+
! Both runs expect the deallocate runtime call.
! CHECK-OMP: call i32 @_FortranAAllocatableDeallocate
15+
! CHECK: call i32 @_FortranAAllocatableDeallocate
16+
DEALLOCATE(poly)
17+
end subroutine func_t_device
18+
19+
! Driver program: invokes func_t_device from inside an OpenMP target region.
program main
20+
implicit none
21+
!$omp target
22+
call func_t_device()
23+
!$omp end target
24+
end program
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
! RUN: %flang -ffast-amd-memory-allocator -S -emit-llvm -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx90a -o - %s | FileCheck %s --check-prefix=CHECK-OMP
2+
! RUN: %flang -ffast-amd-memory-allocator -S -emit-llvm -target amdgcn-- -o - %s | FileCheck %s --check-prefix=CHECK
3+
4+
! Test routine marked as an OpenMP declare-target procedure with
! device_type(nohost). Unlike a straight-line body, the ALLOCATE/DEALLOCATE
! pair here sits inside a DO loop, so the allocation is nested in a loop
! construct within the function.
subroutine func_t_device()
5+
!$omp declare target enter(func_t_device) device_type(nohost)
6+
integer, ALLOCATABLE :: poly
7+
! NOTE(review): j is not declared in this routine and there is no
! "implicit none", so it relies on implicit typing.
do j=1,10
8+
! Expectations for the ALLOCATE below: with the OMP prefix (first run line,
! OpenMP offload enabled) no _FortranAAMDAllocatableSetAllocIdx call may
! precede the allocate call; with the default prefix (second run line, plain
! amdgcn target) the call must be present.
! CHECK-OMP-NOT: call void @_FortranAAMDAllocatableSetAllocIdx({{.*}}, i32 1)
9+
! CHECK: call void @_FortranAAMDAllocatableSetAllocIdx({{.*}}, i32 1)
10+
! CHECK-OMP: call i32 @_FortranAAllocatableAllocate
11+
! CHECK: call i32 @_FortranAAllocatableAllocate
12+
ALLOCATE(poly)
13+
14+
! Both runs expect the deallocate runtime call.
! CHECK-OMP: call i32 @_FortranAAllocatableDeallocate
15+
! CHECK: call i32 @_FortranAAllocatableDeallocate
16+
DEALLOCATE(poly)
17+
end do
18+
end subroutine func_t_device
19+
20+
! Driver program: invokes func_t_device from inside an OpenMP target region.
program main
21+
implicit none
22+
!$omp target
23+
call func_t_device()
24+
!$omp end target
25+
end program
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
! RUN: %flang -ffast-amd-memory-allocator -S -emit-llvm -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx90a -o - %s | FileCheck %s --check-prefix=CHECK-OMP
2+
! RUN: %flang -ffast-amd-memory-allocator -S -emit-llvm -target amdgcn-- -o - %s | FileCheck %s --check-prefix=CHECK
3+
4+
! Test program: allocates and deallocates a privatized allocatable array
! directly inside a combined "target teams distribute parallel do" loop, with
! unified_shared_memory required.
program main
5+
implicit none
6+
!$omp requires unified_shared_memory
7+
REAL, DIMENSION(:), ALLOCATABLE :: poly
8+
integer,parameter :: n = 10
9+
! NOTE(review): i is declared but not used in the visible body.
integer :: i,j
10+
!$omp target teams distribute parallel do private(poly)
11+
do j=1,n
12+
13+
! Expectations for the ALLOCATE below: with the OMP prefix (first run line,
! OpenMP offload enabled) no _FortranAAMDAllocatableSetAllocIdx call may
! precede the allocate call; with the default prefix (second run line, plain
! amdgcn target) the call must be present.
! CHECK-OMP-NOT: call void @_FortranAAMDAllocatableSetAllocIdx({{.*}}, i32 1)
14+
! CHECK: call void @_FortranAAMDAllocatableSetAllocIdx({{.*}}, i32 1)
15+
! CHECK-OMP: call i32 @_FortranAAllocatableAllocate
16+
! CHECK: call i32 @_FortranAAllocatableAllocate
17+
ALLOCATE(poly(1:3))
18+
! NOTE(review): a kind-8 literal is assigned to a default REAL array; the
! value is converted on assignment -- presumably intentional, confirm.
poly = 2.0_8
19+
! Both runs expect the deallocate runtime call.
! CHECK-OMP: call i32 @_FortranAAllocatableDeallocate
20+
! CHECK: call i32 @_FortranAAllocatableDeallocate
21+
DEALLOCATE(poly)
22+
enddo
23+
!$omp end target teams distribute parallel do
24+
end program

0 commit comments

Comments
 (0)