Skip to content

Commit 086be17

Browse files
authored
[Flang][OpenMP] Update device compilation to remove host operations (llvm#1898)
2 parents 6fb8900 + 17962c5 commit 086be17

20 files changed

+1454
-879
lines changed

flang/include/flang/Optimizer/OpenMP/Passes.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ def FunctionFilteringPass : Pass<"omp-function-filtering"> {
5353
"for the target device.";
5454
let dependentDialects = [
5555
"mlir::func::FuncDialect",
56-
"fir::FIROpsDialect"
56+
"fir::FIROpsDialect",
57+
"mlir::omp::OpenMPDialect"
5758
];
5859
}
5960

flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp

Lines changed: 463 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g. from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!REQUIRES: amdgpu-registered-target
10+
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
11+
12+
! CHECK-NOT: define void @nested_target_in_parallel
13+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
14+
subroutine nested_target_in_parallel(v)
15+
implicit none
16+
integer, intent(inout) :: v(10)
17+
18+
!$omp parallel
19+
!$omp target map(tofrom: v)
20+
!$omp end target
21+
!$omp end parallel
22+
end subroutine
23+
24+
! CHECK-NOT: define void @nested_target_in_wsloop
25+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_wsloop_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
26+
subroutine nested_target_in_wsloop(v)
27+
implicit none
28+
integer, intent(inout) :: v(10)
29+
integer :: i
30+
31+
!$omp do
32+
do i=1, 10
33+
!$omp target map(tofrom: v)
34+
!$omp end target
35+
end do
36+
end subroutine
37+
38+
! CHECK-NOT: define void @nested_target_in_parallel_with_private
39+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
40+
subroutine nested_target_in_parallel_with_private(v)
41+
implicit none
42+
integer, intent(inout) :: v(10)
43+
integer :: x
44+
x = 10
45+
46+
!$omp parallel firstprivate(x)
47+
!$omp target map(tofrom: v(1:x))
48+
!$omp end target
49+
!$omp end parallel
50+
end subroutine
51+
52+
! CHECK-NOT: define void @nested_target_in_task_with_private
53+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_task_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
54+
subroutine nested_target_in_task_with_private(v)
55+
implicit none
56+
integer, intent(inout) :: v(10)
57+
integer :: x
58+
x = 10
59+
60+
!$omp task firstprivate(x)
61+
!$omp target map(tofrom: v(1:x))
62+
!$omp end target
63+
!$omp end task
64+
end subroutine
65+
66+
! CHECK-NOT: define void @target_and_atomic_update
67+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_and_atomic_update_{{.*}}(ptr %{{.*}})
68+
subroutine target_and_atomic_update(x, expr)
69+
implicit none
70+
integer, intent(inout) :: x, expr
71+
72+
!$omp target
73+
!$omp end target
74+
75+
!$omp atomic update
76+
x = x + expr
77+
end subroutine
78+
79+
! CHECK-NOT: define void @nested_target_in_associate
80+
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_associate_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
81+
subroutine nested_target_in_associate(x)
82+
integer, pointer, contiguous :: x(:)
83+
associate(y => x)
84+
!$omp target map(tofrom: y)
85+
!$omp end target
86+
end associate
87+
end subroutine
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g. from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!REQUIRES: amdgpu-registered-target
10+
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
11+
12+
! This tests the fix for https://github.com/llvm/llvm-project/issues/84606
13+
! We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
14+
15+
! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}QQmain{{.*}}({{.*}})
16+
program main
17+
implicit none
18+
integer, parameter :: N = 5
19+
integer, dimension(5) :: a
20+
integer :: i
21+
integer :: target_a = 0
22+
23+
!$omp task depend(out:a)
24+
do i = 1, N
25+
a(i) = i
26+
end do
27+
!$omp end task
28+
29+
!$omp target map(tofrom:target_a) map(tofrom:a)
30+
do i = 1, N
31+
target_a = target_a + i
32+
a(i) = a(i) + i
33+
end do
34+
!$omp end target
35+
print*, target_a
36+
print*, a
37+
end program main
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
!===----------------------------------------------------------------------===!
2+
! This directory can be used to add Integration tests involving multiple
3+
! stages of the compiler (e.g. from Fortran to LLVM IR). It should not
4+
! contain executable tests. We should only add tests here sparingly and only
5+
! if there is no other way to test. Repeat this message in each test that is
6+
! added to this directory and sub-directories.
7+
!===----------------------------------------------------------------------===!
8+
9+
!REQUIRES: amdgpu-registered-target
10+
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
11+
12+
! The aim of this test is to verify host threadprivate directives do not cause
13+
! crashes during OpenMP target device codegen when used in conjunction with
14+
! target code in the same function.
15+
16+
! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]], ptr %[[ARG2:.*]]) #{{[0-9]+}} {
17+
! CHECK: %[[ALLOCA_X:.*]] = alloca ptr, align 8, addrspace(5)
18+
! CHECK: %[[ASCAST_X:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_X]] to ptr
19+
! CHECK: store ptr %[[ARG1]], ptr %[[ASCAST_X]], align 8
20+
21+
! CHECK: %[[ALLOCA_N:.*]] = alloca ptr, align 8, addrspace(5)
22+
! CHECK: %[[ASCAST_N:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_N]] to ptr
23+
! CHECK: store ptr %[[ARG2]], ptr %[[ASCAST_N]], align 8
24+
25+
! CHECK: %[[LOAD_X:.*]] = load ptr, ptr %[[ASCAST_X]], align 8
26+
! CHECK: call void @bar_(ptr %[[LOAD_X]], ptr %[[ASCAST_N]])
27+
28+
module test
29+
implicit none
30+
integer :: n
31+
!$omp threadprivate(n)
32+
33+
contains
34+
subroutine foo(x)
35+
integer, intent(inout) :: x(10)
36+
!$omp target map(tofrom: x(1:n))
37+
call bar(x, n)
38+
!$omp end target
39+
end subroutine
40+
end module

flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
2-
!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s
3-
!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s
4-
!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
1+
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s --check-prefixes=BOTH,HOST
2+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE
3+
!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s --check-prefixes=BOTH,HOST
4+
!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE
55

66
program test_link
77

@@ -20,13 +20,14 @@ program test_link
2020
integer, pointer :: test_ptr2
2121
!$omp declare target link(test_ptr2)
2222

23-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<i32>, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<i32> {name = "test_int"}
23+
!BOTH-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<i32>, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<i32> {name = "test_int"}
2424
!$omp target
2525
test_int = test_int + 1
2626
!$omp end target
2727

2828

29-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.array<3xi32>>, !fir.array<3xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{%.*}}) -> !fir.ref<!fir.array<3xi32>> {name = "test_array_1d"}
29+
!HOST-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.array<3xi32>>, !fir.array<3xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{%.*}}) -> !fir.ref<!fir.array<3xi32>> {name = "test_array_1d"}
30+
!DEVICE-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.array<3xi32>>, !fir.array<3xi32>) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<!fir.array<3xi32>> {name = "test_array_1d"}
3031
!$omp target
3132
do i = 1,3
3233
test_array_1d(i) = i * 2
@@ -35,18 +36,18 @@ program test_link
3536

3637
allocate(test_ptr1)
3738
test_ptr1 = 1
38-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(always, implicit, descriptor, to) capture(ByRef) members({{%.*}} : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "test_ptr1"}
39+
!BOTH-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(always, implicit, descriptor, to) capture(ByRef) members({{%.*}} : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "test_ptr1"}
3940
!$omp target
4041
test_ptr1 = test_ptr1 + 1
4142
!$omp end target
4243

43-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<i32>, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<i32> {name = "test_target"}
44+
!BOTH-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<i32>, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<i32> {name = "test_target"}
4445
!$omp target
4546
test_target = test_target + 1
4647
!$omp end target
4748

4849

49-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(always, implicit, descriptor, to) capture(ByRef) members({{%.*}} : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "test_ptr2"}
50+
!BOTH-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(always, implicit, descriptor, to) capture(ByRef) members({{%.*}} : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "test_ptr2"}
5051
test_ptr2 => test_target
5152
!$omp target
5253
test_ptr2 = test_ptr2 + 1

0 commit comments

Comments
 (0)