Skip to content

Commit 7472759

Browse files
committed
merge main into amd-staging
2 parents (ec5efc4 + 0e92beb), commit 7472759

File tree

63 files changed

+3402
-1469
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+3402
-1469
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1510,6 +1510,9 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
15101510
else
15111511
NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);
15121512

1513+
// No strict prescriptiveness for the number of threads.
1514+
llvm::Value *StrictNumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, 0);
1515+
15131516
assert(IfCondVal && "Expected a value");
15141517
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
15151518
llvm::Value *Args[] = {
@@ -1522,9 +1525,11 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
15221525
ID,
15231526
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
15241527
CGF.VoidPtrPtrTy),
1525-
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
1528+
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size()),
1529+
StrictNumThreadsVal};
1530+
15261531
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1527-
CGM.getModule(), OMPRTL___kmpc_parallel_51),
1532+
CGM.getModule(), OMPRTL___kmpc_parallel_60),
15281533
Args);
15291534
};
15301535

clang/test/OpenMP/amdgcn_target_device_vla.cpp

Lines changed: 331 additions & 110 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c

Lines changed: 257 additions & 47 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/declare_target_codegen_globalization.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ int maini1() {
4040
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
4141
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
4242
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
43-
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
43+
// CHECK1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1, i32 0)
4444
// CHECK1-NEXT: call void @__kmpc_target_deinit()
4545
// CHECK1-NEXT: ret void
4646
// CHECK1: worker.exit:

clang/test/OpenMP/metadirective_device_arch_codegen.cpp

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
1-
// REQUIRES: amdgpu-registered-target
2-
3-
// RUN: %clang_cc1 -fopenmp -fno-openmp-target-big-jump-loop -x c++ -w -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
4-
// RUN: %clang_cc1 -fopenmp -fno-openmp-target-big-jump-loop -x c++ -w -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -target-cpu gfx906 -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
2+
// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -target-cpu gfx906 -o - | FileCheck %s
3+
// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-ppc-spirv-host.bc
4+
// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-spirv-host.bc -o - | FileCheck %s
55
// expected-no-diagnostics
66

77

@@ -16,6 +16,12 @@ Inspired from SOLLVE tests:
1616

1717
#define N 1024
1818

19+
#ifdef __AMDGPU__
20+
#define GPU "amdgcn"
21+
#else
22+
#define GPU "spirv64"
23+
#endif
24+
1925
int metadirective1() {
2026

2127
int v1[N], v2[N], v3[N];
@@ -26,7 +32,7 @@ int metadirective1() {
2632
#pragma omp target map(to:v1,v2) map(from:v3, target_device_num) device(default_device)
2733
{
2834
#pragma omp metadirective \
29-
when(device={arch("amdgcn")}: teams distribute parallel for) \
35+
when(device={arch(GPU)}: teams distribute parallel for) \
3036
default(parallel for)
3137

3238
for (int i = 0; i < N; i++) {
@@ -38,28 +44,28 @@ int metadirective1() {
3844
return errors;
3945
}
4046

41-
// CHECK: define weak_odr protected amdgpu_kernel void @[[METADIRECTIVE:.+metadirective1[a-z0-9_]+]]
47+
// CHECK: define weak_odr protected {{amdgpu|spir}}_kernel void @[[METADIRECTIVE:.+metadirective1[a-z0-9_]+]]
4248
// CHECK: entry:
43-
// CHECK: %{{[0-9]}} = call i32 @__kmpc_target_init
49+
// CHECK: %{{[0-9]}} = call{{.*}} i32 @__kmpc_target_init
4450
// CHECK: user_code.entry:
45-
// CHECK: call void @[[METADIRECTIVE]]_omp_outlined
46-
// CHECK-NOT: call void @__kmpc_parallel_51
51+
// CHECK: call{{.*}} void @[[METADIRECTIVE]]_omp_outlined
52+
// CHECK-NOT: call{{.*}} void @__kmpc_parallel_60
4753
// CHECK: ret void
4854

4955

5056
// CHECK: define internal void @[[METADIRECTIVE]]_omp_outlined
5157
// CHECK: entry:
52-
// CHECK: call void @__kmpc_distribute_static_init
58+
// CHECK: call{{.*}} void @__kmpc_distribute_static_init
5359
// CHECK: omp.loop.exit:
54-
// CHECK: call void @__kmpc_distribute_static_fini
60+
// CHECK: call{{.*}} void @__kmpc_distribute_static_fini
5561

5662

5763
// CHECK: define internal void @[[METADIRECTIVE]]_omp_outlined_omp_outlined
5864
// CHECK: entry:
59-
// CHECK: call void @__kmpc_for_static_init_4
65+
// CHECK: call{{.*}} void @__kmpc_for_static_init_4
6066
// CHECK: omp.inner.for.body:
6167
// CHECK: store atomic {{.*}} monotonic
6268
// CHECK: omp.loop.exit:
63-
// CHECK-NEXT: call void @__kmpc_for_static_fini
69+
// CHECK-NEXT: call{{.*}} void @__kmpc_for_static_fini
6470
// CHECK-NEXT: ret void
6571

clang/test/OpenMP/metadirective_device_isa_codegen_amdgcn.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ int amdgcn_device_isa_selected() {
2424

2525
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_selected
2626
// CHECK: user_code.entry:
27-
// CHECK: call void @__kmpc_parallel_51
27+
// CHECK: call void @__kmpc_parallel_60
2828
// CHECK-NOT: call i32 @__kmpc_single
2929
// CHECK: ret void
3030

@@ -47,7 +47,7 @@ int amdgcn_device_isa_not_selected() {
4747
// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}amdgcn_device_isa_not_selected
4848
// CHECK: user_code.entry:
4949
// CHECK: call i32 @__kmpc_single
50-
// CHECK-NOT: call void @__kmpc_parallel_51
50+
// CHECK-NOT: call void @__kmpc_parallel_60
5151
// CHECK: ret void
5252

5353
#endif

0 commit comments

Comments
 (0)