Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -870,9 +870,6 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
OMPBuilder.setConfig(Config);

if (!CGM.getLangOpts().OpenMPIsTargetDevice)
llvm_unreachable("OpenMP can only handle device code.");

if (CGM.getLangOpts().OpenMPCUDAMode)
CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA;

Expand Down
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6801,7 +6801,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
CodeGenModule &CGM = CGF.CGM;

// On device emit this construct as inlined code.
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
if (CGM.getLangOpts().OpenMPIsTargetDevice ||
CGM.getOpenMPRuntime().isGPU()) {
OMPLexicalScope Scope(CGF, S, OMPD_target);
CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,6 @@ void CodeGenModule::createOpenMPRuntime() {
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
case llvm::Triple::amdgcn:
assert(getLangOpts().OpenMPIsTargetDevice &&
"OpenMP AMDGPU/NVPTX is only prepared to deal with device code.");
OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this));
break;
default:
Expand Down
13 changes: 0 additions & 13 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4210,19 +4210,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Args, OPT_fopenmp_version_EQ,
(IsSimdSpecified || IsTargetSpecified) ? 51 : Opts.OpenMP, Diags))
Opts.OpenMP = Version;
// Provide diagnostic when a given target is not expected to be an OpenMP
// device or host.
if (!Opts.OpenMPIsTargetDevice) {
switch (T.getArch()) {
default:
break;
// Add unsupported host targets here:
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
Diags.Report(diag::err_drv_omp_host_target_not_supported) << T.str();
break;
}
}
}

// Set the flag to prevent the implementation from emitting device exception
Expand Down
220 changes: 220 additions & 0 deletions clang/test/OpenMP/gpu_target.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --version 5
// expected-no-diagnostics

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=AMDGCN
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=NVPTX

typedef enum omp_allocator_handle_t {
omp_null_allocator = 0,
omp_default_mem_alloc = 1,
omp_large_cap_mem_alloc = 2,
omp_const_mem_alloc = 3,
omp_high_bw_mem_alloc = 4,
omp_low_lat_mem_alloc = 5,
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
KMP_ALLOCATOR_MAX_HANDLE = ~(0LU)
} omp_allocator_handle_t;

int d = 0;
#pragma omp allocate(d) allocator(omp_default_mem_alloc)

int g = 0;
#pragma omp allocate(g) allocator(omp_cgroup_mem_alloc)

extern const int c = 0;
#pragma omp allocate(c) allocator(omp_const_mem_alloc)


int foo() {
int t = 0;
#pragma omp allocate(t) allocator(omp_thread_mem_alloc)
return t;
}

void bar() {
#pragma omp target
;
#pragma omp parallel
;
}

void baz(int *p) {
#pragma omp atomic
*p += 1;
}

int qux() {
#if defined(__NVPTX__)
return 1;
#elif defined(__AMDGPU__)
return 2;
#endif
}
//.
// AMDGCN: @c = addrspace(4) constant i32 0, align 4
// AMDGCN: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
// AMDGCN: @[[GLOB1:[0-9]+]] = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
// AMDGCN: @d = global i32 0, align 4
// AMDGCN: @g = global i32 0, align 4
// AMDGCN: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
//.
// NVPTX: @d = global i32 0, align 4
// NVPTX: @g = global i32 0, align 4
// NVPTX: @c = addrspace(4) constant i32 0, align 4
// NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
// NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
//.
// AMDGCN-LABEL: define dso_local noundef i32 @_Z3foov(
// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[T:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: [[T_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[T]] to ptr
// AMDGCN-NEXT: store i32 0, ptr [[T_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_ASCAST]], align 4
// AMDGCN-NEXT: ret i32 [[TMP0]]
//
//
// AMDGCN-LABEL: define dso_local void @_Z3barv(
// AMDGCN-SAME: ) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
// AMDGCN-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
// AMDGCN-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// AMDGCN-NEXT: ret void
//
//
// AMDGCN-LABEL: define internal void @_Z3barv_omp_outlined(
// AMDGCN-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
// AMDGCN-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
// AMDGCN-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// AMDGCN-NEXT: ret void
//
//
// AMDGCN-LABEL: define internal void @_Z3barv_omp_outlined_wrapper(
// AMDGCN-SAME: i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
// AMDGCN-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
// AMDGCN-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
// AMDGCN-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
// AMDGCN-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
// AMDGCN-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
// AMDGCN-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
// AMDGCN-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// AMDGCN-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
// AMDGCN-NEXT: call void @_Z3barv_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3:[0-9]+]]
// AMDGCN-NEXT: ret void
//
//
// AMDGCN-LABEL: define dso_local void @_Z3bazPi(
// AMDGCN-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
// AMDGCN-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4
// AMDGCN-NEXT: ret void
//
//
// AMDGCN-LABEL: define dso_local noundef i32 @_Z3quxv(
// AMDGCN-SAME: ) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: ret i32 2
//
//
// NVPTX-LABEL: define dso_local noundef i32 @_Z3foov(
// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[T:%.*]] = alloca i32, align 4
// NVPTX-NEXT: store i32 0, ptr [[T]], align 4
// NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[T]], align 4
// NVPTX-NEXT: ret i32 [[TMP0]]
//
//
// NVPTX-LABEL: define dso_local void @_Z3barv(
// NVPTX-SAME: ) #[[ATTR0]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// NVPTX-NEXT: ret void
//
//
// NVPTX-LABEL: define internal void @_Z3barv_omp_outlined(
// NVPTX-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// NVPTX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// NVPTX-NEXT: ret void
//
//
// NVPTX-LABEL: define internal void @_Z3barv_omp_outlined_wrapper(
// NVPTX-SAME: i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
// NVPTX-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
// NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
// NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
// NVPTX-NEXT: call void @_Z3barv_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
// NVPTX-NEXT: ret void
//
//
// NVPTX-LABEL: define dso_local void @_Z3bazPi(
// NVPTX-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
// NVPTX-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
// NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
// NVPTX-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4
// NVPTX-NEXT: ret void
//
//
// NVPTX-LABEL: define dso_local noundef i32 @_Z3quxv(
// NVPTX-SAME: ) #[[ATTR0]] {
// NVPTX-NEXT: [[ENTRY:.*:]]
// NVPTX-NEXT: ret i32 1
//
//.
// AMDGCN: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// AMDGCN: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// AMDGCN: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// AMDGCN: attributes #[[ATTR3]] = { nounwind }
// AMDGCN: attributes #[[ATTR4:[0-9]+]] = { alwaysinline }
//.
// NVPTX: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
// NVPTX: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
// NVPTX: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
// NVPTX: attributes #[[ATTR3]] = { nounwind }
// NVPTX: attributes #[[ATTR4:[0-9]+]] = { alwaysinline }
//.
// AMDGCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
// AMDGCN: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// AMDGCN: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 45}
// AMDGCN: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
// NVPTX: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// NVPTX: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 45}
// NVPTX: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
3 changes: 0 additions & 3 deletions clang/test/OpenMP/target_messages.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
// RUN: %clang_cc1 -verify=expected,omp4 -fopenmp-simd -fopenmp-version=45 -std=c++11 -o - %s
// RUN: %clang_cc1 -verify=expected,omp5 -fopenmp-simd -std=c++11 -o - %s
// CHECK: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd'
// RUN: not %clang_cc1 -fopenmp -std=c++11 -triple nvptx64-nvidia-cuda -o - %s 2>&1 | FileCheck --check-prefix CHECK-UNSUPPORTED-HOST-TARGET %s
// RUN: not %clang_cc1 -fopenmp -std=c++11 -triple nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck --check-prefix CHECK-UNSUPPORTED-HOST-TARGET %s
// CHECK-UNSUPPORTED-HOST-TARGET: error: target '{{nvptx64-nvidia-cuda|nvptx-nvidia-cuda}}' is not a supported OpenMP host target
// RUN: not %clang_cc1 -fopenmp -std=c++11 -fopenmp-targets=hexagon-linux-gnu -o - %s 2>&1 | FileCheck --check-prefix CHECK-UNSUPPORTED-DEVICE-TARGET %s
// CHECK-UNSUPPORTED-DEVICE-TARGET: OpenMP target is invalid: 'hexagon-linux-gnu'

Expand Down
Loading