Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1771,12 +1771,118 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
}

// Register the indirect Vtable:
// This is similar to OMPTargetGlobalVarEntryIndirect, except that the
// size field refers to the size of memory pointed to, not the size of
// the pointer symbol itself (which is implicitly the size of a pointer).
OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
llvm::GlobalValue::WeakODRLinkage);
}

void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
const VarDecl *VD) {
// TODO: add logic to avoid duplicate vtable registrations per
// translation unit; though for external linkage, this should no
// longer be an issue - or at least we can avoid the issue by
// checking for an existing offloading entry. But, perhaps the
// better approach is to defer emission of the vtables and offload
// entries until later (by tracking a list of items that need to be
// emitted).

llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

// Generate a new externally visible global to point to the
// internally visible vtable. Doing this allows us to keep the
// visibility and linkage of the associated vtable unchanged while
// allowing the runtime to access its value. The externally
// visible global var needs to be emitted with a unique mangled
// name that won't conflict with similarly named (internal)
// vtables in other translation units.

// Register vtable with source location of dynamic object in map
// clause.
llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(),
VTable->getName());

llvm::GlobalVariable *Addr = VTable;
size_t PointerSize = CGM.getDataLayout().getPointerSize();
SmallString<128> AddrName;
OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
AddrName.append("addr");

if (CGM.getLangOpts().OpenMPIsTargetDevice) {
Addr = new llvm::GlobalVariable(
CGM.getModule(), VTable->getType(),
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
AddrName,
/*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal,
CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
}
OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
AddrName, VTable,
CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
llvm::GlobalValue::WeakODRLinkage);
}

void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM,
CXXRecordDecl *CXXRecord,
const VarDecl *VD) {
// Register C++ VTable to OpenMP Offload Entry if it's a new
// CXXRecordDecl.
if (CXXRecord && CXXRecord->isDynamicClass() &&
CGM.getOpenMPRuntime().VTableDeclMap.find(CXXRecord) ==
CGM.getOpenMPRuntime().VTableDeclMap.end()) {
CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
CGM.EmitVTable(CXXRecord);
CodeGenVTables VTables = CGM.getVTables();
llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
if (VTablesAddr)
CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
// Emit VTable for all the fields containing dynamic CXXRecord
for (const FieldDecl *Field : CXXRecord->fields()) {
if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
emitAndRegisterVTable(CGM, RecordDecl, VD);
}
// Emit VTable for all dynamic parent class
for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
emitAndRegisterVTable(CGM, BaseDecl, VD);
}
}
};

void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
// Register VTable by scanning through the map clause of OpenMP target region.
// Get CXXRecordDecl and VarDecl from Expr.
auto GetVTableDecl = [](const Expr *E) {
QualType VDTy = E->getType();
CXXRecordDecl *CXXRecord = nullptr;
if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
VDTy = RefType->getPointeeType();
if (VDTy->isPointerType())
CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
else
CXXRecord = VDTy->getAsCXXRecordDecl();

const VarDecl *VD = nullptr;
if (auto *DRE = dyn_cast<DeclRefExpr>(E))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if it is a MemberExprRef?

VD = cast<VarDecl>(DRE->getDecl());
return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
};
// Collect VTable from OpenMP map clause.
for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
for (const auto *E : C->varlist()) {
auto DeclPair = GetVTableDecl(E);
emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
}
}
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
QualType VarType,
StringRef Name) {
Expand Down Expand Up @@ -6249,6 +6355,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
}
}
registerVTable(D);
}

/// Checks if the expression is constant or does not have non-trivial function
Expand Down Expand Up @@ -9955,6 +10062,19 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
if (!S)
return;

// Register vtable from device for target data and target directives.
// Add this block here since scanForTargetRegionsFunctions ignores
// target data by checking if S is a executable directive (target).
if (isa<OMPExecutableDirective>(S) &&
isOpenMPTargetDataManagementDirective(
dyn_cast<OMPExecutableDirective>(S)->getDirectiveKind())) {
Comment on lines +10068 to +10070
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (isa<OMPExecutableDirective>(S) &&
isOpenMPTargetDataManagementDirective(
dyn_cast<OMPExecutableDirective>(S)->getDirectiveKind())) {
if (auto *E = dyn_cast<OMPExecutableDirective>(S);E &&
isOpenMPTargetDataManagementDirective(
E->getDirectiveKind())) {

auto &E = *dyn_cast<OMPExecutableDirective>(S);
// Don't need to check if it's device compile
// since scanForTargetRegionsFunctions currently only called
// in device compilation.
registerVTable(E);
}

// Codegen OMP target directives that offload compute to the device.
bool RequiresDeviceCodegen =
isa<OMPExecutableDirective>(S) &&
Expand Down
20 changes: 20 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,9 @@ class CGOpenMPRuntime {
LValue PosLVal, const OMPTaskDataTy::DependData &Data,
Address DependenciesArray);

/// Keep track of VTable Declarations so we don't register duplicate VTable.
llvm::SmallDenseMap<CXXRecordDecl *, const VarDecl *> VTableDeclMap;

public:
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
Expand Down Expand Up @@ -1111,6 +1114,23 @@ class CGOpenMPRuntime {
virtual void emitDeclareTargetFunction(const FunctionDecl *FD,
llvm::GlobalValue *GV);

/// Register VTable to OpenMP offload entry.
/// \param VTable VTable of the C++ class.
/// \param RD C++ class decl.
virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
const VarDecl *VD);
/// Emit code for registering vtable by scanning through map clause
/// in OpenMP target region.
/// \param D OpenMP target directive.
virtual void registerVTable(const OMPExecutableDirective &D);

/// Emit and register VTable for the C++ class in OpenMP offload entry.
/// \param CXXRecord C++ class decl.
/// \param VD Variable decl which holds VTable.
virtual void emitAndRegisterVTable(CodeGenModule &CGM,
CXXRecordDecl *CXXRecord,
const VarDecl *VD);

/// Creates artificial threadprivate variable with name \p Name and type \p
/// VarType.
/// \param VarType Type of the artificial threadprivate variable.
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7617,6 +7617,10 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause(
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) {
// Emit vtable only from host for target data directive.
if (!CGM.getLangOpts().OpenMPIsTargetDevice)
CGM.getOpenMPRuntime().registerVTable(S);

CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
/*SeparateBeginEndCalls=*/true);

Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CGVTables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy,
/*DontDefer=*/true, /*IsThunk=*/true);
}

llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) {
llvm::GlobalVariable *VTable =
CGM.getCXXABI().getAddrOfVTable(RD, CharUnits());
return VTable;
}

static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
llvm::Function *ThunkFn, bool ForVTable,
GlobalDecl GD) {
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGVTables.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ class CodeGenVTables {
llvm::GlobalVariable::LinkageTypes Linkage,
const CXXRecordDecl *RD);

/// GetAddrOfVTable - Get the address of the VTable for the given record
/// decl.
llvm::GlobalVariable *GetAddrOfVTable(const CXXRecordDecl *RD);

/// EmitThunks - Emit the associated thunks for the given global decl.
void EmitThunks(GlobalDecl GD);

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,9 @@ class CodeGenModule : public CodeGenTypeCache {
// i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32)
llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr;

// Store indirect CallExprs that are within an omp target region
llvm::SmallPtrSet<const CallExpr *, 16> OMPTargetCalls;

InstrProfStats &getPGOStats() { return PGOStats; }
llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }

Expand Down
42 changes: 42 additions & 0 deletions clang/test/OpenMP/target_vtable_codegen_container.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ | FileCheck %s
// expected-no-diagnostics

// CHECK-DAG: @_ZTV7Derived
// CHECK-DAG: @_ZTV4Base
template <typename T>
class Container {
private:
T value;
public:
Container() : value() {}
Container(T val) : value(val) {}

T getValue() const { return value; }

void setValue(T val) { value = val; }
};

class Base {
public:
virtual void foo() {}
};
class Derived : public Base {};

class Test {
public:
Container<Derived> v;
};

int main() {
Test test;
Derived d;
test.v.setValue(d);

// Make sure we emit VTable for type indirectly (template specialized type)
#pragma omp target map(test)
{
test.v.getValue().foo();
}
return 0;
}
48 changes: 48 additions & 0 deletions clang/test/OpenMP/target_vtable_codegen_explicit.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
// expected-no-diagnostics

// Make sure both host and device compilation emit vtable for Dervied
// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any
// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any
// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any
// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any
// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any
// CHECK-DAG: $_ZN4BaseD2Ev = comdat any
// CHECK-DAG: $_ZTV7Derived = comdat any
class Base {
public:

virtual ~Base() = default;

virtual void BaseA(int a) { }
};

// CHECK: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] }
class Derived : public Base {
public:

~Derived() override = default;

void BaseA(int a) override { x = a; }

virtual void DerivedB() { }
private:
int x;
};

int main() {

Derived d;
Base& c = d;
int a = 50;
// Should emit vtable for Derived since d is added to map clause
#pragma omp target data map (to: d, a)
{
#pragma omp target map(d)
{
c.BaseA(a);
}
}
return 0;
}
43 changes: 43 additions & 0 deletions clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
// expected-no-diagnostics

namespace {

// Make sure both host and device compilation emit vtable for Dervied
// CHECK-DAG: @_ZTVN12_GLOBAL__N_17DerivedE
// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD1Ev
// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD0Ev
// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived5BaseAEi
// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived8DerivedBEv
class Base {
public:
virtual ~Base() = default;
virtual void BaseA(int a) { }
};

class Derived : public Base {
public:
~Derived() override = default;
void BaseA(int a) override { x = a; }
virtual void DerivedB() { }
private:
int x;
};

};

int main() {

Derived d;
Base& c = d;
int a = 50;
#pragma omp target data map (to: d, a)
{
#pragma omp target
{
c.BaseA(a);
}
}
return 0;
}
Loading