Skip to content

Commit a00def3

Browse files
Jason-VanBeusekomchichunchenJeffery Sandoval
committed
[OpenMP][clang] Register Vtables on device for indirect calls - clang/llvm changes
- Register Vtable's on device during codegen - Add support in OMPIRBuilder - Add test cases for vtable codegen Co-authored-by: Chi-Chun Chen <[email protected]> Co-authored-by: Jeffery Sandoval <[email protected]>
1 parent 714f032 commit a00def3

14 files changed

+504
-4
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,12 +1771,129 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
17711771
Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
17721772
}
17731773

1774+
// Register the indirect Vtable:
1775+
// This is similar to OMPTargetGlobalVarEntryIndirect, except that the
1776+
// size field refers to the size of memory pointed to, not the size of
1777+
// the pointer symbol itself (which is implicitly the size of a pointer).
17741778
OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
17751779
Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
17761780
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
17771781
llvm::GlobalValue::WeakODRLinkage);
17781782
}
17791783

1784+
void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1785+
const VarDecl *VD) {
1786+
// TODO: add logic to avoid duplicate vtable registrations per
1787+
// translation unit; though for external linkage, this should no
1788+
// longer be an issue - or at least we can avoid the issue by
1789+
// checking for an existing offloading entry. But, perhaps the
1790+
// better approach is to defer emission of the vtables and offload
1791+
// entries until later (by tracking a list of items that need to be
1792+
// emitted).
1793+
1794+
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1795+
1796+
// Generate a new externally visible global to point to the
1797+
// internally visible vtable. Doing this allows us to keep the
1798+
// visibility and linkage of the associated vtable unchanged while
1799+
// allowing the runtime to access its value. The externally
1800+
// visible global var needs to be emitted with a unique mangled
1801+
// name that won't conflict with similarly named (internal)
1802+
// vtables in other translation units.
1803+
1804+
// Register vtable with source location of dynamic object in map
1805+
// clause.
1806+
llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1807+
CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(),
1808+
VTable->getName());
1809+
1810+
llvm::GlobalVariable *Addr = VTable;
1811+
size_t PointerSize = CGM.getDataLayout().getPointerSize();
1812+
SmallString<128> AddrName;
1813+
OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
1814+
AddrName.append("addr");
1815+
1816+
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1817+
Addr = new llvm::GlobalVariable(
1818+
CGM.getModule(), VTable->getType(),
1819+
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1820+
AddrName,
1821+
/*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal,
1822+
CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1823+
Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1824+
}
1825+
OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1826+
AddrName, VTable,
1827+
CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
1828+
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1829+
llvm::GlobalValue::WeakODRLinkage);
1830+
}
1831+
1832+
void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM,
1833+
CXXRecordDecl *CXXRecord,
1834+
const VarDecl *VD) {
1835+
// Register C++ VTable to OpenMP Offload Entry if it's a new
1836+
// CXXRecordDecl.
1837+
if (CXXRecord && CXXRecord->isDynamicClass() &&
1838+
!CGM.getOpenMPRuntime().VTableDeclMap.contains(CXXRecord)) {
1839+
CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
1840+
CGM.EmitVTable(CXXRecord);
1841+
CodeGenVTables VTables = CGM.getVTables();
1842+
llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
1843+
if (VTablesAddr)
1844+
CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
1845+
// Emit VTable for all the fields containing dynamic CXXRecord
1846+
for (const FieldDecl *Field : CXXRecord->fields()) {
1847+
if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1848+
emitAndRegisterVTable(CGM, RecordDecl, VD);
1849+
}
1850+
// Emit VTable for all dynamic parent class
1851+
for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1852+
if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1853+
emitAndRegisterVTable(CGM, BaseDecl, VD);
1854+
}
1855+
}
1856+
};
1857+
1858+
void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
1859+
// Register VTable by scanning through the map clause of OpenMP target region.
1860+
// Get CXXRecordDecl and VarDecl from Expr.
1861+
auto GetVTableDecl = [](const Expr *E) {
1862+
QualType VDTy = E->getType();
1863+
CXXRecordDecl *CXXRecord = nullptr;
1864+
if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
1865+
VDTy = RefType->getPointeeType();
1866+
if (VDTy->isPointerType())
1867+
CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
1868+
else
1869+
CXXRecord = VDTy->getAsCXXRecordDecl();
1870+
1871+
const VarDecl *VD = nullptr;
1872+
if (auto *DRE = dyn_cast<DeclRefExpr>(E))
1873+
VD = cast<VarDecl>(DRE->getDecl());
1874+
else if (auto *MRE = dyn_cast<MemberExpr>(E)){
1875+
printf("here\n");
1876+
if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())){
1877+
printf("here 1\n");
1878+
if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl())){
1879+
VD = BaseVD;
1880+
printf("here 2\n");
1881+
}
1882+
}
1883+
}
1884+
return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
1885+
};
1886+
// Collect VTable from OpenMP map clause.
1887+
for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
1888+
for (const auto *E : C->varlist()) {
1889+
auto DeclPair = GetVTableDecl(E);
1890+
// Ensure VD is not null
1891+
if (DeclPair.second)
1892+
emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
1893+
}
1894+
}
1895+
}
1896+
17801897
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
17811898
QualType VarType,
17821899
StringRef Name) {
@@ -6249,6 +6366,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
62496366
CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
62506367
}
62516368
}
6369+
registerVTable(D);
62526370
}
62536371

62546372
/// Checks if the expression is constant or does not have non-trivial function
@@ -9955,6 +10073,17 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
995510073
if (!S)
995610074
return;
995710075

10076+
// Register vtable from device for target data and target directives.
10077+
// Add this block here since scanForTargetRegionsFunctions ignores
10078+
// target data by checking if S is a executable directive (target).
10079+
if (auto *E = dyn_cast<OMPExecutableDirective>(S);
10080+
E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) {
10081+
// Don't need to check if it's device compile
10082+
// since scanForTargetRegionsFunctions currently only called
10083+
// in device compilation.
10084+
registerVTable(*E);
10085+
}
10086+
995810087
// Codegen OMP target directives that offload compute to the device.
995910088
bool RequiresDeviceCodegen =
996010089
isa<OMPExecutableDirective>(S) &&

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,9 @@ class CGOpenMPRuntime {
605605
LValue PosLVal, const OMPTaskDataTy::DependData &Data,
606606
Address DependenciesArray);
607607

608+
/// Keep track of VTable Declarations so we don't register duplicate VTable.
609+
llvm::SmallDenseMap<CXXRecordDecl *, const VarDecl *> VTableDeclMap;
610+
608611
public:
609612
explicit CGOpenMPRuntime(CodeGenModule &CGM);
610613
virtual ~CGOpenMPRuntime() {}
@@ -1111,6 +1114,23 @@ class CGOpenMPRuntime {
11111114
virtual void emitDeclareTargetFunction(const FunctionDecl *FD,
11121115
llvm::GlobalValue *GV);
11131116

1117+
/// Register VTable to OpenMP offload entry.
1118+
/// \param VTable VTable of the C++ class.
1119+
/// \param RD C++ class decl.
1120+
virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1121+
const VarDecl *VD);
1122+
/// Emit code for registering vtable by scanning through map clause
1123+
/// in OpenMP target region.
1124+
/// \param D OpenMP target directive.
1125+
virtual void registerVTable(const OMPExecutableDirective &D);
1126+
1127+
/// Emit and register VTable for the C++ class in OpenMP offload entry.
1128+
/// \param CXXRecord C++ class decl.
1129+
/// \param VD Variable decl which holds VTable.
1130+
virtual void emitAndRegisterVTable(CodeGenModule &CGM,
1131+
CXXRecordDecl *CXXRecord,
1132+
const VarDecl *VD);
1133+
11141134
/// Creates artificial threadprivate variable with name \p Name and type \p
11151135
/// VarType.
11161136
/// \param VarType Type of the artificial threadprivate variable.

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7617,6 +7617,10 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause(
76177617
// Generate the instructions for '#pragma omp target data' directive.
76187618
void CodeGenFunction::EmitOMPTargetDataDirective(
76197619
const OMPTargetDataDirective &S) {
7620+
// Emit vtable only from host for target data directive.
7621+
if (!CGM.getLangOpts().OpenMPIsTargetDevice)
7622+
CGM.getOpenMPRuntime().registerVTable(S);
7623+
76207624
CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
76217625
/*SeparateBeginEndCalls=*/true);
76227626

clang/lib/CodeGen/CGVTables.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy,
3838
/*DontDefer=*/true, /*IsThunk=*/true);
3939
}
4040

41+
llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) {
42+
llvm::GlobalVariable *VTable =
43+
CGM.getCXXABI().getAddrOfVTable(RD, CharUnits());
44+
return VTable;
45+
}
46+
4147
static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
4248
llvm::Function *ThunkFn, bool ForVTable,
4349
GlobalDecl GD) {

clang/lib/CodeGen/CGVTables.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ class CodeGenVTables {
122122
llvm::GlobalVariable::LinkageTypes Linkage,
123123
const CXXRecordDecl *RD);
124124

125+
/// GetAddrOfVTable - Get the address of the VTable for the given record
126+
/// decl.
127+
llvm::GlobalVariable *GetAddrOfVTable(const CXXRecordDecl *RD);
128+
125129
/// EmitThunks - Emit the associated thunks for the given global decl.
126130
void EmitThunks(GlobalDecl GD);
127131

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,9 @@ class CodeGenModule : public CodeGenTypeCache {
754754
// i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32)
755755
llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr;
756756

757+
// Store indirect CallExprs that are within an omp target region
758+
llvm::SmallPtrSet<const CallExpr *, 16> OMPTargetCalls;
759+
757760
InstrProfStats &getPGOStats() { return PGOStats; }
758761
llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }
759762

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++
2+
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ | FileCheck %s
3+
// expected-no-diagnostics
4+
5+
// CHECK-DAG: @_ZTV7Derived
6+
// CHECK-DAG: @_ZTV4Base
7+
template <typename T>
8+
class Container {
9+
private:
10+
T value;
11+
public:
12+
Container() : value() {}
13+
Container(T val) : value(val) {}
14+
15+
T getValue() const { return value; }
16+
17+
void setValue(T val) { value = val; }
18+
};
19+
20+
class Base {
21+
public:
22+
virtual void foo() {}
23+
};
24+
class Derived : public Base {};
25+
26+
class Test {
27+
public:
28+
Container<Derived> v;
29+
};
30+
31+
int main() {
32+
Test test;
33+
Derived d;
34+
test.v.setValue(d);
35+
36+
// Make sure we emit VTable for type indirectly (template specialized type)
37+
#pragma omp target map(test)
38+
{
39+
test.v.getValue().foo();
40+
}
41+
return 0;
42+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
2+
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
3+
// expected-no-diagnostics
4+
5+
// Make sure both host and device compilation emit vtable for Dervied
6+
// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any
7+
// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any
8+
// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any
9+
// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any
10+
// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any
11+
// CHECK-DAG: $_ZN4BaseD2Ev = comdat any
12+
// CHECK-DAG: $_ZTV7Derived = comdat any
13+
class Base {
14+
public:
15+
16+
virtual ~Base() = default;
17+
18+
virtual void BaseA(int a) { }
19+
};
20+
21+
// CHECK: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] }
22+
class Derived : public Base {
23+
public:
24+
25+
~Derived() override = default;
26+
27+
void BaseA(int a) override { x = a; }
28+
29+
virtual void DerivedB() { }
30+
private:
31+
int x;
32+
};
33+
34+
int main() {
35+
36+
Derived d;
37+
Base& c = d;
38+
int a = 50;
39+
// Should emit vtable for Derived since d is added to map clause
40+
#pragma omp target data map (to: d, a)
41+
{
42+
#pragma omp target map(d)
43+
{
44+
c.BaseA(a);
45+
}
46+
}
47+
return 0;
48+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
2+
// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
3+
// expected-no-diagnostics
4+
5+
namespace {
6+
7+
// Make sure both host and device compilation emit vtable for Dervied
8+
// CHECK-DAG: @_ZTVN12_GLOBAL__N_17DerivedE
9+
// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD1Ev
10+
// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD0Ev
11+
// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived5BaseAEi
12+
// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived8DerivedBEv
13+
class Base {
14+
public:
15+
virtual ~Base() = default;
16+
virtual void BaseA(int a) { }
17+
};
18+
19+
class Derived : public Base {
20+
public:
21+
~Derived() override = default;
22+
void BaseA(int a) override { x = a; }
23+
virtual void DerivedB() { }
24+
private:
25+
int x;
26+
};
27+
28+
};
29+
30+
int main() {
31+
32+
Derived d;
33+
Base& c = d;
34+
int a = 50;
35+
#pragma omp target data map (to: d, a)
36+
{
37+
#pragma omp target
38+
{
39+
c.BaseA(a);
40+
}
41+
}
42+
return 0;
43+
}

0 commit comments

Comments
 (0)