Skip to content

Commit e7aafa4

Browse files
Jason-VanBeusekomchichunchenJeffery Sandoval
committed
[OpenMP][clang] Register Vtables on device for indirect calls
Runtime / Registration support for indirect and virtual function calls in OpenMP target regions - Register Vtable's to OpenMP offload table - Modify PluginInterface to register Vtables to indirect call table This Patch does not have the logic for calling __llvm_omp_indirect_call_lookup, and lacks implementation logic --------- Co-authored-by: Chi-Chun Chen <[email protected]> Co-authored-by: Jeffery Sandoval <[email protected]>
1 parent 714f032 commit e7aafa4

File tree

12 files changed

+497
-11
lines changed

12 files changed

+497
-11
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,12 +1771,126 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
17711771
Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
17721772
}
17731773

1774+
// Register the indirect Vtable:
1775+
// This is similar to OMPTargetGlobalVarEntryIndirect, except that the
1776+
// size field refers to the size of memory pointed to, not the size of
1777+
// the pointer symbol itself (which is implicitly the size of a pointer).
17741778
OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
17751779
Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
17761780
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
17771781
llvm::GlobalValue::WeakODRLinkage);
17781782
}
17791783

1784+
void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1785+
const VarDecl *VD) {
1786+
// TODO: add logic to avoid duplicate vtable registrations per
1787+
// translation unit; though for external linkage, this should no
1788+
// longer be an issue - or at least we can avoid the issue by
1789+
// checking for an existing offloading entry. But, perhaps the
1790+
// better approach is to defer emission of the vtables and offload
1791+
// entries until later (by tracking a list of items that need to be
1792+
// emitted).
1793+
1794+
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1795+
1796+
// Generate a new externally visible global to point to the
1797+
// internally visible vtable. Doing this allows us to keep the
1798+
// visibility and linkage of the associated vtable unchanged while
1799+
// allowing the runtime to access its value. The externally
1800+
// visible global var needs to be emitted with a unique mangled
1801+
// name that won't conflict with similarly named (internal)
1802+
// vtables in other translation units.
1803+
1804+
// Register vtable with source location of dynamic object in map
1805+
// clause.
1806+
llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1807+
CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(),
1808+
VTable->getName());
1809+
1810+
llvm::GlobalVariable *Addr = VTable;
1811+
size_t PointerSize = CGM.getDataLayout().getPointerSize();
1812+
SmallString<128> AddrName;
1813+
OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
1814+
AddrName.append("addr");
1815+
1816+
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1817+
Addr = new llvm::GlobalVariable(
1818+
CGM.getModule(), VTable->getType(),
1819+
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1820+
AddrName,
1821+
/*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal,
1822+
CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1823+
Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1824+
}
1825+
OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1826+
AddrName, VTable,
1827+
CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
1828+
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1829+
llvm::GlobalValue::WeakODRLinkage);
1830+
}
1831+
1832+
// Register VTable by scanning through the map clause of OpenMP target region.
1833+
void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
1834+
// Get CXXRecordDecl and VarDecl from Expr.
1835+
auto getVTableDecl = [](const Expr *E) {
1836+
QualType VDTy = E->getType();
1837+
CXXRecordDecl *CXXRecord = nullptr;
1838+
if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
1839+
VDTy = RefType->getPointeeType();
1840+
if (VDTy->isPointerType())
1841+
CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
1842+
else
1843+
CXXRecord = VDTy->getAsCXXRecordDecl();
1844+
1845+
const VarDecl *VD = nullptr;
1846+
if (auto *DRE = dyn_cast<DeclRefExpr>(E))
1847+
VD = cast<VarDecl>(DRE->getDecl());
1848+
return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
1849+
};
1850+
1851+
// Emit VTable and register the VTable to OpenMP offload entry recursively.
1852+
std::function<void(CodeGenModule &, CXXRecordDecl *, const VarDecl *)>
1853+
emitAndRegisterVTable = [&emitAndRegisterVTable](CodeGenModule &CGM,
1854+
CXXRecordDecl *CXXRecord,
1855+
const VarDecl *VD) {
1856+
// Register C++ VTable to OpenMP Offload Entry if it's a new
1857+
// CXXRecordDecl.
1858+
if (CXXRecord && CXXRecord->isDynamicClass() &&
1859+
CGM.getOpenMPRuntime().VTableDeclMap.find(CXXRecord) ==
1860+
CGM.getOpenMPRuntime().VTableDeclMap.end()) {
1861+
CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
1862+
CGM.EmitVTable(CXXRecord);
1863+
auto VTables = CGM.getVTables();
1864+
auto *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
1865+
if (VTablesAddr) {
1866+
CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
1867+
}
1868+
// Emit VTable for all the fields containing dynamic CXXRecord
1869+
for (const FieldDecl *Field : CXXRecord->fields()) {
1870+
if (CXXRecordDecl *RecordDecl =
1871+
Field->getType()->getAsCXXRecordDecl()) {
1872+
emitAndRegisterVTable(CGM, RecordDecl, VD);
1873+
}
1874+
}
1875+
// Emit VTable for all dynamic parent class
1876+
for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1877+
if (CXXRecordDecl *BaseDecl =
1878+
Base.getType()->getAsCXXRecordDecl()) {
1879+
emitAndRegisterVTable(CGM, BaseDecl, VD);
1880+
}
1881+
}
1882+
}
1883+
};
1884+
1885+
// Collect VTable from OpenMP map clause.
1886+
for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
1887+
for (const auto *E : C->varlist()) {
1888+
auto DeclPair = getVTableDecl(E);
1889+
emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
1890+
}
1891+
}
1892+
}
1893+
17801894
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
17811895
QualType VarType,
17821896
StringRef Name) {
@@ -6249,6 +6363,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
62496363
CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
62506364
}
62516365
}
6366+
registerVTable(D);
62526367
}
62536368

62546369
/// Checks if the expression is constant or does not have non-trivial function
@@ -9955,6 +10070,19 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
995510070
if (!S)
995610071
return;
995710072

10073+
// Register vtable from device for target data and target directives.
10074+
// Add this block here since scanForTargetRegionsFunctions ignores
10075+
// target data by checking if S is a executable directive (target).
10076+
if (isa<OMPExecutableDirective>(S) &&
10077+
isOpenMPTargetDataManagementDirective(
10078+
cast<OMPExecutableDirective>(S)->getDirectiveKind())) {
10079+
auto &E = *cast<OMPExecutableDirective>(S);
10080+
// Don't need to check if it's device compile
10081+
// since scanForTargetRegionsFunctions currently only called
10082+
// in device compilation.
10083+
registerVTable(E);
10084+
}
10085+
995810086
// Codegen OMP target directives that offload compute to the device.
995910087
bool RequiresDeviceCodegen =
996010088
isa<OMPExecutableDirective>(S) &&

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,9 @@ class CGOpenMPRuntime {
605605
LValue PosLVal, const OMPTaskDataTy::DependData &Data,
606606
Address DependenciesArray);
607607

608+
/// Keep track of VTable Declarations so we don't register duplicate VTable.
609+
llvm::DenseMap<CXXRecordDecl*, const VarDecl*> VTableDeclMap;
610+
608611
public:
609612
explicit CGOpenMPRuntime(CodeGenModule &CGM);
610613
virtual ~CGOpenMPRuntime() {}
@@ -1111,6 +1114,16 @@ class CGOpenMPRuntime {
11111114
virtual void emitDeclareTargetFunction(const FunctionDecl *FD,
11121115
llvm::GlobalValue *GV);
11131116

1117+
/// Register VTable to OpenMP offload entry.
1118+
/// \param VTable VTable of the C++ class.
1119+
/// \param RD C++ class decl.
1120+
virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1121+
const VarDecl *VD);
1122+
/// Emit code for registering vtable by scanning through map clause
1123+
/// in OpenMP target region.
1124+
/// \param D OpenMP target directive.
1125+
virtual void registerVTable(const OMPExecutableDirective &D);
1126+
11141127
/// Creates artificial threadprivate variable with name \p Name and type \p
11151128
/// VarType.
11161129
/// \param VarType Type of the artificial threadprivate variable.

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7617,6 +7617,10 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause(
76177617
// Generate the instructions for '#pragma omp target data' directive.
76187618
void CodeGenFunction::EmitOMPTargetDataDirective(
76197619
const OMPTargetDataDirective &S) {
7620+
// Emit vtable only from host for target data directive.
7621+
if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
7622+
CGM.getOpenMPRuntime().registerVTable(S);
7623+
}
76207624
CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
76217625
/*SeparateBeginEndCalls=*/true);
76227626

clang/lib/CodeGen/CGVTables.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy,
3838
/*DontDefer=*/true, /*IsThunk=*/true);
3939
}
4040

41+
llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) {
42+
llvm::GlobalVariable *VTable =
43+
CGM.getCXXABI().getAddrOfVTable(RD, CharUnits());
44+
return VTable;
45+
}
46+
4147
static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
4248
llvm::Function *ThunkFn, bool ForVTable,
4349
GlobalDecl GD) {

clang/lib/CodeGen/CGVTables.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ class CodeGenVTables {
122122
llvm::GlobalVariable::LinkageTypes Linkage,
123123
const CXXRecordDecl *RD);
124124

125+
/// GetAddrOfVTable - Get the address of the VTable for the given record
126+
/// decl.
127+
llvm::GlobalVariable *GetAddrOfVTable(const CXXRecordDecl *RD);
128+
125129
/// EmitThunks - Emit the associated thunks for the given global decl.
126130
void EmitThunks(GlobalDecl GD);
127131

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,9 @@ class CodeGenModule : public CodeGenTypeCache {
754754
// i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32)
755755
llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr;
756756

757+
// Store indirect CallExprs that are within an omp target region
758+
llvm::SmallPtrSet<const CallExpr *, 16> OMPTargetCalls;
759+
757760
InstrProfStats &getPGOStats() { return PGOStats; }
758761
llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }
759762

0 commit comments

Comments
 (0)