@@ -1771,12 +1771,126 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
17711771 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
17721772 }
17731773
1774+ // Register the indirect Vtable:
1775+ // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
1776+ // size field refers to the size of memory pointed to, not the size of
1777+ // the pointer symbol itself (which is implicitly the size of a pointer).
17741778 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
17751779 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
17761780 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
17771781 llvm::GlobalValue::WeakODRLinkage);
17781782}
17791783
1784+ void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1785+ const VarDecl *VD) {
1786+ // TODO: add logic to avoid duplicate vtable registrations per
1787+ // translation unit; though for external linkage, this should no
1788+ // longer be an issue - or at least we can avoid the issue by
1789+ // checking for an existing offloading entry. But, perhaps the
1790+ // better approach is to defer emission of the vtables and offload
1791+ // entries until later (by tracking a list of items that need to be
1792+ // emitted).
1793+
1794+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1795+
1796+ // Generate a new externally visible global to point to the
1797+ // internally visible vtable. Doing this allows us to keep the
1798+ // visibility and linkage of the associated vtable unchanged while
1799+ // allowing the runtime to access its value. The externally
1800+ // visible global var needs to be emitted with a unique mangled
1801+ // name that won't conflict with similarly named (internal)
1802+ // vtables in other translation units.
1803+
1804+ // Register vtable with source location of dynamic object in map
1805+ // clause.
1806+ llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1807+ CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(),
1808+ VTable->getName());
1809+
1810+ llvm::GlobalVariable *Addr = VTable;
1811+ size_t PointerSize = CGM.getDataLayout().getPointerSize();
1812+ SmallString<128> AddrName;
1813+ OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo);
1814+ AddrName.append("addr");
1815+
1816+ if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1817+ Addr = new llvm::GlobalVariable(
1818+ CGM.getModule(), VTable->getType(),
1819+ /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1820+ AddrName,
1821+ /*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal,
1822+ CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1823+ Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1824+ }
1825+ OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1826+ AddrName, VTable,
1827+ CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
1828+ llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1829+ llvm::GlobalValue::WeakODRLinkage);
1830+ }
1831+
1832+ // Register VTable by scanning through the map clause of OpenMP target region.
1833+ void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
1834+ // Get CXXRecordDecl and VarDecl from Expr.
1835+ auto getVTableDecl = [](const Expr *E) {
1836+ QualType VDTy = E->getType();
1837+ CXXRecordDecl *CXXRecord = nullptr;
1838+ if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
1839+ VDTy = RefType->getPointeeType();
1840+ if (VDTy->isPointerType())
1841+ CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
1842+ else
1843+ CXXRecord = VDTy->getAsCXXRecordDecl();
1844+
1845+ const VarDecl *VD = nullptr;
1846+ if (auto *DRE = dyn_cast<DeclRefExpr>(E))
1847+ VD = cast<VarDecl>(DRE->getDecl());
1848+ return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
1849+ };
1850+
1851+ // Emit VTable and register the VTable to OpenMP offload entry recursively.
1852+ std::function<void(CodeGenModule &, CXXRecordDecl *, const VarDecl *)>
1853+ emitAndRegisterVTable = [&emitAndRegisterVTable](CodeGenModule &CGM,
1854+ CXXRecordDecl *CXXRecord,
1855+ const VarDecl *VD) {
1856+ // Register C++ VTable to OpenMP Offload Entry if it's a new
1857+ // CXXRecordDecl.
1858+ if (CXXRecord && CXXRecord->isDynamicClass() &&
1859+ CGM.getOpenMPRuntime().VTableDeclMap.find(CXXRecord) ==
1860+ CGM.getOpenMPRuntime().VTableDeclMap.end()) {
1861+ CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
1862+ CGM.EmitVTable(CXXRecord);
1863+ auto VTables = CGM.getVTables();
1864+ auto *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
1865+ if (VTablesAddr) {
1866+ CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
1867+ }
1868+ // Emit VTable for all the fields containing dynamic CXXRecord
1869+ for (const FieldDecl *Field : CXXRecord->fields()) {
1870+ if (CXXRecordDecl *RecordDecl =
1871+ Field->getType()->getAsCXXRecordDecl()) {
1872+ emitAndRegisterVTable(CGM, RecordDecl, VD);
1873+ }
1874+ }
1875+ // Emit VTable for all dynamic parent class
1876+ for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1877+ if (CXXRecordDecl *BaseDecl =
1878+ Base.getType()->getAsCXXRecordDecl()) {
1879+ emitAndRegisterVTable(CGM, BaseDecl, VD);
1880+ }
1881+ }
1882+ }
1883+ };
1884+
1885+ // Collect VTable from OpenMP map clause.
1886+ for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
1887+ for (const auto *E : C->varlist()) {
1888+ auto DeclPair = getVTableDecl(E);
1889+ emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
1890+ }
1891+ }
1892+ }
1893+
17801894Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
17811895 QualType VarType,
17821896 StringRef Name) {
@@ -6249,6 +6363,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
62496363 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
62506364 }
62516365 }
6366+ registerVTable(D);
62526367}
62536368
62546369/// Checks if the expression is constant or does not have non-trivial function
@@ -9955,6 +10070,19 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
995510070 if (!S)
995610071 return;
995710072
10073+ // Register vtable from device for target data and target directives.
10074+ // Add this block here since scanForTargetRegionsFunctions ignores
10075+ // target data by checking if S is a executable directive (target).
10076+ if (isa<OMPExecutableDirective>(S) &&
10077+ isOpenMPTargetDataManagementDirective(
10078+ cast<OMPExecutableDirective>(S)->getDirectiveKind())) {
10079+ auto &E = *cast<OMPExecutableDirective>(S);
10080+ // Don't need to check if it's device compile
10081+ // since scanForTargetRegionsFunctions currently only called
10082+ // in device compilation.
10083+ registerVTable(E);
10084+ }
10085+
995810086 // Codegen OMP target directives that offload compute to the device.
995910087 bool RequiresDeviceCodegen =
996010088 isa<OMPExecutableDirective>(S) &&
0 commit comments