diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a503aaf613e30..8fbf1c8035a6b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1771,12 +1771,124 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD, Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility); } + // Register the indirect Vtable: + // This is similar to OMPTargetGlobalVarEntryIndirect, except that the + // size field refers to the size of memory pointed to, not the size of + // the pointer symbol itself (which is implicitly the size of a pointer). OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(), llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect, llvm::GlobalValue::WeakODRLinkage); } +void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable, + const VarDecl *VD) { + // TODO: add logic to avoid duplicate vtable registrations per + // translation unit; though for external linkage, this should no + // longer be an issue - or at least we can avoid the issue by + // checking for an existing offloading entry. But, perhaps the + // better approach is to defer emission of the vtables and offload + // entries until later (by tracking a list of items that need to be + // emitted). + + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + + // Generate a new externally visible global to point to the + // internally visible vtable. Doing this allows us to keep the + // visibility and linkage of the associated vtable unchanged while + // allowing the runtime to access its value. The externally + // visible global var needs to be emitted with a unique mangled + // name that won't conflict with similarly named (internal) + // vtables in other translation units. + + // Register vtable with source location of dynamic object in map + // clause. + llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( + CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(), + VTable->getName()); + + llvm::GlobalVariable *Addr = VTable; + size_t PointerSize = CGM.getDataLayout().getPointerSize(); + SmallString<128> AddrName; + OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, EntryInfo); + AddrName.append("addr"); + + if (CGM.getLangOpts().OpenMPIsTargetDevice) { + Addr = new llvm::GlobalVariable( + CGM.getModule(), VTable->getType(), + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable, + AddrName, + /*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace()); + Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility); + } + OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( + AddrName, VTable, + CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()), + llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable, + llvm::GlobalValue::WeakODRLinkage); +} + +void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM, + CXXRecordDecl *CXXRecord, + const VarDecl *VD) { + // Register C++ VTable to OpenMP Offload Entry if it's a new + // CXXRecordDecl. + if (CXXRecord && CXXRecord->isDynamicClass() && + CGM.getOpenMPRuntime().VTableDeclMap.find(CXXRecord) == + CGM.getOpenMPRuntime().VTableDeclMap.end()) { + CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD); + CGM.EmitVTable(CXXRecord); + CodeGenVTables VTables = CGM.getVTables(); + llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord); + if (VTablesAddr) + CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD); + // Emit VTable for all the fields containing dynamic CXXRecord + for (const FieldDecl *Field : CXXRecord->fields()) { + if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl()) + emitAndRegisterVTable(CGM, RecordDecl, VD); + } + // Emit VTable for all dynamic parent class + for (CXXBaseSpecifier &Base : CXXRecord->bases()) { + if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl()) + emitAndRegisterVTable(CGM, BaseDecl, VD); + } + } +}; + +void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) { + // Register VTable by scanning through the map clause of OpenMP target region. + // Get CXXRecordDecl and VarDecl from Expr. + auto GetVTableDecl = [](const Expr *E) { + QualType VDTy = E->getType(); + CXXRecordDecl *CXXRecord = nullptr; + if (const auto *RefType = VDTy->getAs()) + VDTy = RefType->getPointeeType(); + if (VDTy->isPointerType()) + CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl(); + else + CXXRecord = VDTy->getAsCXXRecordDecl(); + + const VarDecl *VD = nullptr; + if (auto *DRE = dyn_cast(E)) + VD = cast(DRE->getDecl()); + else if (auto *MRE = dyn_cast(E)) + if (auto *BaseDRE = dyn_cast(MRE->getBase())) + if (auto *BaseVD = dyn_cast(BaseDRE->getDecl())) + VD = BaseVD; + return std::pair(CXXRecord, VD); + }; + // Collect VTable from OpenMP map clause. + for (const auto *C : D.getClausesOfKind()) { + for (const auto *E : C->varlist()) { + auto DeclPair = GetVTableDecl(E); + // Ensure VD is not null + if (DeclPair.second) + emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second); + } + } +} + Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) { @@ -6249,6 +6361,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr); } } + registerVTable(D); } /// Checks if the expression is constant or does not have non-trivial function @@ -9955,6 +10068,17 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, if (!S) return; + // Register vtable from device for target data and target directives. + // Add this block here since scanForTargetRegionsFunctions ignores + // target data by checking if S is a executable directive (target). + if (auto *E = dyn_cast(S); + E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) { + // Don't need to check if it's device compile + // since scanForTargetRegionsFunctions currently only called + // in device compilation. + registerVTable(*E); + } + // Codegen OMP target directives that offload compute to the device. bool RequiresDeviceCodegen = isa(S) && diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index eb04eceee236c..7f8a81d4090e2 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -605,6 +605,9 @@ class CGOpenMPRuntime { LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray); + /// Keep track of VTable Declarations so we don't register duplicate VTable. + llvm::SmallDenseMap VTableDeclMap; + public: explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} @@ -1111,6 +1114,23 @@ class CGOpenMPRuntime { virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV); + /// Register VTable to OpenMP offload entry. + /// \param VTable VTable of the C++ class. + /// \param RD C++ class decl. + virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable, + const VarDecl *VD); + /// Emit code for registering vtable by scanning through map clause + /// in OpenMP target region. + /// \param D OpenMP target directive. + virtual void registerVTable(const OMPExecutableDirective &D); + + /// Emit and register VTable for the C++ class in OpenMP offload entry. + /// \param CXXRecord C++ class decl. + /// \param VD Variable decl which holds VTable. + virtual void emitAndRegisterVTable(CodeGenModule &CGM, + CXXRecordDecl *CXXRecord, + const VarDecl *VD); + /// Creates artificial threadprivate variable with name \p Name and type \p /// VarType. /// \param VarType Type of the artificial threadprivate variable. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index d72cd8fbfd608..0b88f1dc5f0ea 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -7617,6 +7617,10 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause( // Generate the instructions for '#pragma omp target data' directive. void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { + // Emit vtable only from host for target data directive. + if (!CGM.getLangOpts().OpenMPIsTargetDevice) + CGM.getOpenMPRuntime().registerVTable(S); + CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, /*SeparateBeginEndCalls=*/true); diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index e14e883a55ac5..de4a67db313ea 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -38,6 +38,12 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef Name, llvm::Type *FnTy, /*DontDefer=*/true, /*IsThunk=*/true); } +llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) { + llvm::GlobalVariable *VTable = + CGM.getCXXABI().getAddrOfVTable(RD, CharUnits()); + return VTable; +} + static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk, llvm::Function *ThunkFn, bool ForVTable, GlobalDecl GD) { diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h index 5c45e355fb145..37458eee02e34 100644 --- a/clang/lib/CodeGen/CGVTables.h +++ b/clang/lib/CodeGen/CGVTables.h @@ -122,6 +122,10 @@ class CodeGenVTables { llvm::GlobalVariable::LinkageTypes Linkage, const CXXRecordDecl *RD); + /// GetAddrOfVTable - Get the address of the VTable for the given record + /// decl. + llvm::GlobalVariable *GetAddrOfVTable(const CXXRecordDecl *RD); + /// EmitThunks - Emit the associated thunks for the given global decl. void EmitThunks(GlobalDecl GD); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 3971b296b3f80..49dcba4b7618b 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -754,6 +754,9 @@ class CodeGenModule : public CodeGenTypeCache { // i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32) llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr; + // Store indirect CallExprs that are within an omp target region + llvm::SmallPtrSet OMPTargetCalls; + InstrProfStats &getPGOStats() { return PGOStats; } llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); } diff --git a/clang/test/OpenMP/target_vtable_codegen_container.cpp b/clang/test/OpenMP/target_vtable_codegen_container.cpp new file mode 100644 index 0000000000000..9fd4c6b736163 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_container.cpp @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: @_ZTV7Derived +// CHECK-DAG: @_ZTV4Base +template +class Container { +private: +T value; +public: +Container() : value() {} +Container(T val) : value(val) {} + +T getValue() const { return value; } + +void setValue(T val) { value = val; } +}; + +class Base { +public: + virtual void foo() {} +}; +class Derived : public Base {}; + +class Test { +public: + Container v; +}; + +int main() { + Test test; + Derived d; + test.v.setValue(d); + +// Make sure we emit VTable for type indirectly (template specialized type) +#pragma omp target map(test) + { + test.v.getValue().foo(); + } + return 0; +} diff --git a/clang/test/OpenMP/target_vtable_codegen_explicit.cpp b/clang/test/OpenMP/target_vtable_codegen_explicit.cpp new file mode 100644 index 0000000000000..001ed8fdd9cd7 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_explicit.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + +// Make sure both host and device compilation emit vtable for Dervied +// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any +// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any +// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any +// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any +// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any +// CHECK-DAG: $_ZN4BaseD2Ev = comdat any +// CHECK-DAG: $_ZTV7Derived = comdat any +class Base { +public: + + virtual ~Base() = default; + + virtual void BaseA(int a) { } +}; + +// CHECK: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] } +class Derived : public Base { +public: + + ~Derived() override = default; + + void BaseA(int a) override { x = a; } + + virtual void DerivedB() { } +private: + int x; +}; + +int main() { + + Derived d; + Base& c = d; + int a = 50; + // Should emit vtable for Derived since d is added to map clause +#pragma omp target data map (to: d, a) + { + #pragma omp target map(d) + { + c.BaseA(a); + } + } + return 0; +} diff --git a/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp new file mode 100644 index 0000000000000..364c55cd07985 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + +namespace { + +// Make sure both host and device compilation emit vtable for Dervied +// CHECK-DAG: @_ZTVN12_GLOBAL__N_17DerivedE +// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD1Ev +// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD0Ev +// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived5BaseAEi +// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived8DerivedBEv +class Base { +public: + virtual ~Base() = default; + virtual void BaseA(int a) { } +}; + +class Derived : public Base { +public: + ~Derived() override = default; + void BaseA(int a) override { x = a; } + virtual void DerivedB() { } +private: + int x; +}; + +}; + +int main() { + + Derived d; + Base& c = d; + int a = 50; +#pragma omp target data map (to: d, a) + { + #pragma omp target + { + c.BaseA(a); + } + } + return 0; +} diff --git a/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp new file mode 100644 index 0000000000000..3069a4994a479 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: @_ZTV6Base_1 +// CHECK-DAG: @_ZTV7Derived +// CHECK-DAG: @_ZTV6Base_2 +#pragma omp begin declare target + +class Base_1 { +public: + virtual void foo() { } + virtual void bar() { } +}; + +class Base_2 { +public: + virtual void foo() { } + virtual void bar() { } +}; + +class Derived : public Base_1, public Base_2 { +public: + virtual void foo() override { } + virtual void bar() override { } +}; + +#pragma omp end declare target + +int main() { + Base_1 base; + Derived derived; + + // Make sure we emit vtable for parent class (Base_1 and Base_2) +#pragma omp target data map(derived) + { + Base_1 *p1 = &derived; + +#pragma omp target + { + p1->foo(); + p1->bar(); + } + } + return 0; +} diff --git a/clang/test/OpenMP/target_vtable_codegen_nested.cpp b/clang/test/OpenMP/target_vtable_codegen_nested.cpp new file mode 100644 index 0000000000000..1ece83d60ac58 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_codegen_nested.cpp @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: @_ZTV3Car +// CHECK-DAG: @_ZTV6Engine +// CHECK-DAG: @_ZTV6Wheels +// CHECK-DAG: @_ZTV7Vehicle +// CHECK-DAG: @_ZTV5Brand +class Engine { +public: + Engine(const char *type) : type(type) {} + virtual ~Engine() {} + + virtual void start() const { } + +protected: + const char *type; +}; + +class Wheels { +public: + Wheels(int count) : count(count) {} + virtual ~Wheels() {} + + virtual void roll() const { } + +protected: + int count; +}; + +class Vehicle { +public: + Vehicle(int speed) : speed(speed) {} + virtual ~Vehicle() {} + + virtual void move() const { } + +protected: + int speed; +}; + +class Brand { +public: + Brand(const char *brandName) : brandName(brandName) {} + virtual ~Brand() {} + + void showBrand() const { } + +protected: + const char *brandName; +}; + +class Car : public Vehicle, public Brand { +public: + Car(const char *brand, int speed, const char *engineType, int wheelCount) + : Vehicle(speed), Brand(brand), engine(engineType), wheels(wheelCount) {} + + void move() const override { } + + void drive() const { + showBrand(); + engine.start(); + wheels.roll(); + move(); + } + +private: + Engine engine; + Wheels wheels; +}; + +int main() { + Car myActualCar("Ford", 100, "Hybrid", 4); + + // Make sure we emit VTable for dynamic class as field +#pragma omp target map(myActualCar) + { + myActualCar.drive(); + } + return 0; +} diff --git a/clang/test/OpenMP/target_vtable_memberexpr_indirect_vtable_codegen.cpp b/clang/test/OpenMP/target_vtable_memberexpr_indirect_vtable_codegen.cpp new file mode 100644 index 0000000000000..0535ba1dec741 --- /dev/null +++ b/clang/test/OpenMP/target_vtable_memberexpr_indirect_vtable_codegen.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 +// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s +// expected-no-diagnostics + + +// CHECK-DAG: $_ZN4Base5BaseAEi = comdat any +// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any +// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any +// CHECK-DAG: $_ZN4BaseD1Ev = comdat any +// CHECK-DAG: $_ZN4BaseD0Ev = comdat any +// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any +// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any +// CHECK-DAG: $_ZN4BaseD2Ev = comdat any +// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any +// CHECK-DAG: $_ZTV4Base = comdat any +// CHECK-DAG: $_ZTV7Derived = comdat any +class Base { +public: + + virtual ~Base() = default; + + virtual void BaseA(int a) { } +}; + +class Derived : public Base { +public: + + ~Derived() override = default; + + void BaseA(int a) override { x = a; } + + virtual void DerivedB() { } +private: + int x; +}; + +struct VirtualContainer { + Base baseObj; + Derived derivedObj; + Base *basePtr; +}; + +int main() { + VirtualContainer container; + container.basePtr = &container.derivedObj; + int a = 50; +#pragma omp target map(container.baseObj, container.derivedObj, \ + container.basePtr[ : 1]) + { + container.baseObj.BaseA(a); + container.derivedObj.BaseA(a); + container.derivedObj.DerivedB(); + container.basePtr->BaseA(a); + } + return 0; +} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index f43ef932e965a..cc0d4c89f9b9f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -390,6 +390,8 @@ class OffloadEntriesInfoManager { OMPTargetGlobalVarEntryIndirect = 0x8, /// Mark the entry as a register requires global. OMPTargetGlobalRegisterRequires = 0x10, + /// Mark the entry as a declare target indirect vtable. + OMPTargetGlobalVarEntryIndirectVTable = 0x20, }; /// Kind of device clause for declare target variables @@ -2666,7 +2668,8 @@ class OpenMPIRBuilder { enum EmitMetadataErrorKind { EMIT_MD_TARGET_REGION_ERROR, EMIT_MD_DECLARE_TARGET_ERROR, - EMIT_MD_GLOBAL_VAR_LINK_ERROR + EMIT_MD_GLOBAL_VAR_LINK_ERROR, + EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR }; /// Callback function type diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 220eee3cb8b08..236cfab3f031c 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -10246,6 +10246,13 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( continue; } break; + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect: + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable: + if (!CE->getAddress()) { + ErrorFn(EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR, E.second); + continue; + } + break; default: break; } @@ -10255,12 +10262,17 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( // entry. Indirect variables are handled separately on the device. if (auto *GV = dyn_cast(CE->getAddress())) if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) && - Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect) + (Flags != + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect || + Flags != OffloadEntriesInfoManager:: + OMPTargetGlobalVarEntryIndirectVTable)) continue; // Indirect globals need to use a special name that doesn't match the name // of the associated host global. - if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect) + if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect || + Flags == + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable) createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(), Flags, CE->getLinkage(), CE->getVarName()); else @@ -10689,7 +10701,9 @@ void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo( } return; } - if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect) + if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect || + Flags == + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable) OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, VarName.str()); diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index 8fd722bb15022..3317441f04eba 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -94,6 +94,8 @@ enum OpenMPOffloadingDeclareTargetFlags { OMP_DECLARE_TARGET_INDIRECT = 0x08, /// This is an entry corresponding to a requirement to be registered. OMP_REGISTER_REQUIRES = 0x10, + /// Mark the entry global as being an indirect vtable. + OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20, }; enum TargetAllocTy : int32_t { diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index b57a2f815cba6..6fc330b92f0f5 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -434,7 +434,8 @@ static int loadImagesOntoDevice(DeviceTy &Device) { llvm::offloading::EntryTy DeviceEntry = Entry; if (Entry.Size) { - if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, + if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) && + Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &DeviceEntry.Address) != OFFLOAD_SUCCESS) REPORT("Failed to load symbol %s\n", Entry.SymbolName); @@ -443,7 +444,9 @@ static int loadImagesOntoDevice(DeviceTy &Device) { // the device to point to the memory on the host. if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) || (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) { - if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address, + if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) && + !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) && + Device.RTL->data_submit(DeviceId, DeviceEntry.Address, Entry.Address, Entry.Size) != OFFLOAD_SUCCESS) REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName); diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp index 71423ae0c94d9..d5436bde47ba5 100644 --- a/offload/libomptarget/device.cpp +++ b/offload/libomptarget/device.cpp @@ -112,21 +112,58 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image, llvm::SmallVector> IndirectCallTable; for (const auto &Entry : Entries) { if (Entry.Kind != llvm::object::OffloadKind::OFK_OpenMP || - Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT)) + Entry.Size == 0 || + (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) && + !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE))) continue; - assert(Entry.Size == sizeof(void *) && "Global not a function pointer?"); - auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); - - void *Ptr; - if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr)) - return error::createOffloadError(error::ErrorCode::INVALID_BINARY, - "failed to load %s", Entry.SymbolName); - - HstPtr = Entry.Address; - if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo)) - return error::createOffloadError(error::ErrorCode::INVALID_BINARY, - "failed to load %s", Entry.SymbolName); + size_t PtrSize = sizeof(void *); + if (Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) { + // This is a VTable entry, the current entry is the first index of the + // VTable and Entry.Size is the total size of the VTable. Unlike the + // indirect function case below, the Global is not of size Entry.Size and + // is instead of size PtrSize (sizeof(void*)). + void *Vtable; + void *res; + if (Device.RTL->get_global(Binary, PtrSize, Entry.SymbolName, &Vtable)) + return error::createOffloadError(error::ErrorCode::INVALID_BINARY, + "failed to load %s", Entry.SymbolName); + + // HstPtr = Entry.Address; + if (Device.retrieveData(&res, Vtable, PtrSize, AsyncInfo)) + return error::createOffloadError(error::ErrorCode::INVALID_BINARY, + "failed to load %s", Entry.SymbolName); + if (Device.synchronize(AsyncInfo)) + return error::createOffloadError( + error::ErrorCode::INVALID_BINARY, + "failed to synchronize after retrieving %s", Entry.SymbolName); + // Calculate and emplace entire Vtable from first Vtable byte + for (uint64_t i = 0; i < Entry.Size / PtrSize; ++i) { + auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); + HstPtr = reinterpret_cast( + reinterpret_cast(Entry.Address) + i * PtrSize); + DevPtr = reinterpret_cast(reinterpret_cast(res) + + i * PtrSize); + } + } else { + // Indirect function case: Entry.Size should equal PtrSize since we're + // dealing with a single function pointer (not a VTable) + assert(Entry.Size == PtrSize && "Global not a function pointer?"); + auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back(); + void *Ptr; + if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr)) + return error::createOffloadError(error::ErrorCode::INVALID_BINARY, + "failed to load %s", Entry.SymbolName); + + HstPtr = Entry.Address; + if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo)) + return error::createOffloadError(error::ErrorCode::INVALID_BINARY, + "failed to load %s", Entry.SymbolName); + } + if (Device.synchronize(AsyncInfo)) + return error::createOffloadError( + error::ErrorCode::INVALID_BINARY, + "failed to synchronize after retrieving %s", Entry.SymbolName); } // If we do not have any indirect globals we exit early. diff --git a/offload/test/api/omp_indirect_call_table_manual.c b/offload/test/api/omp_indirect_call_table_manual.c new file mode 100644 index 0000000000000..e958d47d69dad --- /dev/null +++ b/offload/test/api/omp_indirect_call_table_manual.c @@ -0,0 +1,107 @@ +// RUN: %libomptarget-compile-run-and-check-generic +#include +#include +#include + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL + +typedef struct { + uint64_t Reserved; + uint16_t Version; + uint16_t Kind; // OpenMP==1 + uint32_t Flags; + void *Address; + char *SymbolName; + uint64_t Size; + uint64_t Data; + void *AuxAddr; +} __tgt_offload_entry; + +enum OpenMPOffloadingDeclareTargetFlags { + /// Mark the entry global as having a 'link' attribute. + OMP_DECLARE_TARGET_LINK = 0x01, + /// Mark the entry global as being an indirectly callable function. + OMP_DECLARE_TARGET_INDIRECT = 0x08, + /// This is an entry corresponding to a requirement to be registered. + OMP_REGISTER_REQUIRES = 0x10, + /// Mark the entry global as being an indirect vtable. + OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20, +}; + +#pragma omp begin declare variant match(device = {kind(gpu)}) +// Provided by the runtime. +void *__llvm_omp_indirect_call_lookup(void *host_ptr); +#pragma omp declare target to(__llvm_omp_indirect_call_lookup) \ + device_type(nohost) +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(cpu)}) +// We assume unified addressing on the CPU target. +void *__llvm_omp_indirect_call_lookup(void *host_ptr) { return host_ptr; } +#pragma omp end declare variant + +#pragma omp begin declare target +void foo(int *i) { *i += 1; } +void bar(int *i) { *i += 10; } +void baz(int *i) { *i += 100; } +#pragma omp end declare target + +typedef void (*fptr_t)(int *i); + +// Dispatch Table - declare separately on host and device to avoid +// registering with the library; this also allows us to use separate +// names, which is convenient for debugging. This dispatchTable is +// intended to mimic what Clang emits for C++ vtables. +fptr_t dispatchTable[] = {foo, bar, baz}; +#pragma omp begin declare target device_type(nohost) +fptr_t GPUdispatchTable[] = {foo, bar, baz}; +fptr_t *GPUdispatchTablePtr = GPUdispatchTable; +#pragma omp end declare target + +// Define "manual" OpenMP offload entries, where we emit Clang +// offloading entry structure definitions in the appropriate ELF +// section. This allows us to emulate the offloading entries that Clang would +// normally emit for us + +__attribute__((weak, section("llvm_offload_entries"), aligned(8))) +const __tgt_offload_entry __offloading_entry[] = {{ + 0ULL, // Reserved + 1, // Version + 1, // Kind + OMP_DECLARE_TARGET_INDIRECT_VTABLE, // Flags + &dispatchTable, // Address + "GPUdispatchTablePtr", // SymbolName + (size_t)(sizeof(dispatchTable)), // Size + 0ULL, // Data + NULL // AuxAddr +}}; + +// Mimic how Clang emits vtable pointers for C++ classes +typedef struct { + fptr_t *dispatchPtr; +} myClass; + +// --------------------------------------------------------------------------- +int main() { + myClass obj_foo = {dispatchTable + 0}; + myClass obj_bar = {dispatchTable + 1}; + myClass obj_baz = {dispatchTable + 2}; + int aaa = 0; + +#pragma omp target map(aaa) map(to : obj_foo, obj_bar, obj_baz) + { + // Lookup + fptr_t *foo_ptr = __llvm_omp_indirect_call_lookup(obj_foo.dispatchPtr); + fptr_t *bar_ptr = __llvm_omp_indirect_call_lookup(obj_bar.dispatchPtr); + fptr_t *baz_ptr = __llvm_omp_indirect_call_lookup(obj_baz.dispatchPtr); + foo_ptr[0](&aaa); + bar_ptr[0](&aaa); + baz_ptr[0](&aaa); + } + + assert(aaa == 111); + // CHECK: PASS + printf("PASS\n"); + return 0; +}