@@ -32,7 +32,9 @@ class SPIRVABIInfo : public CommonSPIRABIInfo {
   void computeInfo(CGFunctionInfo &FI) const override;

 private:
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+  ABIArgInfo classifyArgumentType(QualType Ty) const;
 };
 } // end anonymous namespace
 namespace {
@@ -56,14 +58,66 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
       : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
+                                  const VarDecl *D) const override;
+  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+                                         SyncScope Scope,
+                                         llvm::AtomicOrdering Ordering,
+                                         llvm::LLVMContext &Ctx) const override;
 };
+
+inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
+  switch (Scope) {
+  case SyncScope::HIPSingleThread:
+  case SyncScope::SingleScope:
+    return "singlethread";
+  case SyncScope::HIPWavefront:
+  case SyncScope::OpenCLSubGroup:
+  case SyncScope::WavefrontScope:
+    return "subgroup";
+  case SyncScope::HIPWorkgroup:
+  case SyncScope::OpenCLWorkGroup:
+  case SyncScope::WorkgroupScope:
+    return "workgroup";
+  case SyncScope::HIPAgent:
+  case SyncScope::OpenCLDevice:
+  case SyncScope::DeviceScope:
+    return "device";
+  case SyncScope::SystemScope:
+  case SyncScope::HIPSystem:
+  case SyncScope::OpenCLAllSVMDevices:
+    return "";
+  }
+  return "";
+}
 } // End anonymous namespace.

 void CommonSPIRABIInfo::setCCs() {
   assert(getRuntimeCC() == llvm::CallingConv::C);
   RuntimeCC = llvm::CallingConv::SPIR_FUNC;
 }

+ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+    return DefaultABIInfo::classifyReturnType(RetTy);
+  if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+    return DefaultABIInfo::classifyReturnType(RetTy);
+
+  if (const RecordType *RT = RetTy->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    if (RD->hasFlexibleArrayMember())
+      return DefaultABIInfo::classifyReturnType(RetTy);
+  }
+
+  // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
+  // avoid encoding various architecture specific bits here we return
+  // everything as direct to retain type info for things like aggregates, for
+  // later perusal when translating back to LLVM/lowering in the BE. This is
+  // also why we disable flattening as the outcomes can mismatch between
+  // SPIR-V and AMDGPU. This will be revisited / optimised in the future.
+  return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false);
+}
+
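For illustration, a minimal sketch (not part of the patch; the struct, function, and IR shown are illustrative approximations) of what the direct-return classification above is expected to produce for an aggregate when targeting AMD-flavoured SPIR-V, e.g. spirv64-amd-amdhsa:

    // A small aggregate with trivial copy/destruction and no flexible
    // array member falls through to the final getDirect() above.
    struct Pair {
      int X;
      int Y;
    };

    // Expected to be returned directly, keeping the aggregate type in the
    // IR signature (approx.):
    //   define %struct.Pair @_Z8makePairv()
    // rather than being flattened or returned indirectly via sret.
    Pair makePair() { return {1, 2}; }
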
 ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   if (getContext().getLangOpts().CUDAIsDevice) {
     // Coerce pointer arguments with default address space to CrossWorkGroup
@@ -78,18 +132,51 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
       return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
     }

-    // Force copying aggregate type in kernel arguments by value when
-    // compiling CUDA targeting SPIR-V. This is required for the object
-    // copied to be valid on the device.
-    // This behavior follows the CUDA spec
-    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
-    // and matches the NVPTX implementation.
-    if (isAggregateTypeForABI(Ty))
+    if (isAggregateTypeForABI(Ty)) {
+      if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+        // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
+        // currently expressible in SPIR-V; SPIR-V passes aggregates byval,
+        // which the AMDGPU kernel ABI does not allow. Passing aggregates as
+        // direct works around this impedance mismatch, as it retains type
+        // info and can be correctly handled, post reverse-translation, by
+        // the AMDGPU BE, which has to support this CC for legacy OpenCL
+        // purposes. It can be brittle and does lead to performance
+        // degradation in certain pathological cases. This will be revisited
+        // / optimised in the future, once a way to deal with the byref/byval
+        // impedance mismatch is identified.
+        return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
+      // Force copying aggregate type in kernel arguments by value when
+      // compiling CUDA targeting SPIR-V. This is required for the object
+      // copied to be valid on the device.
+      // This behavior follows the CUDA spec
+      // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
+      // and matches the NVPTX implementation.
       return getNaturalAlignIndirect(Ty, /* byval */ true);
+    }
   }
   return classifyArgumentType(Ty);
 }

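For context, a hedged sketch (not from the patch; the kernel, struct, and IR are illustrative approximations) of how the aggregate branch above diverges by vendor for a HIP/CUDA kernel parameter:

    struct Params {
      float A;
      float B;
    };

    // With an AMD SPIR-V target the aggregate is classified as direct, so
    // the kernel signature is expected to keep the type (approx.):
    //   define spir_kernel void @_Z1k6Params(%struct.Params %P)
    // With a non-AMD SPIR-V target it is passed indirectly byval (approx.):
    //   define spir_kernel void @_Z1k6Params(ptr byval(%struct.Params) %P)
    __global__ void k(Params P) { /* use P.A, P.B */ }
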
+ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+    return DefaultABIInfo::classifyArgumentType(Ty);
+  if (!isAggregateTypeForABI(Ty))
+    return DefaultABIInfo::classifyArgumentType(Ty);
+
+  // Records with non-trivial destructors/copy-constructors should not be
+  // passed by value.
+  if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
+    return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+  if (const RecordType *RT = Ty->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    if (RD->hasFlexibleArrayMember())
+      return DefaultABIInfo::classifyArgumentType(Ty);
+  }
+
+  return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false);
+}
+
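A similarly hedged sketch (not part of the patch; the types and IR are illustrative) of the non-kernel argument rules above, assuming an AMD SPIR-V target:

    // Non-trivial copy constructor: getRecordArgABI() fires, so the
    // argument is passed indirectly at natural alignment (approx.: ptr).
    struct NonTrivial {
      NonTrivial(const NonTrivial &);
      int V;
    };

    // Trivial aggregate: falls through to getDirect(), keeping
    // %struct.Trivial in the IR signature.
    struct Trivial {
      int V;
    };

    // approx.: declare void @callee(ptr %A, %struct.Trivial %B)
    void callee(NonTrivial A, Trivial B);
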
 void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
   // The logic is same as in DefaultABIInfo with an exception on the kernel
   // arguments handling.
@@ -132,6 +219,35 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   }
 }

+LangAS
+SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+                                                 const VarDecl *D) const {
+  assert(!CGM.getLangOpts().OpenCL &&
+         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+         "Address space agnostic languages only");
+  // If we're here it means that we're using the SPIRDefIsGen ASMap, hence for
+  // the global AS we can rely on either cuda_device or sycl_global to be
+  // correct; however, since this is not a CUDA Device context, we use
+  // sycl_global to prevent confusion with the assertion.
+  LangAS DefaultGlobalAS = getLangASFromTargetAS(
+      CGM.getContext().getTargetAddressSpace(LangAS::sycl_global));
+  if (!D)
+    return DefaultGlobalAS;
+
+  LangAS AddrSpace = D->getType().getAddressSpace();
+  if (AddrSpace != LangAS::Default)
+    return AddrSpace;
+
+  return DefaultGlobalAS;
+}
+
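A short sketch (not part of the patch) of the effect of the default chosen above, assuming sycl_global maps to target address space 1 on this target:

    // A namespace-scope variable with no explicit address space qualifier,
    // in an address-space-agnostic language, is expected to land in the
    // global AS (approx. IR):
    //   @Counter = addrspace(1) global i32 0
    int Counter = 0;
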
+llvm::SyncScope::ID
+SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
+                                           llvm::AtomicOrdering,
+                                           llvm::LLVMContext &Ctx) const {
+  return Ctx.getOrInsertSyncScopeID(mapClangSyncScopeToLLVM(Scope));
+}
+
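To make the scope mapping concrete, a hedged example (not part of the patch; it assumes Clang's HIP scoped-atomic builtins, __hip_atomic_fetch_add and __HIP_MEMORY_SCOPE_WORKGROUP, and the IR is approximate) of how a scoped atomic is expected to surface:

    // Workgroup-scoped HIP atomic; HIPWorkgroup maps to "workgroup" in
    // mapClangSyncScopeToLLVM above, so the lowered instruction carries
    // that sync scope (approx.):
    //   atomicrmw add ptr %X, i32 1 syncscope("workgroup") monotonic
    __device__ void bump(int *X) {
      __hip_atomic_fetch_add(X, 1, __ATOMIC_RELAXED,
                             __HIP_MEMORY_SCOPE_WORKGROUP);
    }
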
 /// Construct a SPIR-V target extension type for the given OpenCL image type.
 static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
                                      StringRef OpenCLName,