@@ -32,7 +32,9 @@ class SPIRVABIInfo : public CommonSPIRABIInfo {
   void computeInfo(CGFunctionInfo &FI) const override;

 private:
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+  ABIArgInfo classifyArgumentType(QualType Ty) const;
 };
 } // end anonymous namespace
 namespace {
@@ -56,14 +58,66 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
   SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
       : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+  LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
+                                  const VarDecl *D) const override;
+  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+                                         SyncScope Scope,
+                                         llvm::AtomicOrdering Ordering,
+                                         llvm::LLVMContext &Ctx) const override;
 };
+
+inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
+  switch (Scope) {
+  case SyncScope::HIPSingleThread:
+  case SyncScope::SingleScope:
+    return "singlethread";
+  case SyncScope::HIPWavefront:
+  case SyncScope::OpenCLSubGroup:
+  case SyncScope::WavefrontScope:
+    return "subgroup";
+  case SyncScope::HIPWorkgroup:
+  case SyncScope::OpenCLWorkGroup:
+  case SyncScope::WorkgroupScope:
+    return "workgroup";
+  case SyncScope::HIPAgent:
+  case SyncScope::OpenCLDevice:
+  case SyncScope::DeviceScope:
+    return "device";
+  case SyncScope::SystemScope:
+  case SyncScope::HIPSystem:
+  case SyncScope::OpenCLAllSVMDevices:
+    return "";
+  }
+  return "";
+}
 } // End anonymous namespace.

 void CommonSPIRABIInfo::setCCs() {
   assert(getRuntimeCC() == llvm::CallingConv::C);
   RuntimeCC = llvm::CallingConv::SPIR_FUNC;
 }

+ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+    return DefaultABIInfo::classifyReturnType(RetTy);
+  if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+    return DefaultABIInfo::classifyReturnType(RetTy);
+
+  if (const RecordType *RT = RetTy->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    if (RD->hasFlexibleArrayMember())
+      return DefaultABIInfo::classifyReturnType(RetTy);
+  }
+
+  // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
+  // avoid encoding various architecture specific bits here we return
+  // everything as direct to retain type info for things like aggregates, for
+  // later perusal when translating back to LLVM/lowering in the BE. This is
+  // also why we disable flattening as the outcomes can mismatch between
+  // SPIR-V and AMDGPU. This will be revisited / optimised in the future.
+  return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false);
+}
+
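For illustration, a minimal sketch (not part of the patch; the struct, function, and IR shown are illustrative approximations) of what the direct-return classification above is expected to produce for an aggregate when targeting AMD-flavoured SPIR-V, e.g. spirv64-amd-amdhsa:

    // A small aggregate with trivial copy/destruction and no flexible
    // array member falls through to the final getDirect() above.
    struct Pair {
      int X;
      int Y;
    };

    // Expected to be returned directly, keeping the aggregate type in the
    // IR signature (approx.):
    //   define %struct.Pair @_Z8makePairv()
    // rather than being flattened or returned indirectly via sret.
    Pair makePair() { return {1, 2}; }
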
 ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
   if (getContext().getLangOpts().CUDAIsDevice) {
     // Coerce pointer arguments with default address space to CrossWorkGroup
@@ -78,18 +132,51 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
       return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
     }

-    // Force copying aggregate type in kernel arguments by value when
-    // compiling CUDA targeting SPIR-V. This is required for the object
-    // copied to be valid on the device.
-    // This behavior follows the CUDA spec
-    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
-    // and matches the NVPTX implementation.
-    if (isAggregateTypeForABI(Ty))
+    if (isAggregateTypeForABI(Ty)) {
+      if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+        // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
+        // currently expressible in SPIR-V; SPIR-V passes aggregates byval,
+        // which the AMDGPU kernel ABI does not allow. Passing aggregates as
+        // direct works around this impedance mismatch, as it retains type
+        // info and can be correctly handled, post reverse-translation, by
+        // the AMDGPU BE, which has to support this CC for legacy OpenCL
+        // purposes. It can be brittle and does lead to performance
+        // degradation in certain pathological cases. This will be revisited
+        // / optimised in the future, once a way to deal with the byref/byval
+        // impedance mismatch is identified.
+        return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
+      // Force copying aggregate type in kernel arguments by value when
+      // compiling CUDA targeting SPIR-V. This is required for the object
+      // copied to be valid on the device.
+      // This behavior follows the CUDA spec
+      // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
+      // and matches the NVPTX implementation.
       return getNaturalAlignIndirect(Ty, /* byval */ true);
+    }
   }
   return classifyArgumentType(Ty);
 }

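For context, a hedged sketch (not from the patch; the kernel, struct, and IR are illustrative approximations) of how the aggregate branch above diverges by vendor for a HIP/CUDA kernel parameter:

    struct Params {
      float A;
      float B;
    };

    // With an AMD SPIR-V target the aggregate is classified as direct, so
    // the kernel signature is expected to keep the type (approx.):
    //   define spir_kernel void @_Z1k6Params(%struct.Params %P)
    // With a non-AMD SPIR-V target it is passed indirectly byval (approx.):
    //   define spir_kernel void @_Z1k6Params(ptr byval(%struct.Params) %P)
    __global__ void k(Params P) { /* use P.A, P.B */ }
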
+ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
+  if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+    return DefaultABIInfo::classifyArgumentType(Ty);
+  if (!isAggregateTypeForABI(Ty))
+    return DefaultABIInfo::classifyArgumentType(Ty);
+
+  // Records with non-trivial destructors/copy-constructors should not be
+  // passed by value.
+  if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
+    return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+  if (const RecordType *RT = Ty->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    if (RD->hasFlexibleArrayMember())
+      return DefaultABIInfo::classifyArgumentType(Ty);
+  }
+
+  return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false);
+}
+
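A similarly hedged sketch (not part of the patch; the types and IR are illustrative) of the non-kernel argument rules above, assuming an AMD SPIR-V target:

    // Non-trivial copy constructor: getRecordArgABI() fires, so the
    // argument is passed indirectly at natural alignment (approx.: ptr).
    struct NonTrivial {
      NonTrivial(const NonTrivial &);
      int V;
    };

    // Trivial aggregate: falls through to getDirect(), keeping
    // %struct.Trivial in the IR signature.
    struct Trivial {
      int V;
    };

    // approx.: declare void @callee(ptr %A, %struct.Trivial %B)
    void callee(NonTrivial A, Trivial B);
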
 void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
   // The logic is same as in DefaultABIInfo with an exception on the kernel
   // arguments handling.
@@ -132,6 +219,35 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   }
 }

+LangAS
+SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+                                                 const VarDecl *D) const {
+  assert(!CGM.getLangOpts().OpenCL &&
+         !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
+         "Address space agnostic languages only");
+  // If we're here it means that we're using the SPIRDefIsGen ASMap, hence for
+  // the global AS we can rely on either cuda_device or sycl_global to be
+  // correct; however, since this is not a CUDA Device context, we use
+  // sycl_global to prevent confusion with the assertion.
+  LangAS DefaultGlobalAS = getLangASFromTargetAS(
+      CGM.getContext().getTargetAddressSpace(LangAS::sycl_global));
+  if (!D)
+    return DefaultGlobalAS;
+
+  LangAS AddrSpace = D->getType().getAddressSpace();
+  if (AddrSpace != LangAS::Default)
+    return AddrSpace;
+
+  return DefaultGlobalAS;
+}
+
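A short sketch (not part of the patch) of the effect of the default chosen above, assuming sycl_global maps to target address space 1 on this target:

    // A namespace-scope variable with no explicit address space qualifier,
    // in an address-space-agnostic language, is expected to land in the
    // global AS (approx. IR):
    //   @Counter = addrspace(1) global i32 0
    int Counter = 0;
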
+llvm::SyncScope::ID
+SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
+                                           llvm::AtomicOrdering,
+                                           llvm::LLVMContext &Ctx) const {
+  return Ctx.getOrInsertSyncScopeID(mapClangSyncScopeToLLVM(Scope));
+}
+
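To make the scope mapping concrete, a hedged example (not part of the patch; it assumes Clang's HIP scoped-atomic builtins, __hip_atomic_fetch_add and __HIP_MEMORY_SCOPE_WORKGROUP, and the IR is approximate) of how a scoped atomic is expected to surface:

    // Workgroup-scoped HIP atomic; HIPWorkgroup maps to "workgroup" in
    // mapClangSyncScopeToLLVM above, so the lowered instruction carries
    // that sync scope (approx.):
    //   atomicrmw add ptr %X, i32 1 syncscope("workgroup") monotonic
    __device__ void bump(int *X) {
      __hip_atomic_fetch_add(X, 1, __ATOMIC_RELAXED,
                             __HIP_MEMORY_SCOPE_WORKGROUP);
    }
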
 /// Construct a SPIR-V target extension type for the given OpenCL image type.
 static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
                                      StringRef OpenCLName,