From 2f71c265d994dabbd391e69cfcd43b4a1eb1d9ac Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Thu, 20 Feb 2025 15:35:23 -0800 Subject: [PATCH] [Driver][SYCL] Enable --offload-arch support for SYCL offloading. --- clang/include/clang/Basic/Cuda.h | 2 + .../clang/Basic/DiagnosticDriverKinds.td | 10 + clang/include/clang/Basic/DiagnosticGroups.td | 4 + clang/include/clang/Driver/Driver.h | 17 ++ clang/lib/Basic/Cuda.cpp | 2 + clang/lib/Driver/Driver.cpp | 131 ++++++++++- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 5 +- clang/lib/Driver/ToolChains/Clang.cpp | 45 +++- clang/lib/Driver/ToolChains/Cuda.cpp | 4 +- clang/lib/Driver/ToolChains/SYCL.cpp | 216 ++++++++++++++++++ clang/lib/Driver/ToolChains/SYCL.h | 110 +++++++++ .../test/Driver/sycl-offload-arch-amd-gpu.cpp | 125 ++++++++++ .../Driver/sycl-offload-arch-intel-cpu.cpp | 78 +++++++ .../Driver/sycl-offload-arch-intel-gpu.cpp | 123 ++++++++++ .../Driver/sycl-offload-arch-nvidia-gpu.cpp | 51 +++++ llvm/include/llvm/TargetParser/Triple.h | 8 + llvm/lib/TargetParser/Triple.cpp | 11 + 17 files changed, 928 insertions(+), 14 deletions(-) create mode 100644 clang/test/Driver/sycl-offload-arch-amd-gpu.cpp create mode 100644 clang/test/Driver/sycl-offload-arch-intel-cpu.cpp create mode 100644 clang/test/Driver/sycl-offload-arch-intel-gpu.cpp create mode 100644 clang/test/Driver/sycl-offload-arch-nvidia-gpu.cpp diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 793cab1f4e84a..f59da935e551e 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -106,6 +106,8 @@ enum class OffloadArch { GFX90a, GFX90c, GFX9_4_GENERIC, + GFX940, + GFX941, GFX942, GFX950, GFX10_1_GENERIC, diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 2e571ee33cafa..98ae70f2a1f71 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -843,4 
+843,14 @@ def warn_missing_include_dirs : Warning< def err_drv_malformed_warning_suppression_mapping : Error< "failed to process suppression mapping file '%0': %1">; + +def err_drv_sycl_offload_arch_missing_value : Error< + "must pass in an explicit cpu or gpu architecture to '--offload-arch'">; + +def err_drv_invalid_sycl_target : Error<"SYCL target is invalid: '%0'">; + +def warn_drv_sycl_offload_target_duplicate : Warning< + "SYCL offloading target '%0' is similar to target '%1' already specified; " + "will be ignored">, InGroup; + } diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 05e39899e6f25..0ca1bff2fda43 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1628,3 +1628,7 @@ def ExplicitSpecializationStorageClass : DiagGroup<"explicit-specialization-stor // A warning for options that enable a feature that is not yet complete def ExperimentalOption : DiagGroup<"experimental-option">; + +// SYCL Warnings +def SyclTarget : DiagGroup<"sycl-target">; + diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index b463dc2a93550..5d3d5ad923a5c 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -846,6 +846,23 @@ class Driver { /// Compute the default -fmodule-cache-path. /// \return True if the system provides a default cache directory. static bool getDefaultModuleCachePath(SmallVectorImpl &Result); + + /// Vector of Macros that need to be added to the Host compilation in a + /// SYCL based offloading scenario. These macros are gathered during + /// construction of the device compilations. + mutable std::vector SYCLTargetMacroArgs; + + /// addSYCLTargetMacroArg - Add the given macro to the vector of args to be + /// added to the host compilation step. 
+ void addSYCLTargetMacroArg(const llvm::opt::ArgList &Args, + StringRef Macro) const { + SYCLTargetMacroArgs.push_back(Args.MakeArgString(Macro)); + } + + /// getSYCLTargetMacroArgs - return the previously gathered macro target args. + llvm::ArrayRef getSYCLTargetMacroArgs() const { + return SYCLTargetMacroArgs; + } }; /// \return True if the last defined optimization level is -Ofast. diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index f45fb0eca3714..2310ccadb2996 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -124,6 +124,8 @@ static const OffloadArchToStringMap arch_names[] = { GFX(90a), // gfx90a GFX(90c), // gfx90c {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, + GFX(940), // gfx940 + GFX(941), // gfx941 GFX(942), // gfx942 GFX(950), // gfx950 {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 50941d2aaa429..546adc179319b 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -833,10 +833,14 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const { static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) { SmallVector SYCLAlias = {"spir", "spir64", "spirv", "spirv32", - "spirv64"}; + "spirv64", "spir64_x86_64", + "spir64_gen", "nvptx64"}; if (llvm::is_contained(SYCLAlias, TargetArch)) { llvm::Triple TargetTriple; TargetTriple.setArchName(TargetArch); + // Return the full SYCL target triple string for NVidia GPU targets. 
+ if (TargetTriple.getArch() == llvm::Triple::nvptx64) + return llvm::Triple("nvptx64-nvidia-cuda"); TargetTriple.setVendor(llvm::Triple::UnknownVendor); TargetTriple.setOS(llvm::Triple::UnknownOS); return TargetTriple; @@ -846,16 +850,25 @@ static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) { static bool addSYCLDefaultTriple(Compilation &C, SmallVectorImpl &SYCLTriples) { + + llvm::Triple DefaultTriple = getSYCLDeviceTriple( + C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32" + : "spirv64"); + for (const auto &SYCLTriple : SYCLTriples) { + if (SYCLTriple == DefaultTriple) + return false; + // If we encounter a known non-spir* target, do not add the default triple. + if (SYCLTriple.isNVPTX() || SYCLTriple.isAMDGCN()) + return false; + if(SYCLTriple.isSPIRAOT()) + return false; + } // Check current set of triples to see if the default has already been set. for (const auto &SYCLTriple : SYCLTriples) { if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch && SYCLTriple.isSPIROrSPIRV()) return false; } - // Add the default triple as it was not found. - llvm::Triple DefaultTriple = getSYCLDeviceTriple( - C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32" - : "spirv64"); SYCLTriples.insert(SYCLTriples.begin(), DefaultTriple); return true; } @@ -1066,19 +1079,119 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // -ffreestanding cannot be used with -fsycl argSYCLIncompatible(options::OPT_ffreestanding); + // Map of SYCL target triple strings to their corresponding target archs. + // Example: spir64_x86_64 --> SKYLAKEAVX512 + llvm::StringMap> DerivedArchs; + llvm::StringMap FoundNormalizedTriples; llvm::SmallVector UniqueSYCLTriplesVec; - + // StringSet to contain SYCL target triples. + llvm::StringSet<> SYCLTriples; + // If the user specified --offload-arch, deduce the offloading + // target triple(s) from the set of architecture(s). + // Create a toolchain for each valid triple. 
+ // We do not support SYCL offloading if any of the inputs is a + // .cu (for CUDA type) or .hip (for HIP type) file. if (IsSYCL) { - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); + if(C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP && + !IsCuda) { + + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); + auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), + HostTC->getTriple()); + + // Attempt to deduce the offloading triple from the set of architectures. + // We need to temporarily create these toolchains so that we can access + // tools for inferring architectures. + llvm::DenseSet Archs; + if (NVPTXTriple) { + auto TempTC = std::make_unique( + *this, *NVPTXTriple, *HostTC, C.getInputArgs()); + for (StringRef Arch : + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true)) + Archs.insert(Arch); + } + if (AMDTriple) { + auto TempTC = std::make_unique( + *this, *AMDTriple, *HostTC, C.getInputArgs()); + for (StringRef Arch : + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true)) + Archs.insert(Arch); + } + if (!AMDTriple && !NVPTXTriple) { + for (StringRef Arch : + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true)) + Archs.insert(Arch); + } + for (StringRef Arch : Archs) { + if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( + getProcessorFromTargetID(*NVPTXTriple, Arch)))) { + DerivedArchs[NVPTXTriple->getTriple()].insert(Arch); + } else if (AMDTriple && + IsSYCLSupportedAMDGPUArch(StringToOffloadArch( + getProcessorFromTargetID(*AMDTriple, Arch)))) { + DerivedArchs[AMDTriple->getTriple()].insert(Arch); + } else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) { + DerivedArchs[getSYCLDeviceTriple("spir64_x86_64").getTriple()].insert( + Arch); + } else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) { + StringRef IntelGPUArch; + // For Intel Graphics AOT target, 
valid values for '--offload-arch' + // are mapped to valid device names accepted by OCLOC (the Intel GPU AOT + // compiler) via the '-device' option. The mapIntelGPUArchName + // function maps the accepted values for '--offload-arch' to enable SYCL + // offloading to Intel GPUs and the corresponding '-device' value passed + // to OCLOC. + IntelGPUArch = mapIntelGPUArchName(Arch).data(); + DerivedArchs[getSYCLDeviceTriple("spir64_gen").getTriple()].insert( + IntelGPUArch); + } else { + Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; + return; + } + } + // Emit an error if architecture value is not provided + // to --offload-arch. + if (Archs.empty()) { + Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); + return; + } + + for (const auto &TripleAndArchs : DerivedArchs) + SYCLTriples.insert(TripleAndArchs.first()); + + for (const auto &Val : SYCLTriples) { + llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val.getKey())); + std::string NormalizedName = SYCLTargetTriple.normalize(); + + // Make sure we don't have a duplicate triple. + auto Duplicate = FoundNormalizedTriples.find(NormalizedName); + if (Duplicate != FoundNormalizedTriples.end()) { + Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) + << Val.getKey() << Duplicate->second; + continue; + } + + // Store the current triple so that we can check for duplicates in the + // following iterations. + FoundNormalizedTriples[NormalizedName] = Val.getKey(); + UniqueSYCLTriplesVec.push_back(SYCLTargetTriple); + } + + addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); + } else + addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); // We'll need to use the SYCL and host triples as the key into - // getOffloadingDeviceToolChain, because the device toolchains we're + // getOffloadToolChain, because the device toolchains we're // going to create will depend on both. 
const ToolChain *HostTC = C.getSingleOffloadToolChain(); for (const auto &TT : UniqueSYCLTriplesVec) { auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, HostTC->getTriple()); C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL); + if (DerivedArchs.contains(TT.getTriple())) + KnownArchs[SYCLTC] = DerivedArchs[TT.getTriple()]; } } @@ -6596,7 +6709,7 @@ const ToolChain &Driver::getOffloadToolChain( if (Kind == Action::OFK_HIP) TC = std::make_unique(*this, Target, *HostTC, Args); - else if (Kind == Action::OFK_OpenMP) + else if ((Kind == Action::OFK_OpenMP) || (Kind == Action::OFK_SYCL)) TC = std::make_unique(*this, Target, *HostTC, Args); break; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index fe8bfb361e5a6..a45a139f57e9c 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -46,8 +46,9 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - assert(DeviceOffloadingKind == Action::OFK_OpenMP && - "Only OpenMP offloading kinds are supported."); + assert((DeviceOffloadingKind == Action::OFK_OpenMP || + DeviceOffloadingKind == Action::OFK_SYCL) && + "Only OpenMP or SYCL offloading kinds are supported."); if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true)) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 96af466e067a8..8165d5d0ae3f6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5091,6 +5091,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, InputInfoList HostOffloadingInputs; const InputInfo *CudaDeviceInput = nullptr; const InputInfo *OpenMPDeviceInput = nullptr; + const InputInfo *SYCLDeviceInput = nullptr; for (const InputInfo &I : Inputs) { if (&I == &Input 
|| I.getType() == types::TY_Nothing) { // This is the primary input or contains nothing. @@ -5108,13 +5109,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CudaDeviceInput = &I; } else if (IsOpenMPDevice && !OpenMPDeviceInput) { OpenMPDeviceInput = &I; + } else if (IsSYCL && !SYCLDeviceInput) { + SYCLDeviceInput = &I; } else { llvm_unreachable("unexpectedly given multiple inputs"); } } const llvm::Triple *AuxTriple = - (IsCuda || IsHIP) ? TC.getAuxTriple() : nullptr; + (IsSYCL || IsCuda || IsHIP) ? TC.getAuxTriple() : nullptr; bool IsWindowsMSVC = RawTriple.isWindowsMSVCEnvironment(); bool IsUEFI = RawTriple.isUEFI(); bool IsIAMCU = RawTriple.isOSIAMCU(); @@ -5208,6 +5211,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (IsSYCL) { if (IsSYCLDevice) { + if (Triple.isNVPTX()) { + StringRef GPUArchName = JA.getOffloadingArch(); + // TODO: Once default arch is moved to at least SM_53, empty arch should + // also result in the flag added. + if (!GPUArchName.empty() && + StringToOffloadArch(GPUArchName) >= OffloadArch::SM_53) + CmdArgs.push_back("-fnative-half-type"); + } // Host triple is needed when doing SYCL device compilations. llvm::Triple AuxT = C.getDefaultToolChain().getTriple(); std::string NormalizedTriple = AuxT.normalize(); @@ -5220,6 +5231,33 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Set O2 optimization level by default if (!Args.getLastArg(options::OPT_O_Group)) CmdArgs.push_back("-O2"); + // Add any predefined macros associated with intel_gpu* type targets + // passed in with -fsycl-targets + // TODO: Macros are populated during device compilations and saved for + // addition to the host compilation. There is no dependence connection + // between device and host where we should be able to use the offloading + // arch to add the macro to the host compile. 
+ auto addTargetMacros = [&](const llvm::Triple &Triple) { + if (!Triple.isSPIR() && !Triple.isNVPTX() && !Triple.isAMDGCN()) + return; + SmallString<64> Macro; + if ((Triple.isSPIR() && + Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) || + Triple.isNVPTX() || Triple.isAMDGCN()) { + StringRef Device = JA.getOffloadingArch(); + if (!Device.empty() && + !clang::driver::getGenDeviceMacro(Device).empty()) { + Macro = "-D"; + Macro += clang::driver::getGenDeviceMacro(Device); + } + } else if (Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64) + Macro = "-D__SYCL_TARGET_INTEL_X86_64__"; + if (Macro.size()) { + CmdArgs.push_back(Args.MakeArgString(Macro)); + D.addSYCLTargetMacroArg(Args, Macro); + } + }; + addTargetMacros(RawTriple); } else { // Add any options that are needed specific to SYCL offload while // performing the host side compilation. @@ -5227,6 +5265,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Let the front-end host compilation flow know about SYCL offload // compilation. CmdArgs.push_back("-fsycl-is-host"); + + // Add the SYCL target macro arguments that were generated during the + // device compilation step. + for (auto &Macro : D.getSYCLTargetMacroArgs()) + CmdArgs.push_back(Args.MakeArgString(Macro)); } // Set options for both host and device. 
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 06b0b0913d24e..df404b83beb69 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -848,8 +848,8 @@ void CudaToolChain::addClangTargetOptions(
   StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
 
   assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
-          DeviceOffloadingKind == Action::OFK_Cuda) &&
-         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
+          DeviceOffloadingKind == Action::OFK_Cuda || DeviceOffloadingKind == Action::OFK_SYCL) &&
+         "Only OpenMP or CUDA or SYCL offloading kinds are supported for NVIDIA GPUs.");
 
   CC1Args.append({"-fcuda-is-device", "-mllvm",
                   "-enable-memcpyopt-without-libcalls",
diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp
index a2b07ef4824a1..778c936b44c6d 100644
--- a/clang/lib/Driver/ToolChains/SYCL.cpp
+++ b/clang/lib/Driver/ToolChains/SYCL.cpp
@@ -15,6 +15,222 @@ using namespace clang::driver::tools;
 using namespace clang;
 using namespace llvm::opt;
 
+// Struct that relates an AOT target value with
+// Intel CPUs and Intel GPUs.
+struct StringToOffloadArchSYCLMap {
+  const char *ArchName;
+  SYCLSupportedIntelArchs IntelArch;
+};
+
+// Mapping of supported SYCL offloading architectures.
+static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
+    // Intel CPU mapping.
+    {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
+    {"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
+    {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
+    {"corei7", SYCLSupportedIntelArchs::COREI7},
+    {"westmere", SYCLSupportedIntelArchs::WESTMERE},
+    {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
+    {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
+    {"broadwell", SYCLSupportedIntelArchs::BROADWELL},
+    {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
+    {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
+    {"skylake", SYCLSupportedIntelArchs::SKYLAKE},
+    {"skx", SYCLSupportedIntelArchs::SKX},
+    {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
+    {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
+    {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
+    {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
+    {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
+    // Intel GPU mapping.
+    {"bdw", SYCLSupportedIntelArchs::BDW},
+    {"skl", SYCLSupportedIntelArchs::SKL},
+    {"kbl", SYCLSupportedIntelArchs::KBL},
+    {"cfl", SYCLSupportedIntelArchs::CFL},
+    {"apl", SYCLSupportedIntelArchs::APL},
+    {"bxt", SYCLSupportedIntelArchs::BXT},
+    {"glk", SYCLSupportedIntelArchs::GLK},
+    {"whl", SYCLSupportedIntelArchs::WHL},
+    {"aml", SYCLSupportedIntelArchs::AML},
+    {"cml", SYCLSupportedIntelArchs::CML},
+    {"icllp", SYCLSupportedIntelArchs::ICLLP},
+    {"icl", SYCLSupportedIntelArchs::ICL},
+    {"ehl", SYCLSupportedIntelArchs::EHL},
+    {"jsl", SYCLSupportedIntelArchs::JSL},
+    {"tgllp", SYCLSupportedIntelArchs::TGLLP},
+    {"tgl", SYCLSupportedIntelArchs::TGL},
+    {"rkl", SYCLSupportedIntelArchs::RKL},
+    {"adl_s", SYCLSupportedIntelArchs::ADL_S},
+    {"rpl_s", SYCLSupportedIntelArchs::RPL_S},
+    {"adl_p", SYCLSupportedIntelArchs::ADL_P},
+    {"adl_n", SYCLSupportedIntelArchs::ADL_N},
+    {"dg1", SYCLSupportedIntelArchs::DG1},
+    {"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
+    {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
+    {"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
+    {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
+    {"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
+    {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
+    {"pvc", SYCLSupportedIntelArchs::PVC},
+    {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
+    {"mtl_u", SYCLSupportedIntelArchs::MTL_U},
+    {"mtl_s", SYCLSupportedIntelArchs::MTL_S},
+    {"arl_u", SYCLSupportedIntelArchs::ARL_U},
+    {"arl_s", SYCLSupportedIntelArchs::ARL_S},
+    {"mtl_h", SYCLSupportedIntelArchs::MTL_H},
+    {"arl_h", SYCLSupportedIntelArchs::ARL_H},
+    {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
+    {"lnl_m", SYCLSupportedIntelArchs::LNL_M}};
+
+// Check if the user provided value for --offload-arch is a valid
+// SYCL supported Intel AOT target.
+// Returns SYCLSupportedIntelArchs::UNKNOWN when the name is not in the table.
+SYCLSupportedIntelArchs
+clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) {
+  auto Match = std::find_if(
+      std::begin(StringToArchNamesMap), std::end(StringToArchNamesMap),
+      [ArchNameAsString](const StringToOffloadArchSYCLMap &Entry) {
+        return ArchNameAsString == Entry.ArchName;
+      });
+  if (Match == std::end(StringToArchNamesMap))
+    return SYCLSupportedIntelArchs::UNKNOWN;
+  return Match->IntelArch;
+}
+
+// This is a mapping between the user provided --offload-arch value for Intel
+// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
+// AOT compiler).
+StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) {
+  return llvm::StringSwitch<StringRef>(ArchName)
+      .Case("bdw", "bdw")
+      .Case("skl", "skl")
+      .Case("kbl", "kbl")
+      .Case("cfl", "cfl")
+      .Cases("apl", "bxt", "apl")
+      .Case("glk", "glk")
+      .Case("whl", "whl")
+      .Case("aml", "aml")
+      .Case("cml", "cml")
+      .Cases("icllp", "icl", "icllp")
+      .Cases("ehl", "jsl", "ehl")
+      .Cases("tgllp", "tgl", "tgllp")
+      .Case("rkl", "rkl")
+      .Cases("adl_s", "rpl_s", "adl_s")
+      .Case("adl_p", "adl_p")
+      .Case("adl_n", "adl_n")
+      .Case("dg1", "dg1")
+      .Cases("acm_g10", "dg2_g10", "acm_g10")
+      .Cases("acm_g11", "dg2_g11", "acm_g11")
+      .Cases("acm_g12", "dg2_g12", "acm_g12")
+      .Case("pvc", "pvc")
+      .Case("pvc_vg", "pvc_vg")
+      .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
+      .Case("mtl_h", "mtl_h")
+      .Case("arl_h", "arl_h")
+      .Case("bmg_g21", "bmg_g21")
+      .Case("lnl_m", "lnl_m")
+      .Default("");
+}
+
+// Return the "__SYCL_TARGET_<DEVICE>__" macro name for a known Intel, NVIDIA
+// or AMD GPU device name, or an empty string when the device is unknown.
+SmallString<64> clang::driver::getGenDeviceMacro(StringRef DeviceName) {
+  SmallString<64> Macro;
+  StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
+                      .Case("bdw", "INTEL_GPU_BDW")
+                      .Case("skl", "INTEL_GPU_SKL")
+                      .Case("kbl", "INTEL_GPU_KBL")
+                      .Case("cfl", "INTEL_GPU_CFL")
+                      .Case("apl", "INTEL_GPU_APL")
+                      .Case("glk", "INTEL_GPU_GLK")
+                      .Case("whl", "INTEL_GPU_WHL")
+                      .Case("aml", "INTEL_GPU_AML")
+                      .Case("cml", "INTEL_GPU_CML")
+                      .Case("icllp", "INTEL_GPU_ICLLP")
+                      .Case("ehl", "INTEL_GPU_EHL")
+                      .Case("tgllp", "INTEL_GPU_TGLLP")
+                      .Case("rkl", "INTEL_GPU_RKL")
+                      .Case("adl_s", "INTEL_GPU_ADL_S")
+                      .Case("adl_p", "INTEL_GPU_ADL_P")
+                      .Case("adl_n", "INTEL_GPU_ADL_N")
+                      .Case("dg1", "INTEL_GPU_DG1")
+                      .Case("acm_g10", "INTEL_GPU_ACM_G10")
+                      .Case("acm_g11", "INTEL_GPU_ACM_G11")
+                      .Case("acm_g12", "INTEL_GPU_ACM_G12")
+                      .Case("pvc", "INTEL_GPU_PVC")
+                      .Case("pvc_vg", "INTEL_GPU_PVC_VG")
+                      .Case("mtl_u", "INTEL_GPU_MTL_U")
+                      .Case("mtl_h", "INTEL_GPU_MTL_H")
+                      .Case("arl_h", "INTEL_GPU_ARL_H")
+                      .Case("bmg_g21", "INTEL_GPU_BMG_G21")
+                      .Case("lnl_m", "INTEL_GPU_LNL_M")
+                      .Case("ptl_h", "INTEL_GPU_PTL_H")
+                      .Case("ptl_u", "INTEL_GPU_PTL_U")
+                      .Case("sm_50", "NVIDIA_GPU_SM_50")
+                      .Case("sm_52", "NVIDIA_GPU_SM_52")
+                      .Case("sm_53", "NVIDIA_GPU_SM_53")
+                      .Case("sm_60", "NVIDIA_GPU_SM_60")
+                      .Case("sm_61", "NVIDIA_GPU_SM_61")
+                      .Case("sm_62", "NVIDIA_GPU_SM_62")
+                      .Case("sm_70", "NVIDIA_GPU_SM_70")
+                      .Case("sm_72", "NVIDIA_GPU_SM_72")
+                      .Case("sm_75", "NVIDIA_GPU_SM_75")
+                      .Case("sm_80", "NVIDIA_GPU_SM_80")
+                      .Case("sm_86", "NVIDIA_GPU_SM_86")
+                      .Case("sm_87", "NVIDIA_GPU_SM_87")
+                      .Case("sm_89", "NVIDIA_GPU_SM_89")
+                      .Case("sm_90", "NVIDIA_GPU_SM_90")
+                      .Case("sm_90a", "NVIDIA_GPU_SM_90A")
+                      .Case("gfx700", "AMD_GPU_GFX700")
+                      .Case("gfx701", "AMD_GPU_GFX701")
+                      .Case("gfx702", "AMD_GPU_GFX702")
+                      .Case("gfx703", "AMD_GPU_GFX703")
+                      .Case("gfx704", "AMD_GPU_GFX704")
+                      .Case("gfx705", "AMD_GPU_GFX705")
+                      .Case("gfx801", "AMD_GPU_GFX801")
+                      .Case("gfx802", "AMD_GPU_GFX802")
+                      .Case("gfx803", "AMD_GPU_GFX803")
+                      .Case("gfx805", "AMD_GPU_GFX805")
+                      .Case("gfx810", "AMD_GPU_GFX810")
+                      .Case("gfx900", "AMD_GPU_GFX900")
+                      .Case("gfx902", "AMD_GPU_GFX902")
+                      .Case("gfx904", "AMD_GPU_GFX904")
+                      .Case("gfx906", "AMD_GPU_GFX906")
+                      .Case("gfx908", "AMD_GPU_GFX908")
+                      .Case("gfx909", "AMD_GPU_GFX909")
+                      .Case("gfx90a", "AMD_GPU_GFX90A")
+                      .Case("gfx90c", "AMD_GPU_GFX90C")
+                      .Case("gfx940", "AMD_GPU_GFX940")
+                      .Case("gfx941", "AMD_GPU_GFX941")
+                      .Case("gfx942", "AMD_GPU_GFX942")
+                      .Case("gfx1010", "AMD_GPU_GFX1010")
+                      .Case("gfx1011", "AMD_GPU_GFX1011")
+                      .Case("gfx1012", "AMD_GPU_GFX1012")
+                      .Case("gfx1013", "AMD_GPU_GFX1013")
+                      .Case("gfx1030", "AMD_GPU_GFX1030")
+                      .Case("gfx1031", "AMD_GPU_GFX1031")
+                      .Case("gfx1032", "AMD_GPU_GFX1032")
+                      .Case("gfx1033", "AMD_GPU_GFX1033")
+                      .Case("gfx1034", "AMD_GPU_GFX1034")
+                      .Case("gfx1035", "AMD_GPU_GFX1035")
+                      .Case("gfx1036", "AMD_GPU_GFX1036")
+                      .Case("gfx1100", "AMD_GPU_GFX1100")
+                      .Case("gfx1101", "AMD_GPU_GFX1101")
+                      .Case("gfx1102", "AMD_GPU_GFX1102")
+                      .Case("gfx1103", "AMD_GPU_GFX1103")
+                      .Case("gfx1150", "AMD_GPU_GFX1150")
+                      .Case("gfx1151", "AMD_GPU_GFX1151")
+                      .Case("gfx1200", "AMD_GPU_GFX1200")
+                      .Case("gfx1201", "AMD_GPU_GFX1201")
+                      .Default("");
+  if (!Ext.empty()) {
+    Macro = "__SYCL_TARGET_";
+    Macro += Ext;
+    Macro += "__";
+  }
+  return Macro;
+}
+
 SYCLInstallationDetector::SYCLInstallationDetector(
     const Driver &D, const llvm::Triple &HostTriple,
     const llvm::opt::ArgList &Args) {}
diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h
index 2a8b4eca9e9f8..2529b00b48e6d 100644
--- a/clang/lib/Driver/ToolChains/SYCL.h
+++ b/clang/lib/Driver/ToolChains/SYCL.h
@@ -9,12 +9,122 @@
 #ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
 
+#include "clang/Basic/Cuda.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 
 namespace clang {
 namespace driver {
 
+// List of architectures (Intel CPUs and Intel GPUs)
+// that support SYCL offloading.
+enum class SYCLSupportedIntelArchs {
+  UNKNOWN, // Returned by StringToOffloadArchSYCL for unrecognized names.
+  // Intel CPUs
+  SKYLAKEAVX512,
+  COREAVX2,
+  COREI7AVX,
+  COREI7,
+  WESTMERE,
+  SANDYBRIDGE,
+  IVYBRIDGE,
+  BROADWELL,
+  COFFEELAKE,
+  ALDERLAKE,
+  SKYLAKE,
+  SKX,
+  CASCADELAKE,
+  ICELAKECLIENT,
+  ICELAKESERVER,
+  SAPPHIRERAPIDS,
+  GRANITERAPIDS,
+  // Intel GPUs
+  BDW,
+  SKL,
+  KBL,
+  CFL,
+  APL,
+  BXT,
+  GLK,
+  WHL,
+  AML,
+  CML,
+  ICLLP,
+  ICL,
+  EHL,
+  JSL,
+  TGLLP,
+  TGL,
+  RKL,
+  ADL_S,
+  RPL_S,
+  ADL_P,
+  ADL_N,
+  DG1,
+  ACM_G10,
+  DG2_G10,
+  ACM_G11,
+  DG2_G11,
+  ACM_G12,
+  DG2_G12,
+  PVC,
+  PVC_VG,
+  MTL_U,
+  MTL_S,
+  ARL_U,
+  ARL_S,
+  MTL_H,
+  ARL_H,
+  BMG_G21,
+  LNL_M,
+};
+
+// Check if the given Arch value is a Generic AMD GPU.
+// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading.
+// This list is used to filter out GFX*_GENERIC AMD GPUs in
+// `IsSYCLSupportedAMDGPUArch`.
+static inline bool IsAMDGenericGPUArch(OffloadArch Arch) {
+  return Arch == OffloadArch::GFX9_GENERIC ||
+         Arch == OffloadArch::GFX9_4_GENERIC ||
+         Arch == OffloadArch::GFX10_1_GENERIC ||
+         Arch == OffloadArch::GFX10_3_GENERIC ||
+         Arch == OffloadArch::GFX11_GENERIC ||
+         Arch == OffloadArch::GFX12_GENERIC;
+}
+
+// Check if the given Arch value is a valid SYCL supported AMD GPU.
+static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) {
+  return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV &&
+         !IsAMDGenericGPUArch(Arch);
+}
+
+// Check if the given Arch value is a valid SYCL supported NVidia GPU.
+static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) {
+  return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a;
+}
+
+// Check if the given Arch value is a valid SYCL supported Intel CPU.
+static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) {
+  return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 &&
+         Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS;
+}
+
+// Check if the given Arch value is a valid SYCL supported Intel GPU.
+static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) {
+  return Arch >= SYCLSupportedIntelArchs::BDW &&
+         Arch <= SYCLSupportedIntelArchs::LNL_M;
+}
+
+// Check if the user provided value for --offload-arch is a valid
+// SYCL supported Intel AOT target.
+SYCLSupportedIntelArchs
+StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString);
+
+// Map the user provided --offload-arch value for an Intel GPU target to the
+// spir64_gen device name accepted by OCLOC (the Intel GPU AOT compiler).
+StringRef mapIntelGPUArchName(StringRef ArchName); +SmallString<64> getGenDeviceMacro(StringRef DeviceName); + class SYCLInstallationDetector { public: SYCLInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, diff --git a/clang/test/Driver/sycl-offload-arch-amd-gpu.cpp b/clang/test/Driver/sycl-offload-arch-amd-gpu.cpp new file mode 100644 index 0000000000000..2e96c10ef1f20 --- /dev/null +++ b/clang/test/Driver/sycl-offload-arch-amd-gpu.cpp @@ -0,0 +1,125 @@ +// SYCL AOT compilation to AMD GPUs using --offload-arch and --offload-new-driver + +// AMD GPUs + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx700 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx700 -DMAC_STR=GFX700 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx701 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx701 -DMAC_STR=GFX701 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx702 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx702 -DMAC_STR=GFX702 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx801 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx801 -DMAC_STR=GFX801 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx802 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx802 -DMAC_STR=GFX802 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx803 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx803 -DMAC_STR=GFX803 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx805 -nogpulib %s 2>&1 | \ +// RUN: 
FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx805 -DMAC_STR=GFX805 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx810 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx810 -DMAC_STR=GFX810 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx900 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx900 -DMAC_STR=GFX900 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx902 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx902 -DMAC_STR=GFX902 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx904 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx904 -DMAC_STR=GFX904 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx906 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx906 -DMAC_STR=GFX906 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx908 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx908 -DMAC_STR=GFX908 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx909 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx909 -DMAC_STR=GFX909 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx90a -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx90a -DMAC_STR=GFX90A + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx90c -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s 
--check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx90c -DMAC_STR=GFX90C + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx942 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx942 -DMAC_STR=GFX942 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1010 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1010 -DMAC_STR=GFX1010 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1011 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1011 -DMAC_STR=GFX1011 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1012 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1012 -DMAC_STR=GFX1012 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1013 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1013 -DMAC_STR=GFX1013 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1030 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1030 -DMAC_STR=GFX1030 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1031 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1031 -DMAC_STR=GFX1031 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1032 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1032 -DMAC_STR=GFX1032 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1033 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s 
--check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1033 -DMAC_STR=GFX1033 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1034 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1034 -DMAC_STR=GFX1034 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1035 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1035 -DMAC_STR=GFX1035 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1036 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1036 -DMAC_STR=GFX1036 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1100 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1100 -DMAC_STR=GFX1100 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1101 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1101 -DMAC_STR=GFX1101 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1102 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1102 -DMAC_STR=GFX1102 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1103 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1103 -DMAC_STR=GFX1103 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1150 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1150 -DMAC_STR=GFX1150 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1151 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s 
--check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1151 -DMAC_STR=GFX1151 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1200 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1200 -DMAC_STR=GFX1200 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=gfx1201 -nogpulib %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-AMD-GPU,CLANG-OFFLOAD-PACKAGER-AMD -DDEV_STR=gfx1201 -DMAC_STR=GFX1201 + +// TARGET-TRIPLE-AMD-GPU: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" +// TARGET-TRIPLE-AMD-GPU: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__" +// CLANG-OFFLOAD-PACKAGER-AMD: clang-offload-packager{{.*}} "--image={{.*}}triple=amdgcn-amd-amdhsa,arch=[[DEV_STR]],kind=sycl" + +// Tests for handling an invalid architecture. +// +// RUN: not %clangxx --offload-new-driver -fsycl --offload-arch=gfx10_3_generic %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=ERROR %s +// RUN: not %clang_cl --offload-new-driver -fsycl --offload-arch=gfx10_3_generic %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=ERROR %s + +// ERROR: error: SYCL target is invalid: 'gfx10_3_generic' + diff --git a/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp new file mode 100644 index 0000000000000..08c5070e30542 --- /dev/null +++ b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp @@ -0,0 +1,78 @@ +/// Tests the behaviors of using -fsycl --offload-new-driver +// --offload-arch=<arch>.
+ +// SYCL AOT compilation to Intel CPUs using --offload-arch + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=broadwell %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=broadwell + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=coffeelake %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=coffeelake + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icelake-client %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=icelake-client + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skylake-avx512 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=skylake-avx512 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=core-avx2 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=core-avx2 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=corei7-avx %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=corei7-avx + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=corei7 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=corei7 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=westmere %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=westmere + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=sandybridge %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=sandybridge + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=ivybridge %s 2>&1 | \ +// RUN: FileCheck %s 
--check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=ivybridge + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=alderlake %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=alderlake + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skylake %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=skylake + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skx %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=skx + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cascadelake %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=cascadelake + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icelake-server %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=icelake-server + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=sapphirerapids %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=sapphirerapids + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=graniterapids %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=graniterapids + +// TARGET-TRIPLE-CPU: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown" +// TARGET-TRIPLE-CPU: "-D__SYCL_TARGET_INTEL_X86_64__" +// CLANG-OFFLOAD-PACKAGER-CPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_x86_64-unknown-unknown,arch=[[DEV_STR]],kind=sycl" + +// Tests for handling a missing architecture. 
+// +// RUN: not %clangxx --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=MISSING %s +// RUN: not %clang_cl --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=MISSING %s + +// MISSING: error: must pass in an explicit cpu or gpu architecture to '--offload-arch' + +// Tests for handling an incorrect architecture. +// +// RUN: not %clangxx --offload-new-driver -fsycl --offload-arch=badArch %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=BAD-ARCH %s +// RUN: not %clang_cl --offload-new-driver -fsycl --offload-arch=badArch %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=BAD-ARCH %s + +// BAD-ARCH: error: SYCL target is invalid: 'badArch' + diff --git a/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp new file mode 100644 index 0000000000000..f04e82a4ffb54 --- /dev/null +++ b/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp @@ -0,0 +1,123 @@ +/// Tests the behaviors of using -fsycl --offload-new-driver +// --offload-arch=<arch>.
+ +// SYCL AOT compilation to Intel GPUs using --offload-arch + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bdw %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=bdw -DMAC_STR=BDW + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=skl -DMAC_STR=SKL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=kbl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=kbl -DMAC_STR=KBL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cfl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=cfl -DMAC_STR=CFL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=apl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=apl -DMAC_STR=APL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bxt %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=apl -DMAC_STR=APL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=glk %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=glk -DMAC_STR=GLK + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=whl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=whl -DMAC_STR=WHL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=aml %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=aml -DMAC_STR=AML + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cml %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU 
-DDEV_STR=cml -DMAC_STR=CML + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icllp %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=icllp -DMAC_STR=ICLLP + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=icllp -DMAC_STR=ICLLP + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=ehl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=ehl -DMAC_STR=EHL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=jsl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=ehl -DMAC_STR=EHL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=tgllp %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=tgllp -DMAC_STR=TGLLP + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=tgl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=tgllp -DMAC_STR=TGLLP + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=rkl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=rkl -DMAC_STR=RKL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_s %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_s -DMAC_STR=ADL_S + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=rpl_s %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_s -DMAC_STR=ADL_S + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_p %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU 
-DDEV_STR=adl_p -DMAC_STR=ADL_P + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_n %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_n -DMAC_STR=ADL_N + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg1 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=dg1 -DMAC_STR=DG1 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g10 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g10 -DMAC_STR=ACM_G10 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g10 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g10 -DMAC_STR=ACM_G10 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g11 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g11 -DMAC_STR=ACM_G11 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g11 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g11 -DMAC_STR=ACM_G11 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g12 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g12 -DMAC_STR=ACM_G12 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g12 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g12 -DMAC_STR=ACM_G12 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=pvc %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=pvc -DMAC_STR=PVC + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=pvc_vg %s 2>&1 | \ +// RUN: FileCheck %s 
--check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_u %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_s %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_u %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_s %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_h %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_h -DMAC_STR=MTL_H + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_h %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=arl_h -DMAC_STR=ARL_H + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bmg_g21 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=bmg_g21 -DMAC_STR=BMG_G21 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=lnl_m %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=lnl_m -DMAC_STR=LNL_M + +// TARGET-TRIPLE-GPU: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" +// TARGET-TRIPLE-GPU: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__" +// CLANG-OFFLOAD-PACKAGER-GPU: clang-offload-packager{{.*}} 
"--image={{.*}}triple=spir64_gen-unknown-unknown,arch=[[DEV_STR]],kind=sycl" +// CLANG-OFFLOAD-PACKAGER-GPU-OPTS: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_gen-unknown-unknown,arch=[[DEV_STR]],kind=sycl{{.*}}" diff --git a/clang/test/Driver/sycl-offload-arch-nvidia-gpu.cpp b/clang/test/Driver/sycl-offload-arch-nvidia-gpu.cpp new file mode 100644 index 0000000000000..84829a7291fe6 --- /dev/null +++ b/clang/test/Driver/sycl-offload-arch-nvidia-gpu.cpp @@ -0,0 +1,51 @@ +/// Tests the behaviors of using --offload-arch for offloading +// SYCL kernels to NVIDIA GPUs using --offload-new-driver. + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_50 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_50 -DMAC_STR=SM_50 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_52 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_52 -DMAC_STR=SM_52 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_53 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_53 -DMAC_STR=SM_53 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_60 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_60 -DMAC_STR=SM_60 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_61 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_61 -DMAC_STR=SM_61 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_62 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_62 -DMAC_STR=SM_62 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_70 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s
--check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_70 -DMAC_STR=SM_70 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_72 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_72 -DMAC_STR=SM_72 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_75 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_75 -DMAC_STR=SM_75 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_80 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_80 -DMAC_STR=SM_80 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_86 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_86 -DMAC_STR=SM_86 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_87 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_87 -DMAC_STR=SM_87 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_89 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_89 -DMAC_STR=SM_89 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_90 -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_90 -DMAC_STR=SM_90 + +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch=sm_90a -nocudalib -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=CLANG-OFFLOAD-PACKAGER-GPU,MACRO_NVIDIA -DDEV_STR=sm_90a -DMAC_STR=SM_90A + +// MACRO_NVIDIA: clang{{.*}} "-triple" "nvptx64-nvidia-cuda" +// MACRO_NVIDIA: "-D__SYCL_TARGET_NVIDIA_GPU_[[MAC_STR]]__" +// CLANG-OFFLOAD-PACKAGER-GPU: clang-offload-packager{{.*}} 
"--image={{.*}}triple=nvptx64-nvidia-cuda,arch=[[DEV_STR]],kind=sycl" diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 09c0d223d9b4d..7e15e5ae56c0e 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -176,6 +176,8 @@ class Triple { DXILSubArch_v1_7, DXILSubArch_v1_8, LatestDXILSubArch = DXILSubArch_v1_8, + SPIRSubArch_gen, // Sub-arch type for Intel GPUs. + SPIRSubArch_x86_64, // Sub-arch type for Intel CPUs. }; enum VendorType { UnknownVendor, @@ -877,6 +879,12 @@ class Triple { return getArch() == Triple::spirv; } + /// Tests whether the target is SPIR with an AOT sub-arch (gen or x86_64). + bool isSPIRAOT() const { + return isSPIR() && (getSubArch() == Triple::SPIRSubArch_gen || + getSubArch() == Triple::SPIRSubArch_x86_64); + } + /// Tests whether the target is NVPTX (32- or 64-bit). bool isNVPTX() const { return getArch() == Triple::nvptx || getArch() == Triple::nvptx64; diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index e9e6f130f757c..e6ab0afdd0353 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -617,6 +617,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2", "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", "spirv64v1.6", Triple::spirv64) + .StartsWith("spir64", Triple::spir64) .StartsWith("kalimba", Triple::kalimba) .Case("lanai", Triple::lanai) .Case("renderscript32", Triple::renderscript32) @@ -797,6 +798,16 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { if (SubArchName == "arm64ec") return Triple::AArch64SubArch_arm64ec; + if (SubArchName.starts_with("spir")) { + StringRef SubArch(SubArchName); + if (SubArch.consume_front("spir64_") || SubArch.consume_front("spir_")) { + if (SubArch == "gen") + return Triple::SPIRSubArch_gen; + else if (SubArch == "x86_64") + return Triple::SPIRSubArch_x86_64; + } + } + if
(SubArchName.starts_with("spirv")) return StringSwitch(SubArchName) .EndsWith("v1.0", Triple::SPIRVSubArch_v10)