diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 94224e1038758..5550c73d132b4 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -874,4 +874,12 @@ def warn_drv_openacc_without_cir : Warning<"OpenACC directives will result in no runtime behavior; use " "-fclangir to enable runtime effect">, InGroup; +def err_drv_sycl_offload_arch_missing_value : + Error<"must pass in a valid cpu or gpu architecture string to '--offload-arch'">; + +def err_drv_invalid_sycl_target : Error<"SYCL target is invalid: '%0'">; + +def warn_drv_sycl_offload_target_duplicate : Warning< + "SYCL offloading target '%0' is similar to target '%1' already specified; " + "will be ignored">, InGroup; } diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 36fa3227fd6a6..ff382b67bc349 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1751,3 +1751,8 @@ def ExplicitSpecializationStorageClass : DiagGroup<"explicit-specialization-stor // A warning for options that enable a feature that is not yet complete def ExperimentalOption : DiagGroup<"experimental-option">; + +// SYCL Warnings +def SyclTarget : DiagGroup<"sycl-target">; + + diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index 99b1024b9d0d4..aa336268d96af 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -102,13 +102,14 @@ enum class OffloadArch { Generic, // A processor model named 'generic' if the target backend defines a // public one. 
// Intel CPUs - GRANITERAPIDS, + GRANITERAPIDS_CPU, // Intel GPUs - BMG_G21, + BMG_G21_GPU, LAST, CudaDefault = OffloadArch::SM_52, HIPDefault = OffloadArch::GFX906, + SYCLDefault = OffloadArch::BMG_G21_GPU, }; static inline bool IsNVIDIAOffloadArch(OffloadArch A) { @@ -121,11 +122,11 @@ static inline bool IsAMDOffloadArch(OffloadArch A) { } static inline bool IsIntelCPUOffloadArch(OffloadArch Arch) { - return Arch >= OffloadArch::GRANITERAPIDS && Arch < OffloadArch::BMG_G21; + return Arch >= OffloadArch::GRANITERAPIDS_CPU && Arch < OffloadArch::BMG_G21_GPU; } static inline bool IsIntelGPUOffloadArch(OffloadArch Arch) { - return Arch >= OffloadArch::BMG_G21 && Arch < OffloadArch::LAST; + return Arch >= OffloadArch::BMG_G21_GPU && Arch < OffloadArch::LAST; } static inline bool IsIntelOffloadArch(OffloadArch Arch) { diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 7ca848f11b561..2ffdb4692fb71 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -846,6 +846,22 @@ class Driver { /// Compute the default -fmodule-cache-path. /// \return True if the system provides a default cache directory. static bool getDefaultModuleCachePath(SmallVectorImpl &Result); + /// Vector of Macros that need to be added to the Host compilation in a + /// SYCL based offloading scenario. These macros are gathered during + /// construction of the device compilations. + mutable std::vector SYCLTargetMacro; + + /// addSYCLTargetMacro - Add the given macro to the vector of args to be + /// added to the host compilation step. + void addSYCLTargetMacro(const llvm::opt::ArgList &Args, + StringRef Macro) const { + SYCLTargetMacro.push_back(Args.MakeArgString(Macro)); + } + + /// getSYCLTargetMacro - return the previously gathered macro target args. + llvm::ArrayRef getSYCLTargetMacro() const { + return SYCLTargetMacro; + } }; /// \return True if the last defined optimization level is -Ofast. 
diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp index a019f0ac18c84..339f63afe13a7 100644 --- a/clang/lib/Basic/OffloadArch.cpp +++ b/clang/lib/Basic/OffloadArch.cpp @@ -88,9 +88,9 @@ static const OffloadArchToStringMap ArchNames[] = { GFX(1201), // gfx1201 {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, // Intel CPUs - {OffloadArch::GRANITERAPIDS, "graniterapids", ""}, + {OffloadArch::GRANITERAPIDS_CPU, "graniterapids_cpu", ""}, // Intel GPUS - {OffloadArch::BMG_G21, "bmg_g21", ""}, + {OffloadArch::BMG_G21_GPU, "bmg_g21_gpu", ""}, {OffloadArch::Generic, "generic", ""}, // clang-format on }; diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 3235bf2e710da..eea8f64ae381b 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -240,8 +240,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::GFX1201: case OffloadArch::AMDGCNSPIRV: case OffloadArch::Generic: - case OffloadArch::GRANITERAPIDS: - case OffloadArch::BMG_G21: + case OffloadArch::GRANITERAPIDS_CPU: + case OffloadArch::BMG_G21_GPU: case OffloadArch::LAST: break; case OffloadArch::UNKNOWN: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 9e27e634676dc..06e9169eccd50 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2333,8 +2333,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::GFX1201: case OffloadArch::AMDGCNSPIRV: case OffloadArch::Generic: - case OffloadArch::GRANITERAPIDS: - case OffloadArch::BMG_G21: + case OffloadArch::GRANITERAPIDS_CPU: + case OffloadArch::BMG_G21_GPU: case OffloadArch::UNUSED: case OffloadArch::UNKNOWN: break; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 2f86b6633df1c..4ffa45c5dbf40 100644 --- a/clang/lib/Driver/Driver.cpp +++ 
b/clang/lib/Driver/Driver.cpp @@ -169,6 +169,17 @@ getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { return std::nullopt; } + +static std::optional +getINTELOffloadTargetTriple(const Driver &D, const ArgList &Args, + const llvm::Triple &HostTriple) { + if (!Args.hasArg(options::OPT_offload_EQ)) { + return llvm::Triple(HostTriple.isArch64Bit() ? "spirv64-intel-sycl" + : "spirv32-intel-sycl"); + } + return std::nullopt; +} + template static bool usesInput(const ArgList &Args, F &&Fn) { return llvm::any_of(Args, [&](Arg *A) { return (A->getOption().matches(options::OPT_x) && @@ -918,13 +929,12 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const { } static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) { - SmallVector SYCLAlias = {"spir", "spir64", "spirv", "spirv32", - "spirv64"}; + SmallVector SYCLAlias = {"spirv", "spirv32", "spirv64"}; if (llvm::is_contained(SYCLAlias, TargetArch)) { llvm::Triple TargetTriple; TargetTriple.setArchName(TargetArch); - TargetTriple.setVendor(llvm::Triple::UnknownVendor); - TargetTriple.setOS(llvm::Triple::UnknownOS); + TargetTriple.setVendor(llvm::Triple::Intel); + TargetTriple.setOS(llvm::Triple::SYCL); return TargetTriple; } return llvm::Triple(TargetArch); @@ -932,16 +942,17 @@ static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) { static bool addSYCLDefaultTriple(Compilation &C, SmallVectorImpl &SYCLTriples) { - // Check current set of triples to see if the default has already been set. - for (const auto &SYCLTriple : SYCLTriples) { - if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch && - SYCLTriple.isSPIROrSPIRV()) - return false; - } - // Add the default triple as it was not found. + // Default triple is spirv32-intel-sycl or + // spirv64-intel-sycl. llvm::Triple DefaultTriple = getSYCLDeviceTriple( C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32" : "spirv64"); + + // Check current triple to see if the default has already been set. 
+ for (const auto &SYCLTriple : SYCLTriples) { + if (SYCLTriple == DefaultTriple) + return false; + } SYCLTriples.insert(SYCLTriples.begin(), DefaultTriple); return true; } @@ -1141,19 +1152,89 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // -ffreestanding cannot be used with -fsycl argSYCLIncompatible(options::OPT_ffreestanding); + llvm::StringMap> DerivedArchs; + llvm::StringMap FoundNormalizedTriples; + std::multiset SYCLTriples; llvm::SmallVector UniqueSYCLTriplesVec; if (IsSYCL) { - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); + if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && + !IsHIP && !IsCuda ) { + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + auto IntelTriple = getINTELOffloadTargetTriple(*this, C.getInputArgs(), + HostTC->getTriple()); + // Attempt to deduce the offloading triple from the set of architectures. + // We need to temporarily create these toolchains so that we can access + // tools for inferring architectures. + llvm::DenseSet Archs; + for (const std::optional &TT : {IntelTriple}) { + if (!TT) + continue; + + auto &TC = + getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, *TT, + C.getDefaultToolChain().getTriple()); + for (StringRef Arch : + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, true)) + Archs.insert(Arch); + } + + for (StringRef Arch : Archs) { + if (IntelTriple && + IsIntelOffloadArch(StringToOffloadArch( + getProcessorFromTargetID(*IntelTriple, Arch)))) { + DerivedArchs[IntelTriple->getTriple()].insert(Arch); + } else { + Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; + return; + } + } + + // If the set is empty then we failed to find a native architecture. 
+ if (Archs.empty()) { + Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); + return; + } + + for (const auto &TripleAndArchs : DerivedArchs) + SYCLTriples.insert(TripleAndArchs.first()); //spirv64-intel-sycl + + for (StringRef Val : SYCLTriples) { + llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val)); + std::string NormalizedName = SYCLTargetTriple.normalize(); + + // Make sure we don't have a duplicate triple. + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Val); + + if (!Inserted) { + Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) + << Val << TripleIt->second; + continue; + } + + // If the specified target is invalid, emit a diagnostic. + if (SYCLTargetTriple.getArch() == llvm::Triple::UnknownArch) { + Diag(clang::diag::err_drv_invalid_sycl_target) << Val; + continue; + } + + UniqueSYCLTriplesVec.push_back(SYCLTargetTriple); + } + addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); + } else + addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); // We'll need to use the SYCL and host triples as the key into - // getOffloadingDeviceToolChain, because the device toolchains we're + // getOffloadToolChain, because the device toolchains we're // going to create will depend on both. 
const ToolChain *HostTC = C.getSingleOffloadToolChain(); for (const auto &TT : UniqueSYCLTriplesVec) { auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, HostTC->getTriple()); C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL); + if (DerivedArchs.contains(TT.getTriple())) + KnownArchs[SYCLTC] = DerivedArchs[TT.getTriple()]; } } @@ -4846,7 +4927,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } else if (Kind == Action::OFK_HIP) { Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); } else if (Kind == Action::OFK_SYCL) { - Archs.insert(StringRef()); + Archs.insert(OffloadArchToString(OffloadArch::SYCLDefault)); } else if (Kind == Action::OFK_OpenMP) { // Accept legacy `-march` device arguments for OpenMP. if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind) @@ -6740,7 +6821,7 @@ const ToolChain &Driver::getOffloadToolChain( if (Kind == Action::OFK_HIP) TC = std::make_unique(*this, Target, *HostTC, Args); - else if (Kind == Action::OFK_OpenMP) + else if ((Kind == Action::OFK_OpenMP) || (Kind == Action::OFK_SYCL)) TC = std::make_unique(*this, Target, *HostTC, Args); break; diff --git a/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp new file mode 100644 index 0000000000000..b5704dd304708 --- /dev/null +++ b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp @@ -0,0 +1,29 @@ +/// Tests the behaviors of using -fsycl --offload-arch=. + +// SYCL AOT compilation to Intel CPUs using --offload-arch + +// RUN: %clangxx -### -fsycl --offload-arch=graniterapids_cpu %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=graniterapids_cpu + +// TARGET-TRIPLE-CPU: clang{{.*}} "-triple" "spirv64-intel-sycl" +// CLANG-OFFLOAD-PACKAGER-CPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spirv64-intel-sycl,arch=[[DEV_STR]],kind=sycl" + +// Tests for handling a missing architecture. 
+// +// RUN: not %clangxx -fsycl --offload-arch= %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=MISSING-OFFLOAD-ARCH-VALUE %s +// RUN: not %clang_cl -fsycl --offload-arch= %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=MISSING-OFFLOAD-ARCH-VALUE %s + +// MISSING-OFFLOAD-ARCH-VALUE: error: must pass in a valid cpu or gpu architecture string to '--offload-arch' + +// Tests for handling an incorrect --offload-arch architecture value. +// +// RUN: not %clangxx -fsycl --offload-arch=badArch %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=BAD-ARCH %s +// RUN: not %clang_cl -fsycl --offload-arch=badArch %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=BAD-ARCH %s + +// BAD-ARCH: error: SYCL target is invalid: 'badArch' + + diff --git a/clang/test/Driver/sycl-offload-arch-intel-gpus.cpp b/clang/test/Driver/sycl-offload-arch-intel-gpus.cpp new file mode 100644 index 0000000000000..20b009bd9383c --- /dev/null +++ b/clang/test/Driver/sycl-offload-arch-intel-gpus.cpp @@ -0,0 +1,11 @@ +/// Tests the behaviors of using -fsycl --offload-arch=. 
+ +// SYCL AOT compilation to Intel GPUs using --offload-arch + +// RUN: %clangxx -### -fsycl --offload-arch=bmg_g21_gpu %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=bmg_g21_gpu -DMAC_STR=BMG_G21_GPU + + +// TARGET-TRIPLE-GPU: clang{{.*}} "-triple" "spirv64-intel-sycl" +// CLANG-OFFLOAD-PACKAGER-GPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spirv64-intel-sycl,arch=[[DEV_STR]],kind=sycl" +// CLANG-OFFLOAD-PACKAGER-GPU-OPTS: clang-offload-packager{{.*}} "--image={{.*}}triple=spirv64-intel-sycl,arch=[[DEV_STR]],kind=sycl{{.*}}" diff --git a/clang/unittests/Basic/OffloadArchTest.cpp b/clang/unittests/Basic/OffloadArchTest.cpp index c19ad0043d774..cc3618d2c953d 100644 --- a/clang/unittests/Basic/OffloadArchTest.cpp +++ b/clang/unittests/Basic/OffloadArchTest.cpp @@ -21,14 +21,14 @@ TEST(OffloadArchTest, basic) { EXPECT_TRUE(IsAMDOffloadArch(OffloadArch::GFX1201)); EXPECT_TRUE(IsAMDOffloadArch(OffloadArch::GFX12_GENERIC)); EXPECT_TRUE(IsAMDOffloadArch(OffloadArch::AMDGCNSPIRV)); - EXPECT_FALSE(IsAMDOffloadArch(OffloadArch::GRANITERAPIDS)); + EXPECT_FALSE(IsAMDOffloadArch(OffloadArch::GRANITERAPIDS_CPU)); - EXPECT_TRUE(IsIntelOffloadArch(OffloadArch::GRANITERAPIDS)); - EXPECT_TRUE(IsIntelCPUOffloadArch(OffloadArch::GRANITERAPIDS)); - EXPECT_FALSE(IsIntelGPUOffloadArch(OffloadArch::GRANITERAPIDS)); - EXPECT_TRUE(IsIntelOffloadArch(OffloadArch::BMG_G21)); - EXPECT_FALSE(IsIntelCPUOffloadArch(OffloadArch::BMG_G21)); - EXPECT_TRUE(IsIntelGPUOffloadArch(OffloadArch::BMG_G21)); + EXPECT_TRUE(IsIntelOffloadArch(OffloadArch::GRANITERAPIDS_CPU)); + EXPECT_TRUE(IsIntelCPUOffloadArch(OffloadArch::GRANITERAPIDS_CPU)); + EXPECT_FALSE(IsIntelGPUOffloadArch(OffloadArch::GRANITERAPIDS_CPU)); + EXPECT_TRUE(IsIntelOffloadArch(OffloadArch::BMG_G21_GPU)); + EXPECT_FALSE(IsIntelCPUOffloadArch(OffloadArch::BMG_G21_GPU)); + EXPECT_TRUE(IsIntelGPUOffloadArch(OffloadArch::BMG_G21_GPU)); 
EXPECT_FALSE(IsNVIDIAOffloadArch(OffloadArch::Generic)); EXPECT_FALSE(IsAMDOffloadArch(OffloadArch::Generic)); diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index b6f15ef13191f..6c74252d3a22f 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -221,6 +221,7 @@ class Triple { NaCl, // Native Client AIX, CUDA, // NVIDIA CUDA + SYCL, // INTEL SYCL NVCL, // NVIDIA OpenCL AMDHSA, // AMD HSA Runtime PS4, diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 5718ae385bac1..e25c9c33f71ab 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -284,6 +284,7 @@ StringRef Triple::getOSTypeName(OSType Kind) { case AMDPAL: return "amdpal"; case BridgeOS: return "bridgeos"; case CUDA: return "cuda"; + case SYCL: return "sycl"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case DriverKit: return "driverkit"; @@ -695,6 +696,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("nacl", Triple::NaCl) .StartsWith("aix", Triple::AIX) .StartsWith("cuda", Triple::CUDA) + .StartsWith("sycl", Triple::SYCL) .StartsWith("nvcl", Triple::NVCL) .StartsWith("amdhsa", Triple::AMDHSA) .StartsWith("ps4", Triple::PS4)