-
Notifications
You must be signed in to change notification settings - Fork 0
[WIP][SYCL][Driver] Initial support to enable --offload-arch option for SYCL. #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ef0ddb1
c96d672
0012d95
ffb828a
9cae196
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -874,4 +874,12 @@ def warn_drv_openacc_without_cir | |
| : Warning<"OpenACC directives will result in no runtime behavior; use " | ||
| "-fclangir to enable runtime effect">, | ||
| InGroup<SourceUsesOpenACC>; | ||
| def err_drv_sycl_offload_arch_missing_value : | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are these warnings SYCL specific? Thanks |
||
| Error<"must pass in a valid cpu or gpu architecture string to '--offload-arch'">; | ||
|
|
||
| def err_drv_invalid_sycl_target : Error<"SYCL target is invalid: '%0'">; | ||
|
|
||
| def warn_drv_sycl_offload_target_duplicate : Warning< | ||
| "SYCL offloading target '%0' is similar to target '%1' already specified; " | ||
| "will be ignored">, InGroup<SyclTarget>; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -102,13 +102,14 @@ enum class OffloadArch { | |
| Generic, // A processor model named 'generic' if the target backend defines a | ||
| // public one. | ||
| // Intel CPUs | ||
| GRANITERAPIDS, | ||
| GRANITERAPIDS_CPU, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the format we will follow going forward? Processor name + "_" + CPU/GPU? I am ok with it. Thanks |
||
| // Intel GPUs | ||
| BMG_G21, | ||
| BMG_G21_GPU, | ||
| LAST, | ||
|
|
||
| CudaDefault = OffloadArch::SM_52, | ||
| HIPDefault = OffloadArch::GFX906, | ||
| SYCLDefault = OffloadArch::BMG_G21_GPU, | ||
| }; | ||
|
|
||
| static inline bool IsNVIDIAOffloadArch(OffloadArch A) { | ||
|
|
@@ -121,11 +122,11 @@ static inline bool IsAMDOffloadArch(OffloadArch A) { | |
| } | ||
|
|
||
| static inline bool IsIntelCPUOffloadArch(OffloadArch Arch) { | ||
| return Arch >= OffloadArch::GRANITERAPIDS && Arch < OffloadArch::BMG_G21; | ||
| return Arch >= OffloadArch::GRANITERAPIDS_CPU && Arch < OffloadArch::BMG_G21_GPU; | ||
| } | ||
|
|
||
| static inline bool IsIntelGPUOffloadArch(OffloadArch Arch) { | ||
| return Arch >= OffloadArch::BMG_G21 && Arch < OffloadArch::LAST; | ||
| return Arch >= OffloadArch::BMG_G21_GPU && Arch < OffloadArch::LAST; | ||
| } | ||
|
|
||
| static inline bool IsIntelOffloadArch(OffloadArch Arch) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -846,6 +846,22 @@ class Driver { | |||||
| /// Compute the default -fmodule-cache-path. | ||||||
| /// \return True if the system provides a default cache directory. | ||||||
| static bool getDefaultModuleCachePath(SmallVectorImpl<char> &Result); | ||||||
| /// Vector of Macros that need to be added to the Host compilation in a | ||||||
| /// SYCL based offloading scenario. These macros are gathered during | ||||||
| /// construction of the device compilations. | ||||||
| mutable std::vector<std::string> SYCLTargetMacro; | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
|
||||||
| /// addSYCLTargetMacro - Add the given macro to the vector of args to be | ||||||
| /// added to the host compilation step. | ||||||
| void addSYCLTargetMacro(const llvm::opt::ArgList &Args, | ||||||
| StringRef Macro) const { | ||||||
| SYCLTargetMacro.push_back(Args.MakeArgString(Macro)); | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I realize that this patch doesn't currently have the macro addition steps - but this may be a good opportunity to reduce macro duplication that is added to the host compilation by only adding unique macro values to the |
||||||
| } | ||||||
|
|
||||||
| /// getSYCLTargetMacro - return the previously gathered macro target args. | ||||||
| llvm::ArrayRef<std::string> getSYCLTargetMacro() const { | ||||||
| return SYCLTargetMacro; | ||||||
| } | ||||||
| }; | ||||||
|
|
||||||
| /// \return True if the last defined optimization level is -Ofast. | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -169,6 +169,17 @@ getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { | |
| return std::nullopt; | ||
| } | ||
|
|
||
|
|
||
| static std::optional<llvm::Triple> | ||
| getINTELOffloadTargetTriple(const Driver &D, const ArgList &Args, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name doesn't seem to fit the triple being returned. The value here is a |
||
| const llvm::Triple &HostTriple) { | ||
| if (!Args.hasArg(options::OPT_offload_EQ)) { | ||
| return llvm::Triple(HostTriple.isArch64Bit() ? "spirv64-intel-sycl" | ||
| : "spirv32-intel-sycl"); | ||
| } | ||
| return std::nullopt; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we emit something if the user specifies -offload= for SYCL offloading? Or at least add an assert?
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Currently we emit a diagnostic for empty --offload-arch. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This question was about -offload. What will happen if the user says '-fsycl -offload=abc'? |
||
| } | ||
|
|
||
| template <typename F> static bool usesInput(const ArgList &Args, F &&Fn) { | ||
| return llvm::any_of(Args, [&](Arg *A) { | ||
| return (A->getOption().matches(options::OPT_x) && | ||
|
|
@@ -918,30 +929,30 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const { | |
| } | ||
|
|
||
| static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) { | ||
| SmallVector<StringRef, 5> SYCLAlias = {"spir", "spir64", "spirv", "spirv32", | ||
| "spirv64"}; | ||
| SmallVector<StringRef, 5> SYCLAlias = {"spirv", "spirv32", "spirv64"}; | ||
| if (llvm::is_contained(SYCLAlias, TargetArch)) { | ||
| llvm::Triple TargetTriple; | ||
| TargetTriple.setArchName(TargetArch); | ||
| TargetTriple.setVendor(llvm::Triple::UnknownVendor); | ||
| TargetTriple.setOS(llvm::Triple::UnknownOS); | ||
| TargetTriple.setVendor(llvm::Triple::Intel); | ||
| TargetTriple.setOS(llvm::Triple::SYCL); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm... This is a bit confusing. Is it correct to set the OS as SYCL? Thanks
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need to set it here as this will be used to set the target triple string for the JIT flow (
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, it looks like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. clang-linker-wrapper can determine AOT/JIT based on arch=. Thanks
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right - I must have read the testing wrong. For some reason when I was looking at the testing, I was associating the target and triple backwards. Regardless, it looks like we are in agreement on using a single triple and having the
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mdtoguchi We could probably drop I'm wondering why we aren't aligning with OpenMP Intel AOT, CUDA, and HIP. I don't see any issues with using a more descriptive target triple string for SYCL AOT to Intel targets.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There should be no issues with moving to a more descriptive triple, but the usage of the triple during the device compilation isn't Intel specific. It is just generating generic IR. We were already using the
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. An LLVM target triple is a string that describes the target architecture, operating system, and vendor for which LLVM is compiling code. For SYCL AOT to Intel GPUs or CPUs, a target triple string such as spirv64-intel-sycl/unknown indicates that we are compiling code for an Intel target compatible with SPIR-V. For JIT, the generated SPIR-V is not tied to Intel targets, so it seems reasonable to have 'unknown' for the target vendor and OS. AFAIK, even with SYCL offloading to CUDA targets, the generated LLVM IR is generic and the NVPTX Back End adds additional CUDA-specific libraries. We could still generate generic SPIR-V/LLVM IR and yet have a target triple string that describes the target for which LLVM is compiling code.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks @srividya-sundaram. I'm OK with using |
||
| return TargetTriple; | ||
| } | ||
| return llvm::Triple(TargetArch); | ||
| } | ||
|
|
||
| static bool addSYCLDefaultTriple(Compilation &C, | ||
| SmallVectorImpl<llvm::Triple> &SYCLTriples) { | ||
| // Check current set of triples to see if the default has already been set. | ||
| for (const auto &SYCLTriple : SYCLTriples) { | ||
| if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch && | ||
| SYCLTriple.isSPIROrSPIRV()) | ||
| return false; | ||
| } | ||
| // Add the default triple as it was not found. | ||
| // Default triple is spirv32-unknown-unknown or | ||
| // spirv64-unknown-unknown. | ||
| llvm::Triple DefaultTriple = getSYCLDeviceTriple( | ||
| C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32" | ||
| : "spirv64"); | ||
|
|
||
| // Check current triple to see if the default has already been set. | ||
| for (const auto &SYCLTriple : SYCLTriples) { | ||
| if (SYCLTriple == DefaultTriple) | ||
| return false; | ||
| } | ||
| SYCLTriples.insert(SYCLTriples.begin(), DefaultTriple); | ||
| return true; | ||
| } | ||
|
|
@@ -1141,19 +1152,89 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, | |
| // -ffreestanding cannot be used with -fsycl | ||
| argSYCLIncompatible(options::OPT_ffreestanding); | ||
|
|
||
| llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs; | ||
| llvm::StringMap<StringRef> FoundNormalizedTriples; | ||
| std::multiset<StringRef> SYCLTriples; | ||
| llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec; | ||
|
|
||
| if (IsSYCL) { | ||
| addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); | ||
| if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && | ||
| !IsHIP && !IsCuda ) { | ||
| const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); | ||
| auto IntelTriple = getINTELOffloadTargetTriple(*this, C.getInputArgs(), | ||
| HostTC->getTriple()); | ||
| // Attempt to deduce the offloading triple from the set of architectures. | ||
| // We need to temporarily create these toolchains so that we can access | ||
| // tools for inferring architectures. | ||
| llvm::DenseSet<StringRef> Archs; | ||
| for (const std::optional<llvm::Triple> &TT : {IntelTriple}) { | ||
| if (!TT) | ||
| continue; | ||
|
|
||
| auto &TC = | ||
| getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, *TT, | ||
| C.getDefaultToolChain().getTriple()); | ||
| for (StringRef Arch : | ||
| getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, true)) | ||
| Archs.insert(Arch); | ||
| } | ||
|
|
||
| for (StringRef Arch : Archs) { | ||
| if (IntelTriple && | ||
| IsIntelOffloadArch(StringToOffloadArch( | ||
| getProcessorFromTargetID(*IntelTriple, Arch)))) { | ||
| DerivedArchs[IntelTriple->getTriple()].insert(Arch); | ||
| } else { | ||
| Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| // If the set is empty then we failed to find a native architecture. | ||
| if (Archs.empty()) { | ||
| Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); | ||
| return; | ||
| } | ||
|
|
||
| for (const auto &TripleAndArchs : DerivedArchs) | ||
| SYCLTriples.insert(TripleAndArchs.first()); //spirv64-intel-sycl | ||
|
|
||
| for (StringRef Val : SYCLTriples) { | ||
| llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val)); | ||
| std::string NormalizedName = SYCLTargetTriple.normalize(); | ||
|
|
||
| // Make sure we don't have a duplicate triple. | ||
| auto [TripleIt, Inserted] = | ||
| FoundNormalizedTriples.try_emplace(NormalizedName, Val); | ||
|
|
||
| if (!Inserted) { | ||
| Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) | ||
| << Val << TripleIt->second; | ||
| continue; | ||
| } | ||
|
|
||
| // If the specified target is invalid, emit a diagnostic. | ||
| if (SYCLTargetTriple.getArch() == llvm::Triple::UnknownArch) { | ||
| Diag(clang::diag::err_drv_invalid_sycl_target) << Val; | ||
| continue; | ||
| } | ||
|
|
||
| UniqueSYCLTriplesVec.push_back(SYCLTargetTriple); | ||
| } | ||
| addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); | ||
| } else | ||
| addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); | ||
|
|
||
| // We'll need to use the SYCL and host triples as the key into | ||
| // getOffloadingDeviceToolChain, because the device toolchains we're | ||
| // getOffloadToolChain, because the device toolchains we're | ||
| // going to create will depend on both. | ||
| const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); | ||
| for (const auto &TT : UniqueSYCLTriplesVec) { | ||
| auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, | ||
| HostTC->getTriple()); | ||
| C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL); | ||
| if (DerivedArchs.contains(TT.getTriple())) | ||
| KnownArchs[SYCLTC] = DerivedArchs[TT.getTriple()]; | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -4846,7 +4927,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, | |
| } else if (Kind == Action::OFK_HIP) { | ||
| Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); | ||
| } else if (Kind == Action::OFK_SYCL) { | ||
| Archs.insert(StringRef()); | ||
| Archs.insert(OffloadArchToString(OffloadArch::SYCLDefault)); | ||
| } else if (Kind == Action::OFK_OpenMP) { | ||
| // Accept legacy `-march` device arguments for OpenMP. | ||
| if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind) | ||
|
|
@@ -6740,7 +6821,7 @@ const ToolChain &Driver::getOffloadToolChain( | |
| if (Kind == Action::OFK_HIP) | ||
| TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target, | ||
| *HostTC, Args); | ||
| else if (Kind == Action::OFK_OpenMP) | ||
| else if ((Kind == Action::OFK_OpenMP) || (Kind == Action::OFK_SYCL)) | ||
| TC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(*this, Target, | ||
| *HostTC, Args); | ||
| break; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| /// Tests the behaviors of using -fsycl --offload-arch=<intel-cpu-values>. | ||
|
|
||
| // SYCL AOT compilation to Intel CPUs using --offload-arch | ||
|
|
||
| // RUN: %clangxx -### -fsycl --offload-arch=graniterapids_cpu %s 2>&1 | \ | ||
| // RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=graniterapids_cpu | ||
|
|
||
| // TARGET-TRIPLE-CPU: clang{{.*}} "-triple" "spirv64-intel-sycl" | ||
| // CLANG-OFFLOAD-PACKAGER-CPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spirv64-intel-sycl,arch=[[DEV_STR]],kind=sycl" | ||
|
|
||
| // Tests for handling a missing architecture. | ||
| // | ||
| // RUN: not %clangxx -fsycl --offload-arch= %s -### 2>&1 \ | ||
| // RUN: | FileCheck -check-prefix=MISSING-OFFLOAD-ARCH-VALUE %s | ||
| // RUN: not %clang_cl -fsycl --offload-arch= %s -### 2>&1 \ | ||
| // RUN: | FileCheck -check-prefix=MISSING-OFFLOAD-ARCH-VALUE %s | ||
|
|
||
| // MISSING-OFFLOAD-ARCH-VALUE: error: must pass in a valid cpu or gpu architecture string to '--offload-arch' | ||
|
|
||
| // Tests for handling an incorrect --offload-arch architecture value. | ||
| // | ||
| // RUN: not %clangxx -fsycl --offload-arch=badArch %s -### 2>&1 \ | ||
| // RUN: | FileCheck -check-prefix=BAD-ARCH %s | ||
| // RUN: not %clang_cl -fsycl --offload-arch=badArch %s -### 2>&1 \ | ||
| // RUN: | FileCheck -check-prefix=BAD-ARCH %s | ||
|
|
||
| // BAD-ARCH: error: SYCL target is invalid: 'badArch' | ||
|
|
||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| /// Tests the behaviors of using -fsycl --offload-arch=<intel-gpu-values>. | ||
|
|
||
| // SYCL AOT compilation to Intel GPUs using --offload-arch | ||
|
|
||
| // RUN: %clangxx -### -fsycl --offload-arch=bmg_g21_gpu %s 2>&1 | \ | ||
| // RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=bmg_g21_gpu -DMAC_STR=BMG_G21_GPU | ||
|
|
||
|
|
||
| // TARGET-TRIPLE-GPU: clang{{.*}} "-triple" "spirv64-intel-sycl" | ||
| // CLANG-OFFLOAD-PACKAGER-GPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spirv64-intel-sycl,arch=[[DEV_STR]],kind=sycl" | ||
| // CLANG-OFFLOAD-PACKAGER-GPU-OPTS: clang-offload-packager{{.*}} "--image={{.*}}triple=spirv64-intel-sycl,arch=[[DEV_STR]],kind=sycl{{.*}}" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @srividya-sundaram
I will take a look in a bit.
Thanks