-
Notifications
You must be signed in to change notification settings - Fork 0
[WIP][SYCL][Driver] Initial support to enable --offload-arch option for SYCL. #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
ef0ddb1
c96d672
0012d95
ffb828a
9cae196
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -874,4 +874,12 @@ def warn_drv_openacc_without_cir | |
| : Warning<"OpenACC directives will result in no runtime behavior; use " | ||
| "-fclangir to enable runtime effect">, | ||
| InGroup<SourceUsesOpenACC>; | ||
| def err_drv_sycl_offload_arch_missing_value : | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are these warnings SYCL-specific? Thanks. |
||
| Error<"must pass in a valid cpu or gpu architecture string to '--offload-arch'">; | ||
|
|
||
| def err_drv_invalid_sycl_target : Error<"SYCL target is invalid: '%0'">; | ||
|
|
||
| def warn_drv_sycl_offload_target_duplicate : Warning< | ||
| "SYCL offloading target '%0' is similar to target '%1' already specified; " | ||
| "will be ignored">, InGroup<SyclTarget>; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -102,13 +102,14 @@ enum class OffloadArch { | |
| Generic, // A processor model named 'generic' if the target backend defines a | ||
| // public one. | ||
| // Intel CPUs | ||
| GRANITERAPIDS, | ||
| GRANITERAPIDS_CPU, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the format we will follow going forward? Processor name + "_" + CPU/GPU? I am ok with it. Thanks |
||
| // Intel GPUs | ||
| BMG_G21, | ||
| BMG_G21_GPU, | ||
| LAST, | ||
|
|
||
| CudaDefault = OffloadArch::SM_52, | ||
| HIPDefault = OffloadArch::GFX906, | ||
| SYCLDefault = OffloadArch::BMG_G21_GPU, | ||
| }; | ||
|
|
||
| static inline bool IsNVIDIAOffloadArch(OffloadArch A) { | ||
|
|
@@ -121,11 +122,11 @@ static inline bool IsAMDOffloadArch(OffloadArch A) { | |
| } | ||
|
|
||
| static inline bool IsIntelCPUOffloadArch(OffloadArch Arch) { | ||
| return Arch >= OffloadArch::GRANITERAPIDS && Arch < OffloadArch::BMG_G21; | ||
| return Arch >= OffloadArch::GRANITERAPIDS_CPU && Arch < OffloadArch::BMG_G21_GPU; | ||
| } | ||
|
|
||
| static inline bool IsIntelGPUOffloadArch(OffloadArch Arch) { | ||
| return Arch >= OffloadArch::BMG_G21 && Arch < OffloadArch::LAST; | ||
| return Arch >= OffloadArch::BMG_G21_GPU && Arch < OffloadArch::LAST; | ||
| } | ||
|
|
||
| static inline bool IsIntelOffloadArch(OffloadArch Arch) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -846,6 +846,22 @@ class Driver { | |||||
| /// Compute the default -fmodule-cache-path. | ||||||
| /// \return True if the system provides a default cache directory. | ||||||
| static bool getDefaultModuleCachePath(SmallVectorImpl<char> &Result); | ||||||
| /// Vector of Macros that need to be added to the Host compilation in a | ||||||
| /// SYCL based offloading scenario. These macros are gathered during | ||||||
| /// construction of the device compilations. | ||||||
| mutable std::vector<std::string> SYCLTargetMacro; | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
|
||||||
| /// addSYCLTargetMacro - Add the given macro to the vector of args to be | ||||||
| /// added to the host compilation step. | ||||||
| void addSYCLTargetMacro(const llvm::opt::ArgList &Args, | ||||||
| StringRef Macro) const { | ||||||
| SYCLTargetMacro.push_back(Args.MakeArgString(Macro)); | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I realize that this patch doesn't currently have the macro addition steps - but this may be a good opportunity to reduce macro duplication that is added to the host compilation by only adding unique macro values to the |
||||||
| } | ||||||
|
|
||||||
| /// getSYCLTargetMacro - return the previously gathered macro target args. | ||||||
| llvm::ArrayRef<std::string> getSYCLTargetMacro() const { | ||||||
| return SYCLTargetMacro; | ||||||
| } | ||||||
| }; | ||||||
|
|
||||||
| /// \return True if the last defined optimization level is -Ofast. | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -169,6 +169,17 @@ getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { | |
| return std::nullopt; | ||
| } | ||
|
|
||
|
|
||
| static std::optional<llvm::Triple> | ||
| getINTELOffloadTargetTriple(const Driver &D, const ArgList &Args, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name doesn't seem to fit the triple being returned. The value here is a |
||
| const llvm::Triple &HostTriple) { | ||
| if (!Args.hasArg(options::OPT_offload_EQ)) { | ||
| return llvm::Triple(HostTriple.isArch64Bit() ? "spirv64" | ||
| : "spirv32"); | ||
| } | ||
| return std::nullopt; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we emit something if the user specifies -offload= for SYCL offloading? Or at least add an assert?
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently we emit a diagnostic for an empty --offload-arch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This question was about -offload. What will happen if the user says '-fsycl -offload=abc'? |
||
| } | ||
|
|
||
| template <typename F> static bool usesInput(const ArgList &Args, F &&Fn) { | ||
| return llvm::any_of(Args, [&](Arg *A) { | ||
| return (A->getOption().matches(options::OPT_x) && | ||
|
|
@@ -918,8 +929,7 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const { | |
| } | ||
|
|
||
| static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) { | ||
| SmallVector<StringRef, 5> SYCLAlias = {"spir", "spir64", "spirv", "spirv32", | ||
| "spirv64"}; | ||
| SmallVector<StringRef, 5> SYCLAlias = {"spirv", "spirv32", "spirv64"}; | ||
| if (llvm::is_contained(SYCLAlias, TargetArch)) { | ||
| llvm::Triple TargetTriple; | ||
| TargetTriple.setArchName(TargetArch); | ||
|
|
@@ -932,16 +942,23 @@ static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) { | |
|
|
||
| static bool addSYCLDefaultTriple(Compilation &C, | ||
| SmallVectorImpl<llvm::Triple> &SYCLTriples) { | ||
|
|
||
| llvm::Triple DefaultTriple = getSYCLDeviceTriple( | ||
| C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32" | ||
| : "spirv64"); | ||
| for (const auto &SYCLTriple : SYCLTriples) { | ||
| if (SYCLTriple == DefaultTriple) | ||
| return false; | ||
| if(SYCLTriple.isSPIRV()) | ||
| return false; | ||
| } | ||
| // Check current set of triples to see if the default has already been set. | ||
| for (const auto &SYCLTriple : SYCLTriples) { | ||
| if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch && | ||
| SYCLTriple.isSPIROrSPIRV()) | ||
| return false; | ||
| } | ||
| // Add the default triple as it was not found. | ||
| llvm::Triple DefaultTriple = getSYCLDeviceTriple( | ||
| C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32" | ||
| : "spirv64"); | ||
|
|
||
| SYCLTriples.insert(SYCLTriples.begin(), DefaultTriple); | ||
| return true; | ||
| } | ||
|
|
@@ -1141,19 +1158,84 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, | |
| // -ffreestanding cannot be used with -fsycl | ||
| argSYCLIncompatible(options::OPT_ffreestanding); | ||
|
|
||
| llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs; | ||
| llvm::StringMap<StringRef> FoundNormalizedTriples; | ||
| std::multiset<StringRef> SYCLTriples; | ||
| llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec; | ||
|
|
||
| if (IsSYCL) { | ||
| addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); | ||
| if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && | ||
| !IsHIP && !IsCuda ) { | ||
| const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); | ||
| auto IntelTriple = getINTELOffloadTargetTriple(*this, C.getInputArgs(), | ||
| HostTC->getTriple()); | ||
| // Attempt to deduce the offloading triple from the set of architectures. | ||
| // We need to temporarily create these toolchains so that we can access | ||
| // tools for inferring architectures. | ||
| llvm::DenseSet<StringRef> Archs; | ||
| for (const std::optional<llvm::Triple> &TT : {IntelTriple}) { | ||
| if (!TT) | ||
| continue; | ||
|
|
||
| auto &TC = | ||
| getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, *TT, | ||
| C.getDefaultToolChain().getTriple()); | ||
| for (StringRef Arch : | ||
| getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, true)) | ||
| Archs.insert(Arch); | ||
| } | ||
|
|
||
| for (StringRef Arch : Archs) { | ||
| if (IntelTriple && IsIntelOffloadArch(StringToOffloadArch( | ||
| getProcessorFromTargetID(*IntelTriple, Arch))) ) { | ||
| DerivedArchs[IntelTriple->getTriple()].insert(Arch); | ||
| } | ||
| else { | ||
| Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| // If the set is empty then we failed to find a native architecture. | ||
| if (Archs.empty()) { | ||
| Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); | ||
| return; | ||
| } | ||
|
|
||
| for (const auto &TripleAndArchs : DerivedArchs) | ||
| SYCLTriples.insert(TripleAndArchs.first()); | ||
|
|
||
| for (StringRef Val : SYCLTriples) { | ||
| llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val)); | ||
| std::string NormalizedName = SYCLTargetTriple.normalize(); | ||
|
|
||
| // Make sure we don't have a duplicate triple. | ||
| auto [TripleIt, Inserted] = | ||
| FoundNormalizedTriples.try_emplace(NormalizedName, Val); | ||
|
|
||
| if (!Inserted) { | ||
| Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) | ||
| << Val << TripleIt->second; | ||
| continue; | ||
| } | ||
| UniqueSYCLTriplesVec.push_back(SYCLTargetTriple); | ||
| } | ||
|
|
||
| addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); | ||
|
|
||
| } else | ||
| addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); | ||
|
|
||
| // We'll need to use the SYCL and host triples as the key into | ||
| // getOffloadingDeviceToolChain, because the device toolchains we're | ||
| // getOffloadToolChain, because the device toolchains we're | ||
| // going to create will depend on both. | ||
| const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); | ||
| for (const auto &TT : UniqueSYCLTriplesVec) { | ||
| auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, | ||
| HostTC->getTriple()); | ||
| C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL); | ||
| if (DerivedArchs.contains(TT.getTriple())) | ||
| KnownArchs[SYCLTC] = DerivedArchs[TT.getTriple()]; | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -4846,7 +4928,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, | |
| } else if (Kind == Action::OFK_HIP) { | ||
| Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); | ||
| } else if (Kind == Action::OFK_SYCL) { | ||
| Archs.insert(StringRef()); | ||
| Archs.insert(OffloadArchToString(OffloadArch::SYCLDefault)); | ||
| } else if (Kind == Action::OFK_OpenMP) { | ||
| // Accept legacy `-march` device arguments for OpenMP. | ||
| if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind) | ||
|
|
@@ -6740,7 +6822,7 @@ const ToolChain &Driver::getOffloadToolChain( | |
| if (Kind == Action::OFK_HIP) | ||
| TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target, | ||
| *HostTC, Args); | ||
| else if (Kind == Action::OFK_OpenMP) | ||
| else if ((Kind == Action::OFK_OpenMP) || (Kind == Action::OFK_SYCL)) | ||
| TC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(*this, Target, | ||
| *HostTC, Args); | ||
| break; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @srividya-sundaram
I will take a look in a bit.
Thanks