diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index f630735c4875f..91be277d301b2 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -130,6 +130,8 @@ def warn_flag_no_sycl_libspirv InGroup; def err_drv_mix_cuda_hip : Error< "mixed CUDA and HIP compilation is not supported">; +def err_drv_mix_offload : Error< + "mixed %0 and %1 offloading compilation is not supported">; def err_drv_bad_target_id : Error< "invalid target ID '%0'; format is a processor name followed by an optional " "colon-delimited list of features followed by an enable/disable sign (e.g., " diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index dc07d7c352ec1..7546c7838c4a4 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -361,6 +361,9 @@ class Driver { phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL, llvm::opt::Arg **FinalPhaseArg = nullptr) const; + llvm::Expected> + executeProgram(llvm::ArrayRef Args) const; + private: /// Certain options suppress the 'no input files' warning. LLVM_PREFERRED_TYPE(bool) @@ -373,10 +376,6 @@ class Driver { /// stored in it, and will clean them up when torn down. mutable llvm::StringMap> ToolChains; - /// The associated offloading architectures with each toolchain. - llvm::DenseMap> - OffloadArchs; - private: /// TranslateInputArgs - Create a new derived argument list from the input /// arguments, after applying the standard argument translations. @@ -549,8 +548,7 @@ class Driver { /// empty string. llvm::SmallVector getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC, - bool SpecificToolchain = true) const; + Action::OffloadKind Kind, const ToolChain &TC) const; /// Check that the file referenced by Value exists. If it doesn't, /// issue a diagnostic and return false. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 78e4fe84fc60c..57f5bb5f6203e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1236,7 +1236,7 @@ def offload_arch_EQ : CommaJoined<["--"], "offload-arch=">, "If 'native' is used the compiler will detect locally installed architectures. " "For HIP offloading, the device architecture can be followed by target ID features " "delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.">; -def no_offload_arch_EQ : Joined<["--"], "no-offload-arch=">, +def no_offload_arch_EQ : CommaJoined<["--"], "no-offload-arch=">, Visibility<[ClangOption, FlangOption]>, HelpText<"Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. " "'all' resets the list to its default value.">; @@ -1732,7 +1732,7 @@ defm auto_import : BoolFOption<"auto-import", // In the future this option will be supported by other offloading // languages and accept other values such as CPU/GPU architectures, // offload kinds and target aliases. -def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>, +def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>, Alias, HelpText<"Specify comma-separated list of offloading target triples (CUDA and HIP only)">; // C++ Coroutines @@ -7268,7 +7268,8 @@ def fno_sycl_esimd_build_host_code : Flag<["-"], "fno-sycl-esimd-build-host-cod Visibility<[ClangOption, CLOption, CC1Option]>, Flags<[HelpHidden]>, HelpText<"Do not build the host implementation of ESIMD functions.">; def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, - Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, CC1Option]>, + Flags<[NoXarchOption]>, Alias, + Visibility<[ClangOption, CLOption, CC1Option]>, HelpText<"Specify comma-separated list of triples SYCL offloading targets " "to be supported">; def fsycl_force_target_EQ : Joined<["-"], "fsycl-force-target=">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index a004f057846fa..58552a629f250 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -218,10 +218,6 @@ class ToolChain { ToolChain(const Driver &D, const llvm::Triple &T, const llvm::opt::ArgList &Args); - /// Executes the given \p Executable and returns the stdout. - llvm::Expected> - executeToolChainProgram(StringRef Executable) const; - void setTripleEnvironment(llvm::Triple::EnvironmentType Env); virtual Tool *buildAssembler() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 92a0857505916..6a284417e079e 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -113,65 +113,6 @@ using namespace clang::driver; using namespace clang; using namespace llvm::opt; -static std::optional getOffloadTargetTriple(const Driver &D, - const ArgList &Args) { - auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); - // Offload compilation flow does not support multiple targets for now. We - // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) - // to support multiple tool chains first. - switch (OffloadTargets.size()) { - default: - D.Diag(diag::err_drv_only_one_offload_target_supported); - return std::nullopt; - case 0: - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; - return std::nullopt; - case 1: - break; - } - return llvm::Triple(OffloadTargets[0]); -} - -static std::optional -getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, - const llvm::Triple &HostTriple) { - if (!Args.hasArg(options::OPT_offload_EQ)) { - return llvm::Triple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"); - } - auto TT = getOffloadTargetTriple(D, Args); - if (TT && (TT->getArch() == llvm::Triple::spirv32 || - TT->getArch() == llvm::Triple::spirv64)) { - if (Args.hasArg(options::OPT_emit_llvm)) - return TT; - D.Diag(diag::err_drv_cuda_offload_only_emit_bc); - return std::nullopt; - } - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return std::nullopt; -} - -static std::optional -getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { - if (!Args.hasArg(options::OPT_offload_EQ)) { - auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ); - if (llvm::is_contained(OffloadArchs, "amdgcnspirv") && - OffloadArchs.size() == 1) - return llvm::Triple("spirv64-amd-amdhsa"); - return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. - } - auto TT = getOffloadTargetTriple(D, Args); - if (!TT) - return std::nullopt; - if (TT->isAMDGCN() && TT->getVendor() == llvm::Triple::AMD && - TT->getOS() == llvm::Triple::AMDHSA) - return TT; - if (TT->getArch() == llvm::Triple::spirv64) - return TT; - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return std::nullopt; -} - template static bool usesInput(const ArgList &Args, F &&Fn) { return llvm::any_of(Args, [&](Arg *A) { return (A->getOption().matches(options::OPT_x) && @@ -496,6 +437,44 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, return FinalPhase; } +llvm::Expected> +Driver::executeProgram(llvm::ArrayRef Args) const { + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("driver-program", "txt", OutputFile, + llvm::sys::fs::OF_Text); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + std::optional Redirects[] = { + {""}, + OutputFile.str(), + {""}, + }; + + std::string ErrorMessage; + int SecondsToWait = 60; + if (std::optional Str = + llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { + if (!llvm::to_integer(*Str, SecondsToWait)) + return llvm::createStringError(std::error_code(), + "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " + "an integer, got '" + + *Str + "'"); + SecondsToWait = std::max(SecondsToWait, 0); // infinite + } + StringRef Executable = Args[0]; + if (llvm::sys::ExecuteAndWait(Executable, Args, {}, Redirects, SecondsToWait, + /*MemoryLimit=*/0, &ErrorMessage)) + return llvm::createStringError(std::error_code(), + Executable + ": " + ErrorMessage); + + llvm::ErrorOr> OutputBuf = + llvm::MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return llvm::createStringError(OutputBuf.getError(), + "Failed to read stdout of " + Executable + + ": " + OutputBuf.getError().message()); + return std::move(*OutputBuf); +} + static Arg *MakeInputArg(DerivedArgList &Args, const OptTable &Opts, StringRef Value, bool Claim = true) { Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value, @@ -1017,8 +996,11 @@ llvm::Triple Driver::getSYCLDeviceTriple(StringRef TargetArch, llvm::Triple TargetTriple(TargetArch); if (Arg && !Arg->isClaimed() && TargetTriple.isSPIR() && TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) { - Diag(diag::err_drv_unsupported_opt_removed) - << Arg->getSpelling().str() + TargetArch.str(); + SmallString<128> OptStr(Arg->getSpelling()); + if (Arg->getOption().matches(options::OPT_offload_targets_EQ)) + OptStr = "-fsycl-targets="; + OptStr += TargetArch.str(); + Diag(diag::err_drv_unsupported_opt_removed) << OptStr; Arg->claim(); } if (llvm::is_contained(SYCLAlias, TargetArch)) { @@ -1060,217 +1042,153 @@ static bool addSYCLDefaultTriple(Compilation &C, return true; } -void Driver::CreateOffloadingDeviceToolChains(Compilation &C, - InputList &Inputs) { - - // - // CUDA/HIP - // - // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA - // or HIP type. However, mixed CUDA/HIP compilation is not supported. - using namespace tools::SYCL; - bool IsCuda = - llvm::any_of(Inputs, [](std::pair &I) { - return types::isCuda(I.first); - }); - bool IsHIP = - llvm::any_of(Inputs, - [](std::pair &I) { - return types::isHIP(I.first); - }) || - C.getInputArgs().hasArg(options::OPT_hip_link) || - C.getInputArgs().hasArg(options::OPT_hipstdpar); - bool UseLLVMOffload = C.getInputArgs().hasArg( - options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); - if (IsCuda && IsHIP) { - Diag(clang::diag::err_drv_mix_cuda_hip); - return; - } - if (IsCuda && !UseLLVMOffload) { - auto CudaTriple = getNVIDIAOffloadTargetTriple( - *this, C.getInputArgs(), C.getDefaultToolChain().getTriple()); - if (!CudaTriple) - return; - - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_Cuda, *CudaTriple, - C.getDefaultToolChain().getTriple()); - - // Emit a warning if the detected CUDA version is too new. - const CudaInstallationDetector &CudaInstallation = - static_cast(TC).CudaInstallation; - if (CudaInstallation.isValid()) - CudaInstallation.WarnIfUnsupportedVersion(); - C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, &TC, - /*SpecificToolchain=*/true); - } else if (IsHIP && !UseLLVMOffload) { - if (auto *OMPTargetArg = - C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { - Diag(clang::diag::err_drv_unsupported_opt_for_language_mode) - << OMPTargetArg->getSpelling() << "HIP"; - return; - } - - auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); - if (!HIPTriple) - return; - - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP); - - // TODO: Fix 'amdgcnspirv' handling with the new driver. - if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_HIP, &TC, - /*SpecificToolchain=*/true); +// Handles `native` offload architectures by using the 'offload-arch' utility. +static llvm::SmallVector +getSystemOffloadArchs(Compilation &C, Action::OffloadKind Kind) { + StringRef Program = C.getArgs().getLastArgValue( + options::OPT_offload_arch_tool_EQ, "offload-arch"); + + SmallVector GPUArchs; + if (llvm::ErrorOr Executable = + llvm::sys::findProgramByName(Program)) { + llvm::SmallVector Args{*Executable}; + if (Kind == Action::OFK_HIP) + Args.push_back("--only=amdgpu"); + else if (Kind == Action::OFK_Cuda) + Args.push_back("--only=nvptx"); + auto StdoutOrErr = C.getDriver().executeProgram(Args); + + if (!StdoutOrErr) { + C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << Action::GetOffloadKindName(Kind) << StdoutOrErr.takeError() + << "--offload-arch"; + return GPUArchs; + } else if ((*StdoutOrErr)->getBuffer().empty()) { + C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << Action::GetOffloadKindName(Kind) << "No GPU detected in the system" + << "--offload-arch"; + return GPUArchs; + } + + for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) + if (!Arch.empty()) + GPUArchs.push_back(Arch.str()); + } else { + C.getDriver().Diag(diag::err_drv_command_failure) << "offload-arch"; } + return GPUArchs; +} - if (IsCuda || IsHIP) - CUIDOpts = CUIDOptions(C.getArgs(), *this); - - // - // OpenMP - // - // We need to generate an OpenMP toolchain if the user specified targets with - // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. - bool IsOpenMPOffloading = - ((IsCuda || IsHIP) && UseLLVMOffload) || - (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false) && - (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || - C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); - if (IsOpenMPOffloading) { - // We expect that -fopenmp-targets is always used in conjunction with the - // option -fopenmp specifying a valid runtime with offloading support, i.e. - // libomp or libiomp. - OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); - if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); - return; - } - - // If the user specified -fopenmp-targets= we create a toolchain for each - // valid triple. Otherwise, if only --offload-arch= was specified we instead - // attempt to derive the appropriate toolchains from the arguments. - if (Arg *OpenMPTargets = - C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { - if (OpenMPTargets && !OpenMPTargets->getNumValues()) { - Diag(clang::diag::warn_drv_empty_joined_argument) - << OpenMPTargets->getAsString(C.getInputArgs()); - return; - } - - // Make sure these show up in a deterministic order. - std::multiset OpenMPTriples; - for (StringRef T : OpenMPTargets->getValues()) - OpenMPTriples.insert(T); - - llvm::StringMap FoundNormalizedTriples; - for (StringRef T : OpenMPTriples) { - llvm::Triple TT(ToolChain::getOpenMPTriple(T)); - std::string NormalizedName = TT.normalize(); - - // Make sure we don't have a duplicate triple. - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, T); - if (!Inserted) { - Diag(clang::diag::warn_drv_omp_offload_target_duplicate) - << T << TripleIt->second; - continue; - } - - // If the specified target is invalid, emit a diagnostic. - if (TT.getArch() == llvm::Triple::UnknownArch) { - Diag(clang::diag::err_drv_invalid_omp_target) << T; - continue; - } - - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, - /*SpecificToolchain=*/true); - } - } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && - ((!IsHIP && !IsCuda) || UseLLVMOffload)) { - llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); - llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); - - for (StringRef Arch : - C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { - bool IsNVPTX = IsNVIDIAOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); - bool IsAMDGPU = IsAMDOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); - if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && - !Arch.equals_insensitive("native")) { - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; - return; - } - } - - // Attempt to deduce the offloading triple from the set of architectures. - // We can only correctly deduce NVPTX / AMDGPU triples currently. - for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - - llvm::SmallVector Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, - /*SpecificToolchain=*/false); - if (!Archs.empty()) { - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - OffloadArchs[&TC] = Archs; +// Attempts to infer the correct offloading toolchain triple by looking at the +// requested offloading kind and architectures. +static llvm::DenseSet +inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { + std::set Archs; + for (Arg *A : C.getInputArgs()) { + for (StringRef Arch : A->getValues()) { + if (A->getOption().matches(options::OPT_offload_arch_EQ)) { + if (Arch == "native") { + for (StringRef Str : getSystemOffloadArchs(C, Kind)) + Archs.insert(Str.str()); + } else { + Archs.insert(Arch.str()); } + } else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) { + if (Arch == "all") + Archs.clear(); + else + Archs.erase(Arch.str()); } - - // If the set is empty then we failed to find a native architecture. - auto TCRange = C.getOffloadToolChains(Action::OFK_OpenMP); - if (TCRange.first == TCRange.second) - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) - << "native"; } - } else if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); - return; } - // - // SYCL - // - // We need to generate a SYCL toolchain if the user specified -fsycl. - // If -fsycl is supplied without any of these we will assume SPIR-V. - // Use of -fsycl-device-only overrides -fsycl. - // Use of -fsyclbin enables SYCL device compilation. - bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, - options::OPT_fno_sycl, false) || - C.getInputArgs().hasArgNoClaim(options::OPT_fsycl_device_only, - options::OPT_fsyclbin_EQ); + llvm::DenseSet Triples; + for (llvm::StringRef Arch : Archs) { + OffloadArch ID = StringToOffloadArch(Arch); + if (ID == OffloadArch::UNKNOWN) + ID = StringToOffloadArch( + getProcessorFromTargetID(llvm::Triple("amdgcn-amd-amdhsa"), Arch)); - auto argSYCLIncompatible = [&](OptSpecifier OptId) { - if (!IsSYCL) - return; - if (Arg *IncompatArg = C.getInputArgs().getLastArg(OptId)) - Diag(clang::diag::err_drv_argument_not_allowed_with) - << IncompatArg->getSpelling() << "-fsycl"; - }; - // -static-libstdc++ is not compatible with -fsycl. - argSYCLIncompatible(options::OPT_static_libstdcxx); - // -ffreestanding cannot be used with -fsycl - argSYCLIncompatible(options::OPT_ffreestanding); + if (Kind == Action::OFK_HIP && !IsAMDOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << Arch; + return llvm::DenseSet(); + } + if (Kind == Action::OFK_Cuda && !IsNVIDIAOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << Arch; + return llvm::DenseSet(); + } + if (Kind == Action::OFK_OpenMP && + (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED)) { + C.getDriver().Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) + << Arch; + return llvm::DenseSet(); + } + if (Kind == Action::OFK_SYCL && !IsIntelGPUOffloadArch(ID) && + !IsIntelCPUOffloadArch(ID) && !IsAMDOffloadArch(ID) && + !IsNVIDIAOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; + return llvm::DenseSet(); + } + if (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "offload" << Arch; + return llvm::DenseSet(); + } + + StringRef Triple; + if (ID == OffloadArch::AMDGCNSPIRV) + Triple = "spirv64-amd-amdhsa"; + else if (IsNVIDIAOffloadArch(ID)) + Triple = C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"; + else if (IsAMDOffloadArch(ID)) + Triple = "amdgcn-amd-amdhsa"; + else if (IsIntelGPUOffloadArch(ID)) + Triple = "spir64_gen-unknown-unknown"; + else if (IsIntelCPUOffloadArch(ID)) + Triple = "spir64_x86_64-unknown-unknown"; + else + continue; - llvm::SmallVector UniqueSYCLTriplesVec; + // Make a new argument that dispatches this argument to the appropriate + // toolchain. This is required when we infer it and create potentially + // incompatible toolchains from the global option. + Option Opt = C.getDriver().getOpts().getOption(options::OPT_Xarch__); + unsigned Index = C.getArgs().getBaseArgs().MakeIndex("-Xarch_"); + Arg *A = new Arg(Opt, C.getArgs().getArgString(Index), Index, + C.getArgs().MakeArgString(Triple.split("-").first), + C.getArgs().MakeArgString("--offload-arch=" + Arch)); + C.getArgs().append(A); + Triples.insert(Triple); + } + + // Infer the default target triple if no specific architectures are given. + if (Archs.empty() && Kind == Action::OFK_HIP) + Triples.insert("amdgcn-amd-amdhsa"); + else if (Archs.empty() && Kind == Action::OFK_Cuda) + Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + else if (Archs.empty() && Kind == Action::OFK_SYCL) + Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "spir64-unknown-unknown" + : "spir-unknown-unknown"); + + // We need to dispatch these to the appropriate toolchain now. + C.getArgs().eraseArg(options::OPT_offload_arch_EQ); + C.getArgs().eraseArg(options::OPT_no_offload_arch_EQ); + + return Triples; +} - // A mechanism for retrieving SYCL-specific options, erroring out - // if SYCL offloading wasn't enabled prior to that +static void diagnoseSYCLOptions(Compilation &C, bool IsSYCL) { auto getArgRequiringSYCLRuntime = [&](OptSpecifier OptId) -> Arg * { Arg *SYCLArg = C.getInputArgs().getLastArg(OptId); if (SYCLArg && !IsSYCL) { - Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) + C.getDriver().Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) // Dropping the '=' symbol, which would otherwise pollute // the diagnostics for the most of options << SYCLArg->getSpelling().split('=').first; @@ -1279,7 +1197,19 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, return SYCLArg; }; - Arg *SYCLTargets = getArgRequiringSYCLRuntime(options::OPT_fsycl_targets_EQ); + // Special check for -fsycl-targets. -fsycl-targets is an alias for + // --offload-targets. + if (!IsSYCL) { + if (auto SYCLArg = + C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { + std::string OptString(SYCLArg->getAsString(C.getArgs())); + if (StringRef(OptString).contains("fsycl-targets")) + C.getDriver().Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) + // Dropping the '=' symbol, which would otherwise pollute + // the diagnostics for the most of options + << StringRef(OptString).split('=').first; + } + } // Check if -fsycl-host-compiler is used in conjunction with -fsycl. Arg *SYCLHostCompiler = @@ -1289,7 +1219,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // -fsycl-host-compiler-options cannot be used without -fsycl-host-compiler if (SYCLHostCompilerOptions && !SYCLHostCompiler) - Diag(clang::diag::warn_drv_opt_requires_opt) + C.getDriver().Diag(clang::diag::warn_drv_opt_requires_opt) << SYCLHostCompilerOptions->getSpelling().split('=').first << "-fsycl-host-compiler"; @@ -1308,7 +1238,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, for (const StringRef AllowedValue : AllowedValues) if (AllowedValue == ArgValue) return; - Diag(clang::diag::err_drv_invalid_argument_to_option) + C.getDriver().Diag(clang::diag::err_drv_invalid_argument_to_option) << ArgValue << A->getOption().getName(); }; @@ -1321,7 +1251,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // Use of -fsycl-link=early and -fsycl-link=image are not supported. if (SYCLLink && (SYCLLink->getValue() == StringRef("early") || SYCLLink->getValue() == StringRef("image"))) - Diag(diag::err_drv_unsupported_opt_removed) + C.getDriver().Diag(diag::err_drv_unsupported_opt_removed) << SYCLLink->getAsString(C.getInputArgs()); Arg *DeviceCodeSplit = @@ -1344,220 +1274,250 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, StringRef ArgValue(DeviceObj->getValue()); SmallVector DeviceObjValues = {"spirv", "llvmir", "asm"}; if (llvm::find(DeviceObjValues, ArgValue) == DeviceObjValues.end()) - Diag(clang::diag::warn_ignoring_value_using_default) + C.getDriver().Diag(clang::diag::warn_ignoring_value_using_default) << DeviceObj->getSpelling().split('=').first << ArgValue << "llvmir"; else if (ArgValue == "asm" && (!SYCLDeviceOnly || !EmitAsm)) - Diag(clang::diag::warn_drv_fsycl_device_obj_asm_device_only); + C.getDriver().Diag( + clang::diag::warn_drv_fsycl_device_obj_asm_device_only); } Arg *SYCLForceTarget = getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ); if (SYCLForceTarget) { StringRef Val(SYCLForceTarget->getValue()); - llvm::Triple TT(getSYCLDeviceTriple(Val, SYCLForceTarget)); + llvm::Triple TT(C.getDriver().getSYCLDeviceTriple(Val, SYCLForceTarget)); if (!isValidSYCLTriple(TT)) - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) << Val; } - bool HasSYCLTargetsOption = SYCLTargets; +} - llvm::StringMap> DerivedArchs; - llvm::StringMap FoundNormalizedTriples; - // StringSet to contain SYCL target triples. - llvm::StringSet<> SYCLTriples; - if (HasSYCLTargetsOption) { - // At this point, we know we have a valid combination - // of -fsycl*target options passed - Arg *SYCLTargetsValues = SYCLTargets; - if (SYCLTargetsValues) { - if (SYCLTargetsValues->getNumValues()) { - - // Multiple targets are currently not supported when using - // -fsycl-force-target as the bundler does not allow for multiple - // outputs of the same target. - if (SYCLForceTarget && SYCLTargetsValues->getNumValues() > 1) - Diag(clang::diag::err_drv_multiple_target_with_forced_target) - << SYCLTargetsValues->getAsString(C.getInputArgs()) - << SYCLForceTarget->getAsString(C.getInputArgs()); - - std::multiset SYCLTriples; - for (StringRef SYCLTargetTriple : SYCLTargetsValues->getValues()) - SYCLTriples.insert(SYCLTargetTriple); +void Driver::CreateOffloadingDeviceToolChains(Compilation &C, + InputList &Inputs) { + bool UseLLVMOffload = C.getInputArgs().hasArg( + options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); + bool IsCuda = + llvm::any_of(Inputs, + [](std::pair &I) { + return types::isCuda(I.first); + }) && + !UseLLVMOffload; + bool IsHIP = + (llvm::any_of(Inputs, + [](std::pair &I) { + return types::isHIP(I.first); + }) || + C.getInputArgs().hasArg(options::OPT_hip_link) || + C.getInputArgs().hasArg(options::OPT_hipstdpar)) && + !UseLLVMOffload; + bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, + options::OPT_fno_sycl, false) || + C.getInputArgs().hasArgNoClaim(options::OPT_fsycl_device_only, + options::OPT_fsyclbin_EQ); + bool IsOpenMPOffloading = + UseLLVMOffload || + (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false) && + (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || + (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && + !(IsCuda || IsHIP))); + + llvm::DenseSet Kinds; + const std::pair ActiveKinds[] = { + {IsCuda, Action::OFK_Cuda}, + {IsHIP, Action::OFK_HIP}, + {IsOpenMPOffloading, Action::OFK_OpenMP}, + {IsSYCL, Action::OFK_SYCL}}; + for (const auto &[Active, Kind] : ActiveKinds) + if (Active) + Kinds.insert(Kind); + + // We currently don't support any kind of mixed offloading. + if (Kinds.size() > 1 && !IsSYCL) { + Diag(clang::diag::err_drv_mix_offload) + << Action::GetOffloadKindName(*Kinds.begin()).upper() + << Action::GetOffloadKindName(*(++Kinds.begin())).upper(); + return; + } - llvm::StringMap FoundNormalizedTriples; - llvm::Triple TT; - for (StringRef Triple : SYCLTriples) { - - if (Triple.starts_with("intel_gpu_")) { - TT = getSYCLDeviceTriple("spir64_gen"); - } else if (Triple.starts_with("nvidia_gpu_")) { - TT = getSYCLDeviceTriple("nvptx64-nvidia-cuda"); - } else if (Triple.starts_with("amd_gpu_")) { - TT = getSYCLDeviceTriple("amdgcn-amd-amdhsa"); - } else - TT = getSYCLDeviceTriple(Triple); - - // For the new offloading model, we only want a single triple entry - // for each target, even if we have multiple intel_gpu* entries. We - // will track triples for new model and unique strings for the old - // model. - std::string NormalizedName; - bool UseNewOffload = - (C.getArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)); - NormalizedName = UseNewOffload - ? TT.normalize() - : getSYCLDeviceTriple(Triple).normalize(); - - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, Triple); - - if (!Inserted) { - // Only emit the diagnostic of duplicate targets with the new - // offloading model only when the found triple matches. For the - // old model, we always emit the diagnostic. - if (!UseNewOffload || (UseNewOffload && Triple == TripleIt->second)) - Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) - << Triple << TripleIt->second; - continue; - } + diagnoseSYCLOptions(C, IsSYCL); - // If the specified target is invalid, emit a diagnostic. - if (!isValidSYCLTriple(TT)) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Triple; - continue; - } + // Initialize the compilation identifier used for unique CUDA / HIP names. + if (IsCuda || IsHIP) + CUIDOpts = CUIDOptions(C.getArgs(), *this); + + // Get the list of requested offloading toolchains. If they were not + // explicitly specified we will infer them based on the offloading language + // and requested architectures. + std::multiset Triples; + if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { + std::vector ArgValues = + C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ); + if (ArgValues.empty()) + Diag(clang::diag::warn_drv_empty_joined_argument) + << C.getInputArgs() + .getLastArg(options::OPT_offload_targets_EQ) + ->getAsString(C.getInputArgs()); + else if (IsSYCL) { + // Determine any SYCL implied triples that are based on non-triple + // values (intel_gpu*, nvidia_gpu*, amd_gpu* values). + // Multiple targets are currently not supported when using + // -fsycl-force-target as the bundler does not allow for multiple + // outputs of the same target. + Arg *SYCLForceTarget = + C.getInputArgs().getLastArg(options::OPT_fsycl_force_target_EQ); + if (SYCLForceTarget && ArgValues.size() > 1) { + std::string OptString("-fsycl-targets"); + if (auto SYCLArg = + C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) + OptString = SYCLArg->getAsString(C.getInputArgs()); + Diag(clang::diag::err_drv_multiple_target_with_forced_target) + << OptString << SYCLForceTarget->getAsString(C.getInputArgs()); + } + + std::multiset SYCLTriples; + for (StringRef SYCLTargetTriple : ArgValues) + SYCLTriples.insert(SYCLTargetTriple); - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); - UniqueSYCLTriplesVec.push_back(TT); + llvm::StringMap FoundNormalizedTriples; + llvm::Triple TT; + for (StringRef Triple : SYCLTriples) { + + if (Triple.starts_with("intel_gpu_")) { + TT = getSYCLDeviceTriple("spir64_gen"); + } else if (Triple.starts_with("nvidia_gpu_")) { + TT = getSYCLDeviceTriple("nvptx64-nvidia-cuda"); + } else if (Triple.starts_with("amd_gpu_")) { + TT = getSYCLDeviceTriple("amdgcn-amd-amdhsa"); + } else + TT = getSYCLDeviceTriple(Triple); + + // For the new offloading model, we only want a single triple entry + // for each target, even if we have multiple intel_gpu* entries. We + // will track triples for new model and unique strings for the old + // model. + std::string NormalizedName; + bool UseNewOffload = + (C.getArgs().hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false)); + NormalizedName = UseNewOffload + ? TT.normalize() + : getSYCLDeviceTriple(Triple).normalize(); + + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Triple); + + if (IsSYCL && !Inserted) { + // Only emit the diagnostic of duplicate targets with the new + // offloading model only when the found triple matches. For the + // old model, we always emit the diagnostic. + if (!UseNewOffload || (UseNewOffload && Triple == TripleIt->second)) + Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) + << Triple << TripleIt->second; + continue; } - if (addSYCLDefaultTriple(C, UniqueSYCLTriplesVec)) { - // Add the default triple (spir64) toolchain. - llvm::Triple DefaultTriple = - C.getDriver().getSYCLDeviceTriple(getDefaultSYCLArch(C)); - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, - DefaultTriple, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); + + // If the specified target is invalid, emit a diagnostic. + if (IsSYCL && !isValidSYCLTriple(TT)) { + Diag(clang::diag::err_drv_invalid_sycl_target) << Triple; + continue; } - } else - Diag(clang::diag::warn_drv_empty_joined_argument) - << SYCLTargetsValues->getAsString(C.getInputArgs()); - } - } - // If the user specified --offload-arch, deduce the offloading - // target triple(s) from the set of architecture(s). - // Create a toolchain for each valid triple. - // We do not support SYCL offloading if any of the inputs is a - // .cu (for CUDA type) or .hip (for HIP type) file. - else if (IsSYCL && C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && - !IsHIP && !IsCuda) { - // SYCL offloading to AOT Targets with '--offload-arch' - // is currently enabled only with '--offload-new-driver' option. - // Emit a diagnostic if '--offload-arch' is invoked without - // '--offload-new driver' option. - if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) { - Diag(clang::diag::err_drv_sycl_offload_arch_new_driver); - return; + Triples.insert(C.getInputArgs().MakeArgString(TT.normalize())); + } + } else + for (llvm::StringRef Target : ArgValues) + Triples.insert(C.getInputArgs().MakeArgString(Target)); + } else if (Kinds.size() > 0) { + for (Action::OffloadKind Kind : Kinds) { + llvm::DenseSet Derived = inferOffloadToolchains(C, Kind); + Triples.insert(Derived.begin(), Derived.end()); } - llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); - llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); - llvm::Triple IntelGPUTriple("spir64_gen-unknown-unknown"); - llvm::Triple IntelCPUTriple("spir64_x86_64-unknown-unknown"); - - // Attempt to deduce the offloading triple from the set of architectures. - // We need to temporarily create these toolchains so that we can access - // tools for inferring architectures. - - for (StringRef Arch : - C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { - bool IsNVPTX = IsSYCLSupportedNVidiaGPUArch( - StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); - bool IsAMDGPU = IsSYCLSupportedAMDGPUArch( - StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); - bool IsIntelGPU = IsIntelGPUOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(IntelGPUTriple, Arch))); - bool IsIntelCPU = IsIntelCPUOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(IntelCPUTriple, Arch))); - - if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !IsIntelGPU && - !IsIntelCPU && !Arch.equals_insensitive("native")) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; + } + + // Build an offloading toolchain for every requested target and kind. + llvm::StringMap FoundNormalizedTriples; + for (StringRef Target : Triples) { + // OpenMP offloading requires a compatible libomp. + if (Kinds.contains(Action::OFK_OpenMP)) { + OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); + if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); return; } } - for (const llvm::Triple &TT : - {AMDTriple, NVPTXTriple, IntelGPUTriple, IntelCPUTriple}) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - C.getDefaultToolChain().getTriple()); + // Certain options are not allowed when combined with SYCL compilation. + if (Kinds.contains(Action::OFK_SYCL)) { + for (auto ID : + {options::OPT_static_libstdcxx, options::OPT_ffreestanding}) + if (Arg *IncompatArg = C.getInputArgs().getLastArg(ID)) + Diag(clang::diag::err_drv_argument_not_allowed_with) + << IncompatArg->getSpelling() << "-fsycl"; + } - llvm::SmallVector Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/false); - if (!Archs.empty()) { - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = Archs; + // Create a device toolchain for every specified kind and triple. + for (Action::OffloadKind Kind : Kinds) { + llvm::Triple TT = Kind == Action::OFK_OpenMP + ? ToolChain::getOpenMPTriple(Target) + : llvm::Triple(Target); + if (TT.getArch() == llvm::Triple::ArchType::UnknownArch) { + Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT.str(); + continue; } - } - auto TCRange = C.getOffloadToolChains(Action::OFK_SYCL); - if (TCRange.first == TCRange.second) { - Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); - return; - } + if (Kind == Action::OFK_OpenMP) { + std::string NormalizedName = TT.normalize(); + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Target); + if (!Inserted) { + Diag(clang::diag::warn_drv_omp_offload_target_duplicate) + << Target << TripleIt->second; + continue; + } + } - } else { - // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V. - // For -fsycl-device-only, we also setup the implied triple as needed. - if (IsSYCL) { - StringRef SYCLTargetArch = getDefaultSYCLArch(C); - UniqueSYCLTriplesVec.push_back(getSYCLDeviceTriple(SYCLTargetArch)); - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); - } - } - - // -fsycl-fp64-conv-emu is valid only for AOT compilation with an Intel GPU - // target. For other scenarios, we emit a warning message. - if (C.getInputArgs().hasArg(options::OPT_fsycl_fp64_conv_emu)) { - bool HasIntelGPUAOTTarget = false; - for (auto &TT : UniqueSYCLTriplesVec) { - if (TT.isSPIRAOT() && TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) { - HasIntelGPUAOTTarget = true; - break; + auto &TC = getOffloadToolChain(C.getInputArgs(), Kind, TT, + C.getDefaultToolChain().getTriple()); + + // Emit a warning if the detected CUDA version is too new. + if (Kind == Action::OFK_Cuda) { + auto &CudaInstallation = + static_cast(TC).CudaInstallation; + if (CudaInstallation.isValid()) + CudaInstallation.WarnIfUnsupportedVersion(); } + + C.addOffloadDeviceToolChain(&TC, Kind); } - if (!HasIntelGPUAOTTarget) - Diag(diag::warn_unsupported_fsycl_fp64_conv_emu_use); } - // We'll need to use the SYCL and host triples as the key into - // getOffloadingDeviceToolChain, because the device toolchains we're - // going to create will depend on both. - if ((IsSYCL && !C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && - !HasSYCLTargetsOption) { - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - for (const auto &TT : UniqueSYCLTriplesVec) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - HostTC->getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); + // Perform any additional SYCL specific behaviors that are tied to expected + // triples. + if (Kinds.contains(Action::OFK_SYCL)) { + // Add the default toolchain for SYCL if it is not already added when using + // the old offloading model. + if (!C.getArgs().hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false)) { + // Make vector of triples. + SmallVector Triples; + for (auto &TripleString : FoundNormalizedTriples) { + llvm::Triple T(TripleString.getKey()); + Triples.push_back(T); + } + if (addSYCLDefaultTriple(C, Triples)) { + llvm::Triple TT = llvm::Triple("spir64-unknown-unknown"); + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + } + } + // -fsycl-fp64-conv-emu is valid only for AOT compilation with an Intel GPU + // target. For other scenarios, we emit a warning message. + if (C.getInputArgs().hasArg(options::OPT_fsycl_fp64_conv_emu)) { + llvm::Triple TT = llvm::Triple("spir64_gen-unknown-unknown"); + if (!FoundNormalizedTriples.contains(TT.normalize())) + Diag(diag::warn_unsupported_fsycl_fp64_conv_emu_use); } } - - // - // TODO: Add support for other offloading programming models here. - // } bool Driver::loadZOSCustomizationFile(llvm::cl::ExpansionContext &ExpCtx) { @@ -3973,7 +3933,7 @@ bool Driver::checkForSYCLDefaultDevice(Compilation &C, // Do not do the check if the default device is passed in -fsycl-targets // or if -fsycl-targets isn't passed (that implies default device) - if (const Arg *A = Args.getLastArgNoClaim(options::OPT_fsycl_targets_EQ)) { + if (const Arg *A = Args.getLastArgNoClaim(options::OPT_offload_targets_EQ)) { for (const char *Val : A->getValues()) { llvm::Triple TT(C.getDriver().getSYCLDeviceTriple(Val, A)); if ((TT.isSPIROrSPIRV()) && TT.getSubArch() == llvm::Triple::NoSubArch) @@ -4197,9 +4157,6 @@ class OffloadingActionBuilder final { // architecture. If we are in host-only mode we return 'success' so that // the host uses the CUDA offload kind. if (auto *IA = dyn_cast(HostAction)) { - assert(!GpuArchList.empty() && - "We should have at least one GPU architecture."); - // If the host input is not CUDA or HIP, we don't need to bother about // this input. if (!(IA->getType() == types::TY_CUDA || @@ -4299,10 +4256,6 @@ class OffloadingActionBuilder final { CudaDeviceActions.clear(); } - /// Get canonicalized offload arch option. \returns empty StringRef if the - /// option is invalid. - virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0; - virtual std::optional> getConflictOffloadArchCombination(const std::set &GpuArchs) = 0; @@ -4331,91 +4284,25 @@ class OffloadingActionBuilder final { return true; } - ToolChains.push_back( - AssociatedOffloadKind == Action::OFK_Cuda - ? C.getSingleOffloadToolChain() - : C.getSingleOffloadToolChain()); - - CompileHostOnly = C.getDriver().offloadHostOnly(); - EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); - EmitAsm = Args.getLastArg(options::OPT_S); - - // --offload and --offload-arch options are mutually exclusive. - if (Args.hasArgNoClaim(options::OPT_offload_EQ) && - Args.hasArgNoClaim(options::OPT_offload_arch_EQ, - options::OPT_no_offload_arch_EQ)) { - C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch" - << "--offload"; - } - - // Collect all offload arch parameters, removing duplicates. std::set GpuArchs; - bool Error = false; - const ToolChain &TC = *ToolChains.front(); - for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"", - AssociatedOffloadKind)) { - if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || - A->getOption().matches(options::OPT_no_offload_arch_EQ))) - continue; - A->claim(); - - for (StringRef ArchStr : llvm::split(A->getValue(), ",")) { - if (A->getOption().matches(options::OPT_no_offload_arch_EQ) && - ArchStr == "all") { - GpuArchs.clear(); - } else if (ArchStr == "native") { - auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); - if (!GPUsOrErr) { - TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC.getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; - continue; - } + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_HIP}) { + for (auto &I : llvm::make_range(C.getOffloadToolChains(Kind))) { + ToolChains.push_back(I.second); - for (auto GPU : *GPUsOrErr) { - GpuArchs.insert(Args.MakeArgString(GPU)); - } - } else { - ArchStr = getCanonicalOffloadArch(ArchStr); - if (ArchStr.empty()) { - Error = true; - } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) - GpuArchs.insert(ArchStr); - else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) - GpuArchs.erase(ArchStr); - else - llvm_unreachable("Unexpected option."); - } + for (auto Arch : + C.getDriver().getOffloadArchs(C, C.getArgs(), Kind, *I.second)) + GpuArchs.insert(Arch); } } - auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs); - if (ConflictingArchs) { - C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) - << ConflictingArchs->first << ConflictingArchs->second; - C.setContainsError(); - return true; - } - - // Collect list of GPUs remaining in the set. for (auto Arch : GpuArchs) GpuArchList.push_back(Arch.data()); - // Default to sm_20 which is the lowest common denominator for - // supported GPUs. sm_20 code should work correctly, if - // suboptimally, on all newer GPUs. - if (GpuArchList.empty()) { - if (ToolChains.front()->getTriple().isSPIROrSPIRV()) { - if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD) - GpuArchList.push_back(OffloadArch::AMDGCNSPIRV); - else - GpuArchList.push_back(OffloadArch::Generic); - } else { - GpuArchList.push_back(DefaultOffloadArch); - } - } + CompileHostOnly = C.getDriver().offloadHostOnly(); + EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); + EmitAsm = Args.getLastArg(options::OPT_S); - return Error; + return false; } }; @@ -4430,15 +4317,6 @@ class OffloadingActionBuilder final { DefaultOffloadArch = OffloadArch::CudaDefault; } - StringRef getCanonicalOffloadArch(StringRef ArchStr) override { - OffloadArch Arch = StringToOffloadArch(ArchStr); - if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; - return StringRef(); - } - return OffloadArchToString(Arch); - } - std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { @@ -4615,24 +4493,6 @@ class OffloadingActionBuilder final { bool canUseBundlerUnbundler() const override { return true; } - StringRef getCanonicalOffloadArch(StringRef IdStr) override { - llvm::StringMap Features; - // getHIPOffloadTargetTriple() is known to return valid value as it has - // been called successfully in the CreateOffloadingDeviceToolChains(). - auto T = - (IdStr == "amdgcnspirv") - ? llvm::Triple("spirv64-amd-amdhsa") - : *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()); - auto ArchStr = parseTargetID(T, IdStr, &Features); - if (!ArchStr) { - C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr; - C.setContainsError(); - return StringRef(); - } - auto CanId = getCanonicalTargetID(*ArchStr, Features); - return Args.MakeArgStringRef(CanId); - }; - std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { @@ -5100,7 +4960,7 @@ class OffloadingActionBuilder final { JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( C, Args, phases::Backend, Input, AssociatedOffloadKind); - if (TT.getOS() != llvm::Triple::NVCL) { + if (TT.getOS() != llvm::Triple::NVCL && !TT.isSPIROrSPIRV()) { auto *AA = C.getDriver().ConstructPhaseAction( C, Args, phases::Assemble, BA, AssociatedOffloadKind); ActionList DeviceActions = {BA, AA}; @@ -5516,8 +5376,8 @@ class OffloadingActionBuilder final { bool IsNativeCPU = TargetTriple.isNativeCPU(); for (const auto &Input : ListIndex) { // No need for any conversion if we are coming in from the - // clang-offload-deps or regular compilation path. - if (IsNVPTX || IsAMDGCN || ContainsOffloadDepsAction(Input) || + // clang-offload-deps path. + if (ContainsOffloadDepsAction(Input) || ContainsCompileOrAssembleAction(Input)) { LinkObjects.push_back(Input); continue; @@ -6066,9 +5926,8 @@ class OffloadingActionBuilder final { ArchStr = OffloadArchToString(Arch); } else if (TargetBE->isAMDGCN()) { llvm::StringMap Features; - auto Arch = parseTargetID( - *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), - ArchStr, &Features); + auto Arch = parseTargetID(llvm::Triple("amdgcn-amd-amdhsa"), + ArchStr, &Features); if (!Arch) { C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr; continue; @@ -6270,7 +6129,7 @@ class OffloadingActionBuilder final { // Gather information about the SYCL Ahead of Time targets. The targets // are determined on the SubArch values passed along in the triple. Arg *SYCLTargets = - C.getInputArgs().getLastArg(options::OPT_fsycl_targets_EQ); + C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ); bool HasValidSYCLRuntime = C.getInputArgs().hasFlag( options::OPT_fsycl, options::OPT_fno_sycl, false); @@ -6281,27 +6140,33 @@ class OffloadingActionBuilder final { for (StringRef Val : SYCLTargetsValues->getValues()) { StringRef UserTargetName(Val); if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("spir64_gen"), ValidDevice->data()); UserTargetName = "spir64_gen"; } else if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("nvptx64-nvidia-cuda"), ValidDevice->data()); UserTargetName = "nvptx64-nvidia-cuda"; } else if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("amdgcn-amd-amdhsa"), @@ -6322,8 +6187,10 @@ class OffloadingActionBuilder final { // the following iterations. FoundNormalizedTriples[NormalizedName] = Val; - SYCLTripleList.push_back( - C.getDriver().getSYCLDeviceTriple(UserTargetName)); + if (isValidSYCLTriple(llvm::Triple(UserTargetName))) + SYCLTripleList.push_back( + C.getDriver().getSYCLDeviceTriple(UserTargetName)); + // For user specified spir64_gen, add an empty device value as a // placeholder. if (TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) @@ -6641,7 +6508,7 @@ class OffloadingActionBuilder final { // FIXME - unbundling action with -fsycl-link is unbundling for both host // and device, where only the device is needed. auto UnbundlingHostAction = C.MakeAction( - A, (HasSPIRTarget && HostAction->getType() == types::TY_Archive) + A, (HostAction->getType() == types::TY_Archive) ? types::TY_Tempfilelist : A->getType()); UnbundlingHostAction->registerDependentActionInfo( @@ -7485,37 +7352,32 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - const llvm::Triple &Triple, - bool SpecificToolchain) { + const llvm::Triple &Triple) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. OffloadArch Arch = StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr)); if (Triple.isNVPTX() && (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "CUDA" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << ArchStr; return StringRef(); } else if (Triple.isAMDGPU() && (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "HIP" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << ArchStr; return StringRef(); } else if (Triple.isSPIRAOT() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && (Arch == OffloadArch::UNKNOWN || !IsIntelGPUOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "spir64_gen" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_gen" << ArchStr; return StringRef(); } else if (Triple.isSPIRAOT() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64 && (Arch == OffloadArch::UNKNOWN || !IsIntelCPUOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "spir64_x86_64" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_x86_64" << ArchStr; return StringRef(); } if (IsNVIDIAOffloadArch(Arch)) @@ -7557,11 +7419,7 @@ getConflictOffloadArchCombination(const llvm::DenseSet &Archs, llvm::SmallVector Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC, - bool SpecificToolchain) const { - if (!TC) - TC = &C.getDefaultToolChain(); - + Action::OffloadKind Kind, const ToolChain &TC) const { // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && Args.hasArgNoClaim(options::OPT_offload_arch_EQ, @@ -7574,89 +7432,42 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } llvm::DenseSet Archs; - StringRef Arch; - for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { + std::unique_ptr ExtractedArg = nullptr; + for (auto *Arg : C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. - std::unique_ptr ExtractedArg = nullptr; if (Kind == Action::OFK_SYCL) { - // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" - // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" - if (TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen && - (Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) || - Arg->getOption().matches(options::OPT_Xsycl_backend))) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - auto DeviceTC = std::make_unique( - *this, TC->getTriple(), *HostTC, C.getInputArgs()); - assert(DeviceTC && "Device toolchain not defined."); - ArgStringList TargetArgs; - DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), - C.getInputArgs(), TargetArgs); - // Look for -device and use that as the known - // arch to be associated with the current spir64_gen entry. Grab - // the right most entry. - for (int i = TargetArgs.size() - 2; i >= 0; --i) { - if (StringRef(TargetArgs[i]) == "-device") { - Arch = TargetArgs[i + 1]; - if (!Arch.empty()) - Archs.insert(Arch); - break; - } - } - } // For SYCL based offloading, we allow for -Xsycl-target-backend // and -Xsycl-target-backend=amdgcn-amd-hsa --offload-arch=gfx908 for // specifying options. - if (!(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) && - Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && - llvm::Triple(Arg->getValue(0)) == TC->getTriple()) { + if (Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && + llvm::Triple(Arg->getValue(0)) == TC.getTriple()) { Arg->claim(); unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); // -Xsycl-target-backend --offload-arch=gfx1150 - } else if (!(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_gen) && - Arg->getOption().matches(options::OPT_Xsycl_backend)) { + } else if (Arg->getOption().matches(options::OPT_Xsycl_backend)) { unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); } - } else { - if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && - ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { - Arg->claim(); - unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); - unsigned Prev = Index; - ExtractedArg = getOpts().ParseOneArg(Args, Index); - if (!ExtractedArg || Index > Prev + 1) { - TC->getDriver().Diag(diag::err_drv_invalid_Xopenmp_target_with_args) - << Arg->getAsString(Args); - continue; - } - Arg = ExtractedArg.get(); - } } - if (Kind == Action::OFK_SYCL && - Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { + Arg->getOption().matches(options::OPT_offload_targets_EQ)) { for (StringRef SYCLTargetValue : Arg->getValues()) { + StringRef Arch; if (auto Device = tools::SYCL::gen::isGPUTarget( SYCLTargetValue)) { - if (SpecificToolchain && - !(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)) + if (!(TC.getTriple().isSPIRAOT() && + TC.getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)) continue; if (Device->empty()) { Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; continue; } if (IsIntelGPUOffloadArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else if (auto Device = tools::SYCL::gen::isGPUTarget< tools::SYCL::gen::NvidiaGPU>(SYCLTargetValue)) { @@ -7665,7 +7476,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, continue; } if (IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else if (auto Device = tools::SYCL::gen::isGPUTarget< clang::driver::tools::SYCL::gen::AmdGPU>( @@ -7675,7 +7486,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, continue; } if (IsSYCLSupportedAMDGPUArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else { Arch = StringRef(); @@ -7684,75 +7495,110 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(Arch); } } - // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { for (StringRef Arch : Arg->getValues()) { if (Arch == "native" || Arch.empty()) { - auto GPUsOrErr = TC->getSystemGPUArchs(Args); + auto GPUsOrErr = TC.getSystemGPUArchs(Args); if (!GPUsOrErr) { - if (!SpecificToolchain) - llvm::consumeError(GPUsOrErr.takeError()); - else - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC.getArch()) + << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } for (auto ArchStr : *GPUsOrErr) { - StringRef CanonicalStr = - getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr), - TC->getTriple(), SpecificToolchain); + StringRef CanonicalStr = getCanonicalArchString( + C, Args, Args.MakeArgString(ArchStr), TC.getTriple()); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else return llvm::SmallVector(); } } else { - StringRef CanonicalStr = getCanonicalArchString( - C, Args, Arch, TC->getTriple(), SpecificToolchain); + StringRef CanonicalStr = + getCanonicalArchString(C, Args, Arch, TC.getTriple()); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else return llvm::SmallVector(); } } } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { - for (StringRef Arch : llvm::split(Arg->getValue(), ",")) { + for (StringRef Arch : Arg->getValues()) { if (Arch == "all") { Archs.clear(); } else { - StringRef ArchStr = getCanonicalArchString( - C, Args, Arch, TC->getTriple(), SpecificToolchain); + StringRef ArchStr = + getCanonicalArchString(C, Args, Arch, TC.getTriple()); Archs.erase(ArchStr); } } } } + if (Kind == Action::OFK_SYCL) { + // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" + // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" + if (TC.getTriple().isSPIRAOT() && + TC.getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) { + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + auto DeviceTC = std::make_unique( + *this, TC.getTriple(), *HostTC, C.getInputArgs()); + assert(DeviceTC && "Device toolchain not defined."); + ArgStringList TargetArgs; + DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), + C.getInputArgs(), TargetArgs); + // Look for -device and use that as the known + // arch to be associated with the current spir64_gen entry. Grab + // the right most entry. + for (int i = TargetArgs.size() - 2; i >= 0; --i) { + if (StringRef(TargetArgs[i]) == "-device") { + StringRef Arch; + Arch = TargetArgs[i + 1]; + if (!Arch.empty()) + Archs.insert(Arch); + break; + } + } + } + } + if (auto ConflictingArchs = - getConflictOffloadArchCombination(Archs, TC->getTriple())) + getConflictOffloadArchCombination(Archs, TC.getTriple())) C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) << ConflictingArchs->first << ConflictingArchs->second; - // Skip filling defaults if we're just querying what is availible. - if (SpecificToolchain && Archs.empty()) { + // Fill in the default architectures if not provided explicitly. + if (Archs.empty()) { if (Kind == Action::OFK_Cuda) { Archs.insert(OffloadArchToString(OffloadArch::CudaDefault)); } else if (Kind == Action::OFK_HIP) { - Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); + Archs.insert(OffloadArchToString(TC.getTriple().isSPIRV() + ? OffloadArch::Generic + : OffloadArch::HIPDefault)); + } else if (Kind == Action::OFK_SYCL) { + // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. + // The default arch is set for NVPTX if not provided. For AMDGPU, emit + // an error as the user is responsible to set the arch. + if (TC.getTriple().isNVPTX()) + Archs.insert(OffloadArchToString(OffloadArch::SM_50)); + else if (TC.getTriple().isAMDGPU()) + C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch) + << 1 << TC.getTriple().str(); + else + Archs.insert(StringRef()); } else if (Kind == Action::OFK_OpenMP) { // Accept legacy `-march` device arguments for OpenMP. - if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind) + if (auto *Arg = C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind) .getLastArg(options::OPT_march_EQ)) { Archs.insert(Arg->getValue()); } else { - auto ArchsOrErr = TC->getSystemGPUArchs(Args); + auto ArchsOrErr = TC.getSystemGPUArchs(Args); if (!ArchsOrErr) { - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC.getArch()) << llvm::toString(ArchsOrErr.takeError()) << "--offload-arch"; } else if (!ArchsOrErr->empty()) { for (auto Arch : *ArchsOrErr) @@ -7761,17 +7607,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(StringRef()); } } - } else if (Kind == Action::OFK_SYCL) { - // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. - // The default arch is set for NVPTX if not provided. For AMDGPU, emit - // an error as the user is responsible to set the arch. - if (TC->getTriple().isNVPTX()) - Archs.insert(OffloadArchToString(OffloadArch::SM_50)); - else if (TC->getTriple().isAMDGPU()) - C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch) - << 1 << TC->getTriple().str(); - else - Archs.insert(StringRef()); } } Args.ClaimAllArgs(options::OPT_offload_arch_EQ); @@ -7866,7 +7701,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // Get the product of all bound architectures and toolchains. SmallVector> TCAndArchs; for (const ToolChain *TC : ToolChains) { - for (StringRef Arch : OffloadArchs.lookup(TC)) { + for (StringRef Arch : getOffloadArchs(C, C.getArgs(), Kind, *TC)) { TCAndArchs.push_back(std::make_pair(TC, Arch)); DeviceActions.push_back( C.MakeAction(*InputArg, InputType, CUID)); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index a53ab19164494..dc7b103e19c5b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -105,44 +105,6 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, addIfExists(getFilePaths(), Path); } -llvm::Expected> -ToolChain::executeToolChainProgram(StringRef Executable) const { - llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile, - llvm::sys::fs::OF_Text); - llvm::FileRemover OutputRemover(OutputFile.c_str()); - std::optional Redirects[] = { - {""}, - OutputFile.str(), - {""}, - }; - - std::string ErrorMessage; - int SecondsToWait = 60; - if (std::optional Str = - llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { - if (!llvm::to_integer(*Str, SecondsToWait)) - return llvm::createStringError(std::error_code(), - "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " - "an integer, got '" + - *Str + "'"); - SecondsToWait = std::max(SecondsToWait, 0); // infinite - } - if (llvm::sys::ExecuteAndWait(Executable, {Executable}, {}, Redirects, - SecondsToWait, - /*MemoryLimit=*/0, &ErrorMessage)) - return llvm::createStringError(std::error_code(), - Executable + ": " + ErrorMessage); - - llvm::ErrorOr> OutputBuf = - llvm::MemoryBuffer::getFile(OutputFile.c_str()); - if (!OutputBuf) - return llvm::createStringError(OutputBuf.getError(), - "Failed to read stdout of " + Executable + - ": " + OutputBuf.getError().message()); - return std::move(*OutputBuf); -} - void ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) { Triple.setEnvironment(Env); if (EffectiveTriple != llvm::Triple()) @@ -1928,7 +1890,7 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs( getDriver().Diag(diag::err_drv_Xopenmp_target_missing_triple); continue; } - if (IsSYCL && !SingleTargetTripleCount(options::OPT_fsycl_targets_EQ)) { + if (IsSYCL && !SingleTargetTripleCount(options::OPT_offload_targets_EQ)) { getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple) << A->getSpelling(); continue; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 16fbd42a48156..5e463b9c98687 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -906,7 +906,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("amdgpu-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = getDriver().executeProgram({Program}); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 44ab37667c298..1454860b10c1f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -106,32 +106,15 @@ forAllAssociatedToolChains(Compilation &C, const JobAction &JA, // Apply Work on all the offloading tool chains associated with the current // action. - if (JA.isHostOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isDeviceOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isHostOffloading(Action::OFK_HIP)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isDeviceOffloading(Action::OFK_HIP)) - Work(*C.getSingleOffloadToolChain()); - - if (JA.isHostOffloading(Action::OFK_OpenMP)) { - auto TCs = C.getOffloadToolChains(); - for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) - Work(*II->second); - } else if (JA.isDeviceOffloading(Action::OFK_OpenMP)) - Work(*C.getSingleOffloadToolChain()); - - if (JA.isHostOffloading(Action::OFK_SYCL)) { - auto TCs = C.getOffloadToolChains(); - for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) - Work(*II->second); - } else if (JA.isDeviceOffloading(Action::OFK_SYCL)) - Work(*C.getSingleOffloadToolChain()); - - // - // TODO: Add support for other offloading programming models here. - // + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP, + Action::OFK_HIP, Action::OFK_SYCL}) { + if (JA.isHostOffloading(Kind)) { + auto TCs = C.getOffloadToolChains(Kind); + for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) + Work(*II->second); + } else if (JA.isDeviceOffloading(Kind)) + Work(*C.getSingleOffloadToolChain()); + } } static bool @@ -5434,8 +5417,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else { // Host-side compilation. NormalizedTriple = - (IsCuda ? C.getSingleOffloadToolChain() - : C.getSingleOffloadToolChain()) + (IsCuda ? C.getOffloadToolChains(Action::OFK_Cuda).first->second + : C.getOffloadToolChains(Action::OFK_HIP).first->second) ->getTriple() .normalize(); if (IsCuda) { @@ -8683,7 +8666,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (isa(JA) && JA.isHostOffloading(Action::OFK_SYCL)) { SmallString<128> TargetInfo("-fsycl-targets="); - if (Arg *Tgts = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *Tgts = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { if (i) TargetInfo += ','; diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 558172026a209..bc148b83d8ecb 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -900,7 +900,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("nvptx-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = getDriver().executeProgram({Program}); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 5c8ac5585648e..61c79bb4df32a 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -312,7 +312,7 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // add -fsycl-targets=intel_gpu_pvc..., native bfloat16 devicelib can // only be linked when all GPU types specified support. // We need to filter CPU target here and only focus on GPU device. - if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *SYCLTarget = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (auto TargetsV : SYCLTarget->getValues()) { if (!checkSpirvJIT(StringRef(TargetsV)) && !StringRef(TargetsV).starts_with("spir64_gen") && @@ -333,7 +333,7 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // bfloat16 native conversion. UseNative = true; - if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *SYCLTarget = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (auto TargetsV : SYCLTarget->getValues()) { if (!checkSpirvJIT(StringRef(TargetsV)) && !GPUArchsWithNBF16.contains(StringRef(TargetsV))) { @@ -641,7 +641,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, if (!IsSpirvAOT) return JIT; - llvm::opt::Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ); + llvm::opt::Arg *SYCLTarget = + Args.getLastArg(options::OPT_offload_targets_EQ); if (!SYCLTarget || (SYCLTarget->getValues().size() != 1)) return JIT; @@ -1667,7 +1668,7 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::Triple &Triple, if (OptNoTriple) { // With multiple -fsycl-targets, a triple is required so we know where // the options should go. - const Arg *TargetArg = Args.getLastArg(options::OPT_fsycl_targets_EQ); + const Arg *TargetArg = Args.getLastArg(options::OPT_offload_targets_EQ); if (TargetArg && TargetArg->getValues().size() != 1) { getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple) << A->getSpelling(); @@ -1906,7 +1907,7 @@ void SYCLToolChain::TranslateBackendTargetArgs( // Handle -Xsycl-target-backend. TranslateTargetOpt(Triple, Args, CmdArgs, options::OPT_Xsycl_backend, options::OPT_Xsycl_backend_EQ, Device); - TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ); + TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_offload_targets_EQ); } void SYCLToolChain::TranslateLinkerTargetArgs(const llvm::Triple &Triple, diff --git a/clang/test/Driver/amdgpu-hip-system-arch.c b/clang/test/Driver/amdgpu-hip-system-arch.c index 9c27bc09fb36c..12e298a8636b1 100644 --- a/clang/test/Driver/amdgpu-hip-system-arch.c +++ b/clang/test/Driver/amdgpu-hip-system-arch.c @@ -14,14 +14,14 @@ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_fail -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: cannot determine amdgcn architecture{{.*}}; consider passing it via '--offload-arch' +// NO-OUTPUT-ERROR: error: cannot determine hip architecture{{.*}}; consider passing it via '--offload-arch' // case when amdgpu-arch does not return anything with successful execution // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT -// EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '--offload-arch' +// EMPTY-OUTPUT: error: cannot determine hip architecture: No GPU detected in the system; consider passing it via '--offload-arch' // case when amdgpu-arch returns a gfx906 GPU. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \ @@ -36,4 +36,4 @@ // RUN: --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 \ // RUN: -x hip %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD-TIMEOUT -// BAD-TIMEOUT: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) +// BAD-TIMEOUT: clang: error: cannot determine hip architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu index 8b91a1d5a7fcf..220a320e32705 100644 --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -324,8 +324,8 @@ // RUN: -ccc-print-phases --offload-arch=sm_999 -fgpu-rdc -c %s 2>&1 \ // RUN: | FileCheck -check-prefix=INVALID-ARCH %s // INVALID-ARCH: error: unsupported CUDA gpu architecture: sm_999 -// INVALID-ARCH-NEXT: 0: input, "[[INPUT:.+]]", cuda, (host-cuda) -// INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) -// INVALID-ARCH-NEXT: 2: compiler, {1}, ir, (host-cuda) -// INVALID-ARCH-NEXT: 3: backend, {2}, assembler, (host-cuda) -// INVALID-ARCH-NEXT: 4: assembler, {3}, object, (host-cuda) +// INVALID-ARCH: 0: input, "[[INPUT:.+]]", cuda +// INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output +// INVALID-ARCH-NEXT: 2: compiler, {1}, ir +// INVALID-ARCH-NEXT: 3: backend, {2}, assembler +// INVALID-ARCH-NEXT: 4: assembler, {3}, object diff --git a/clang/test/Driver/hip-inputs.hip b/clang/test/Driver/hip-inputs.hip index 2d4cc3103c5ec..a8e25ad8ed198 100644 --- a/clang/test/Driver/hip-inputs.hip +++ b/clang/test/Driver/hip-inputs.hip @@ -15,5 +15,5 @@ // RUN: --hip-link %S/Inputs/hip_multiple_inputs/a.cu 2>&1 \ // RUN: | FileCheck -check-prefix=MIX %s -// CHECK-NOT: error: mixed CUDA and HIP compilation is not supported -// MIX: error: mixed CUDA and HIP compilation is not supported +// CHECK-NOT: error: mixed CUDA and HIP offloading compilation is not supported +// MIX: error: mixed CUDA and HIP offloading compilation is not supported diff --git a/clang/test/Driver/hip-invalid-target-id.hip b/clang/test/Driver/hip-invalid-target-id.hip index 555043facb2a3..ad942e476617e 100644 --- a/clang/test/Driver/hip-invalid-target-id.hip +++ b/clang/test/Driver/hip-invalid-target-id.hip @@ -4,7 +4,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s -// NOPLUS: error: invalid target ID 'gfx908xnack' +// NOPLUS: error: unsupported HIP gpu architecture: gfx908xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx900 \ @@ -22,7 +22,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s -// UNK: error: invalid target ID 'gfx908:unknown+' +// UNK: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ @@ -31,7 +31,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s -// MIXED: error: invalid target ID 'gfx908:sramecc+:unknown+' +// MIXED: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ @@ -55,7 +55,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s -// NOCOLON: error: invalid target ID 'gfx900+xnack' +// NOCOLON: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip index af99b4a4550f9..a19bf4cee32a1 100644 --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -115,11 +115,6 @@ // OMP-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp" // OMP: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp" -// RUN: not %clang --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ -// RUN: --offload-arch=gfx906 -fopenmp=libomp -fopenmp-targets=amdgcn %s 2>&1 \ -// RUN: | FileCheck -check-prefix=OMPTGT %s -// OMPTGT: unsupported option '--offload-targets=' for language mode 'HIP' - // Check -Xoffload-linker option is passed to lld. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ diff --git a/clang/test/Driver/invalid-offload-options.cpp b/clang/test/Driver/invalid-offload-options.cpp index 48d5310538a3c..6048a3ca82e77 100644 --- a/clang/test/Driver/invalid-offload-options.cpp +++ b/clang/test/Driver/invalid-offload-options.cpp @@ -1,29 +1,7 @@ // UNSUPPORTED: system-windows -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload= \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s // RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload=foo \ // RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ // RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s // INVALID-TARGET: error: invalid or unsupported offload target: '{{.*}}' - -// In the future we should be able to specify multiple targets for HIP -// compilation but currently it is not supported. -// -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload=foo,bar \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu \ -// RUN: --offload=foo --offload=bar \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s - -// TOO-MANY-TARGETS: error: only one offload target is supported - -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu -nogpuinc -nogpulib \ -// RUN: --offload=amdgcn-amd-amdhsa --offload-arch=gfx900 %s \ -// RUN: 2>&1 | FileCheck --check-prefix=OFFLOAD-ARCH-MIX %s - -// OFFLOAD-ARCH-MIX: error: option '--offload-arch' cannot be specified with '--offload' diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c index c54eeac73f73b..2d4eca8c43bc3 100644 --- a/clang/test/Driver/nvptx-cuda-system-arch.c +++ b/clang/test/Driver/nvptx-cuda-system-arch.c @@ -16,14 +16,14 @@ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_fail -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: cannot determine nvptx64 architecture{{.*}}; consider passing it via '--offload-arch' +// NO-OUTPUT-ERROR: error: cannot determine cuda architecture{{.*}}; consider passing it via '--offload-arch' // case when nvptx-arch does not return anything with successful execution // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT -// EMPTY-OUTPUT: error: cannot determine nvptx64 architecture: No NVIDIA GPU detected in the system; consider passing it via '--offload-arch' +// EMPTY-OUTPUT: error: cannot determine cuda architecture: No GPU detected in the system; consider passing it via '--offload-arch' // case when nvptx-arch does not return anything with successful execution // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \ @@ -49,4 +49,4 @@ // RUN: --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \ // RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda -x cuda %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD-TIMEOUT -// BAD-TIMEOUT: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) +// BAD-TIMEOUT: clang: error: cannot determine cuda architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) diff --git a/clang/test/Driver/offload-target.c b/clang/test/Driver/offload-target.c new file mode 100644 index 0000000000000..af8653ea92225 --- /dev/null +++ b/clang/test/Driver/offload-target.c @@ -0,0 +1,22 @@ +// RUN: %clang -### -fsycl --offload-targets=spirv64 -nogpuinc %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL +// SYCL: "spirv64-unknown-unknown" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[SYCL_BC:.+]]" + +// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=HIP +// HIP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc -x cuda %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CUDA +// CUDA: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[NV_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa,nvptx64-nvidia-cuda -fopenmp \ +// RUN: -Xarch_amdgcn --offload-arch=gfx90a -Xarch_nvptx64 --offload-arch=sm_89 \ +// RUN: -nogpulib -nogpuinc %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=OPENMP +// OPENMP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" +// OPENMP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[NV_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=spirv64-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=HIPSPIRV +// HIPSPIRV: "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" diff --git a/clang/test/Driver/openmp-offload-infer.c b/clang/test/Driver/openmp-offload-infer.c index 2a38a99c30518..a756e82438466 100644 --- a/clang/test/Driver/openmp-offload-infer.c +++ b/clang/test/Driver/openmp-offload-infer.c @@ -36,10 +36,10 @@ // CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \ -// RUN: --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=skylake \ +// RUN: --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=unknown \ // RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-FAILED -// CHECK-FAILED: error: failed to deduce triple for target architecture 'skylake'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead +// CHECK-FAILED: error: failed to deduce triple for target architecture 'unknown'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \ // RUN: --offload-arch=sm_70 --offload-arch=gfx908 -fno-openmp \ diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c index 516c74b190885..d4016e98e6666 100644 --- a/clang/test/Driver/openmp-offload.c +++ b/clang/test/Driver/openmp-offload.c @@ -7,7 +7,7 @@ /// Check whether an invalid OpenMP target is specified: // RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s -// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' +// CHK-INVALID-TARGET: error: invalid or unsupported offload target: 'aaa-bbb-ccc-ddd' /// ########################################################################### @@ -18,15 +18,6 @@ /// ########################################################################### -/// Check error for no -fopenmp option -// RUN: not %clang -### -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s -// RUN: not %clang -### -fopenmp=libgomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s -// CHK-NO-FOPENMP: error: '-fopenmp-targets' must be used in conjunction with a '-fopenmp' option compatible with offloading; e.g., '-fopenmp=libomp' or '-fopenmp=libiomp5' - -/// ########################################################################### - /// Check warning for duplicate offloading targets. // RUN: %clang -### -ccc-print-phases -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c index b18ecf3ec474b..167b07a23f512 100644 --- a/clang/test/Driver/openmp-system-arch.c +++ b/clang/test/Driver/openmp-system-arch.c @@ -24,13 +24,7 @@ // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \ // RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch= \ -// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ -// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch= \ -// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ -// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead +// NO-OUTPUT-ERROR: error: cannot determine openmp architecture // case when amdgpu-arch succeeds. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \ diff --git a/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp index d733618744d71..46d0bc19c2258 100644 --- a/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp +++ b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp @@ -58,15 +58,6 @@ // TARGET-TRIPLE-CPU: "-D__SYCL_TARGET_INTEL_X86_64__" // CLANG-OFFLOAD-PACKAGER-CPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_x86_64-unknown-unknown,arch=[[DEV_STR]],kind=sycl" -// Tests for handling a missing architecture. -// -// RUN: not %clangxx --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ -// RUN: | FileCheck -check-prefix=MISSING %s -// RUN: not %clang_cl --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ -// RUN: | FileCheck -check-prefix=MISSING %s - -// MISSING: error: must pass in an explicit cpu or gpu architecture to '--offload-arch' - // Tests for handling a incorrect architecture. // // RUN: not %clangxx --offload-new-driver -fsycl --offload-arch=badArch %s -### 2>&1 \ diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index 5e446369a4181..e0631f825c023 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -114,8 +114,8 @@ // RUN: --offload-new-driver %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK_PACKAGER_OPTS %s // CHK_PACKAGER_OPTS: clang-offload-packager{{.*}} "-o" -// CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64_gen-unknown-unknown,arch=pvc,kind=sycl,compile-opts={{.*}}-spir64_gen-opt,link-opts=-spir64_gen-link-opt // CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64-unknown-unknown,arch=generic,kind=sycl,compile-opts={{.*}}-spir64-opt,link-opts=-spir64-link-opt +// CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64_gen-unknown-unknown,arch=pvc,kind=sycl,compile-opts={{.*}}-spir64_gen-opt,link-opts=-spir64_gen-link-opt /// Check phases with multiple intel_gpu settings // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl \ diff --git a/clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp b/clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp index 53d6fbc910e9c..52aaf074432d8 100644 --- a/clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp +++ b/clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp @@ -21,16 +21,14 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -L/dummy/dir %t_lib.lo -### %t_obj.o 2>&1 \ // RUN: | FileCheck %s -check-prefixes=STATIC_LIB,STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L/dummy/dir %t_lib.a -### %t_obj.o 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L/dummy/dir %t_lib.lo -### %t_obj.o 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // STATIC_LIB: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input=[[INPUTO:.+\.o]]" "-output=[[HOSTOBJ:.+\.o]]" "-output={{.+\.o}}" // STATIC_LIB: clang-offload-deps{{.*}} "-targets=[[BUNDLE_TRIPLE]]" // STATIC_LIB_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}" "-output=[[OUTFILE:.+\.txt]]" -// STATIC_LIB_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}" "-output=[[OUTFILE:.+\.a]]" // STATIC_LIB_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTFILE]]" "--in-replace=[[OUTFILE]]" "--out-file-list=[[IROUTFILE:.+\.txt]]" "--out-replace=[[IROUTFILE]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTFILE]]" "-o" "[[IROUTFILE]]" // STATIC_LIB_DEF: llvm-link{{.*}} "@[[IROUTFILE]]" -// STATIC_LIB_NVPTX: llvm-link{{.*}} "[[OUTFILE]]" // STATIC_LIB: ld{{.*}} "{{.*}}_lib.{{(a|lo)}}" "[[HOSTOBJ]]" // Test using -l style for passing libraries. @@ -43,14 +41,12 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -Xlinker -Bstatic -L%t_dir -L%S/Inputs/SYCL -llin64 -### %t_obj.o 2>&1 \ // RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB,STATIC_L_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L%S/Inputs/SYCL -llin64 -### %t_obj.o 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=STATIC_L_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // STATIC_L_LIB: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input=[[INPUTO:.+\.o]]" "-output=[[HOSTOBJ:.+\.o]]" "-output={{.+\.o}}" // STATIC_L_LIB: clang-offload-deps{{.*}} "-targets=[[BUNDLE_TRIPLE]]" // STATIC_L_LIB_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}liblin64.a" "-output=[[OUTFILE:.+\.txt]]" -// STATIC_L_LIB_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}liblin64.a" "-output=[[OUTFILE:.+\.a]]" // STATIC_L_LIB_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTFILE]]" "--in-replace=[[OUTFILE]]" "--out-file-list=[[IROUTFILE:.+\.txt]]" "--out-replace=[[IROUTFILE]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTFILE]]" "-o" "[[IROUTFILE]]" // STATIC_L_LIB_DEF: llvm-link{{.*}} "@[[IROUTFILE]]" -// STATIC_L_LIB_NVPTX: llvm-link{{.*}} "[[OUTFILE]]" // STATIC_L_LIB: ld{{.*}} "-llin64" "[[HOSTOBJ]]" // non-fat libraries should not trigger the unbundling step. @@ -71,16 +67,14 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver %t_lib.a -### %t-1.o %t-2.o %t-3.o 2>&1 \ // RUN: | FileCheck %s -check-prefixes=STATIC_LIB_MULTI_O,STATIC_LIB_MULTI_O_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda %t_lib.a -### %t-1.o %t-2.o %t-3.o 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_MULTI_O,STATIC_LIB_MULTI_O_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=STATIC_LIB_MULTI_O,STATIC_LIB_MULTI_O_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // STATIC_LIB_MULTI_O: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input={{.+}}-1.o" // STATIC_LIB_MULTI_O: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input={{.+}}-2.o" // STATIC_LIB_MULTI_O: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" "-input={{.+}}-3.o" // STATIC_LIB_MULTI_O: clang-offload-deps{{.*}} "-targets=[[BUNDLE_TRIPLE]]" // STATIC_LIB_MULTI_O_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" {{.*}} "-output=[[OUTFILE:.+\.txt]]" -// STATIC_LIB_MULTI_O_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" {{.*}} "-output=[[OUTFILE:.+\.a]]" // STATIC_LIB_MULTI_O_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTFILE]]" "--in-replace=[[OUTFILE]]" "--out-file-list=[[IROUTFILE:.+\.txt]]" "--out-replace=[[IROUTFILE]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTFILE]]" "-o" "[[IROUTFILE]]" // STATIC_LIB_MULTI_O_DEF: llvm-link{{.*}} "@[[IROUTFILE]]" -// STATIC_LIB_MULTI_O_NVPTX: llvm-link{{.*}} "[[OUTFILE]]" /// ########################################################################### @@ -127,18 +121,19 @@ // STATIC_LIB_SRC-CUDA: 10: linker, {0, 9}, host_dep_image, (host-sycl) // STATIC_LIB_SRC-CUDA: 11: clang-offload-deps, {10}, ir, (host-sycl) // STATIC_LIB_SRC-CUDA: 12: input, "[[INPUTA]]", archive -// STATIC_LIB_SRC-CUDA: 13: clang-offload-unbundler, {12}, archive -// STATIC_LIB_SRC-CUDA: 14: linker, {5, 11, 13}, ir, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 15: sycl-post-link, {14}, ir, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 16: file-table-tform, {15}, ir, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 17: backend, {16}, assembler, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 18: assembler, {17}, object, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 19: linker, {17, 18}, cuda-fatbin, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 20: foreach, {16, 19}, cuda-fatbin, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 21: file-table-tform, {15, 20}, tempfiletable, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 22: clang-offload-wrapper, {21}, object, (device-sycl, sm_50) -// STATIC_LIB_SRC-CUDA: 23: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {22}, object -// STATIC_LIB_SRC-CUDA: 24: linker, {0, 9, 23}, image, (host-sycl) +// STATIC_LIB_SRC-CUDA: 13: clang-offload-unbundler, {12}, tempfilelist +// STATIC_LIB_SRC-CUDA: 14: spirv-to-ir-wrapper, {13}, tempfilelist, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 15: linker, {5, 11, 14}, ir, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 17: file-table-tform, {16}, ir, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 18: backend, {17}, assembler, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 19: assembler, {18}, object, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50) +// STATIC_LIB_SRC-CUDA: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object +// STATIC_LIB_SRC-CUDA: 25: linker, {0, 9, 24}, image, (host-sycl) /// ########################################################################### @@ -151,11 +146,9 @@ // STATIC_LIB_SRC2: ld{{(.exe)?}}" {{.*}} "-o" "[[HOSTEXE:.+\.out]]" {{.*}}"--unresolved-symbols=ignore-all" // STATIC_LIB_SRC2: clang-offload-deps{{.*}} "-targets=[[DEPS_TRIPLE]]" "-outputs=[[OUTDEPS:.+\.bc]]" "[[HOSTEXE]]" // STATIC_LIB_SRC2_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" {{.*}} "-output=[[OUTLIB:.+\.txt]]" -// STATIC_LIB_SRC2_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" {{.*}} "-output=[[OUTLIB:.+\.a]]" // STATIC_LIB_SRC2_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTLIB]]" "--in-replace=[[OUTLIB]]" "--out-file-list=[[OUTLIBLIST:.+\.txt]]" "--out-replace=[[OUTLIBLIST]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTLIB]]" "-o" [[OUTLIBLIST]]" // STATIC_LIB_SRC2: llvm-link{{.*}} "[[OUTDEPS]]" "-o" "[[OUTTEMP:.+\.bc]]" // STATIC_LIB_SRC2_DEF: llvm-link{{.*}} "--only-needed" "[[OUTTEMP]]" "@[[OUTLIBLIST]]" -// STATIC_LIB_SRC2_NVPTX: llvm-link{{.*}} "--only-needed" "[[OUTTEMP]]" "[[OUTLIB]]" // STATIC_LIB_SRC2: ld{{(.exe)?}}" {{.*}} "[[HOSTOBJ]]" /// ########################################################################### @@ -174,7 +167,8 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda,spir64 %t_lib.a -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=STATIC_LIB_MIX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // STATIC_LIB_MIX: clang-offload-bundler{{.*}} "-type=aoo" "-targets=sycl-nvptx64-nvidia-cuda-sm_50,sycl-spir64-unknown-unknown" {{.*}} "-output=[[NVPTXLIST:.+\.txt]]" "-output=[[SYCLLIST:.+\.txt]]" -// STATIC_LIB_MIX: llvm-link{{.*}} "@[[NVPTXLIST]]" +// STATIC_LIB_MIX: spirv-to-ir-wrapper{{.*}} "[[NVPTXLIST]]" "-o" "[[NVPTXLINKLIST:.+\.txt]]" +// STATIC_LIB_MIX: llvm-link{{.*}} "@[[NVPTXLINKLIST]]" // STATIC_LIB_MIX: spirv-to-ir-wrapper{{.*}} "[[SYCLLIST]]" "-o" "[[SYCLLINKLIST:.+\.txt]]" // STATIC_LIB_MIX: llvm-link{{.*}} "@[[SYCLLINKLIST]]" @@ -191,18 +185,15 @@ // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -L/dummy/dir %t_obj.o -Wl,@%/t_arg.arg -### 2>&1 \ // RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-spir64-unknown-unknown // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L/dummy/dir %t_obj.o -Wl,--whole-archive %t_lib.a %t_lib_2.a -Wl,--no-whole-archive -### 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_1,WHOLE_STATIC_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_1,WHOLE_STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -L/dummy/dir %t_obj.o -Wl,@%/t_arg.arg -### 2>&1 \ -// RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_NVPTX -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 +// RUN: | FileCheck %s -check-prefixes=WHOLE_STATIC_LIB,WHOLE_STATIC_LIB_DEF -DBUNDLE_TRIPLE=sycl-nvptx64-nvidia-cuda-sm_50 // WHOLE_STATIC_LIB: clang-offload-bundler{{.*}} "-type=o" "-targets={{.*}},[[BUNDLE_TRIPLE]]" // WHOLE_STATIC_LIB_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input=[[INPUTA:.+\.a]]" "-output=[[OUTPUTA:.+\.txt]]" // WHOLE_STATIC_LIB_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTPUTA]]" "--in-replace=[[OUTPUTA]]" "--out-file-list=[[OUTLISTA:.+\.txt]]" "--out-replace=[[OUTLISTA]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTPUTA]]" "-o" "[[OUTLISTA]]" // WHOLE_STATIC_LIB_DEF: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input=[[INPUTB:.+\.a]]" "-output=[[OUTPUTB:.+\.txt]]" // WHOLE_STATIC_LIB_DEF: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTPUTB]]" "--in-replace=[[OUTPUTB]]" "--out-file-list=[[OUTLISTB:.+\.txt]]" "--out-replace=[[OUTLISTB]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTPUTB]]" "-o" "[[OUTLISTB]]" // WHOLE_STATIC_LIB_DEF: llvm-link{{.*}} "@[[OUTLISTA]]" "@[[OUTLISTB]]" -// WHOLE_STATIC_LIB_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input=[[INPUTA:.+\.a]]" "-output=[[OUTPUTA:.+\.a]]" -// WHOLE_STATIC_LIB_NVPTX: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input=[[INPUTB:.+\.a]]" "-output=[[OUTPUTB:.+\.a]]" -// WHOLE_STATIC_LIB_NVPTX: llvm-link{{.*}} "[[OUTPUTA]]" "[[OUTPUTB]]" // WHOLE_STATIC_LIB: clang-offload-wrapper{{.*}} // WHOLE_STATIC_LIB: clang{{.*}} "-c" // WHOLE_STATIC_LIB_1: ld{{.*}} "--whole-archive" "[[INPUTA]]" "[[INPUTB]]" "--no-whole-archive" @@ -229,8 +220,9 @@ // STATIC_LIB_NOSRC-SPIR: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}_lib.{{(a|lo)}}" "-output=[[DEVICELIB:.+\.txt]]" "-unbundle" // STATIC_LIB_NOSRC-SPIR: llvm-foreach{{.*}}spirv-to-ir-wrapper{{.*}} "[[DEVICELIB]]" "-o" "[[DEVICELIST:.+\.txt]]" // STATIC_LIB_NOSRC-SPIR: llvm-link{{.*}} "@[[DEVICELIST]]" "-o" "[[BCFILE:.+\.bc]]" -// STATIC_LIB_NOSRC-CUDA: clang-offload-bundler{{.*}} "-type=a" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}_lib.{{(a|lo)}}" "-output=[[DEVICELIB:.+\.a]]" "-unbundle" -// STATIC_LIB_NOSRC-CUDA: llvm-link{{.*}} "[[DEVICELIB]]" "-o" "[[BCFILE:.+\.bc]]" +// STATIC_LIB_NOSRC-CUDA: clang-offload-bundler{{.*}} "-type=aoo" "-targets=[[BUNDLE_TRIPLE]]" "-input={{.*}}_lib.{{(a|lo)}}" "-output=[[DEVICELIB:.+\.txt]]" "-unbundle" +// STATIC_LIB_NOSRC-CUDA: llvm-foreach{{.*}}spirv-to-ir-wrapper{{.*}} "[[DEVICELIB]]" "-o" "[[DEVICELIST:.+\.txt]]" +// STATIC_LIB_NOSRC-CUDA: llvm-link{{.*}} "@[[DEVICELIST]]" "-o" "[[BCFILE:.+\.bc]]" // STATIC_LIB_NOSRC: sycl-post-link{{.*}} "-o" "[[TABLE:.+]]" "[[BCFILE]]" // STATIC_LIB_NOSRC: file-table-tform{{.*}} "-o" "[[LIST:.+]]" "[[TABLE]]" // STATIC_LIB_NOSRC-SPIR: llvm-foreach{{.*}}llvm-spirv{{.*}} "-o" "[[OBJLIST:.+\.txt]]"{{.*}} "[[LIST]]" diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index fd4e55c2d5f14..b4237b1117c84 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -650,7 +650,7 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, C->getSingleOffloadToolChain(); InputArgList EmptyArgList; auto Archs = - D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, OffloadTC); + D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, *OffloadTC); assert(Archs.size() == 1 && "Offload toolchain should be configured to single architecture"); StringRef CPU = *Archs.begin();