diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index f630735c4875f..28b3f3e0cd3a6 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -130,6 +130,8 @@ def warn_flag_no_sycl_libspirv InGroup; def err_drv_mix_cuda_hip : Error< "mixed CUDA and HIP compilation is not supported">; +def err_drv_mix_offload + : Error<"mixed %0 and %1 offloading compilation is not supported">; def err_drv_bad_target_id : Error< "invalid target ID '%0'; format is a processor name followed by an optional " "colon-delimited list of features followed by an enable/disable sign (e.g., " @@ -415,9 +417,11 @@ def err_drv_sycl_offload_arch_new_driver: Error< "'--offload-arch' is supported when '-fsycl' is set with '--offload-new-driver'">; def err_drv_sycl_offload_arch_missing_value : Error< "must pass in an explicit cpu or gpu architecture to '--offload-arch'">; -def warn_drv_sycl_offload_target_duplicate : Warning< - "SYCL offloading target '%0' is similar to target '%1' already specified; " - "will be ignored">, InGroup; +def warn_drv_offload_target_duplicate + : Warning< + "offloading target '%0' is similar to target '%1' already specified; " + "will be ignored">, + InGroup; def warn_drv_sycl_target_missing : Warning< "linked binaries do not contain expected '%0' target; found targets: '%1'">, InGroup; @@ -445,11 +449,8 @@ def err_drv_omp_offload_target_missingbcruntime : Error< "; use '--libomptarget-%1-bc-path' to specify %1 bitcode library">; def err_drv_omp_offload_target_bcruntime_not_found : Error< "bitcode library '%0' does not exist">; -def err_drv_omp_offload_target_cuda_version_not_support : Error< - "NVPTX target requires CUDA 9.2 or above; CUDA %0 detected">; -def warn_drv_omp_offload_target_duplicate : Warning< - "OpenMP offloading target '%0' is similar to target '%1' already specified; " - "will be ignored">, InGroup; +def err_drv_omp_offload_target_cuda_version_not_support + : Error<"NVPTX target requires CUDA 9.2 or above; CUDA %0 detected">; def err_drv_unsupported_embed_bitcode : Error<"%0 is not supported with -fembed-bitcode">; def err_drv_bitcode_unsupported_on_toolchain : Error< diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 887b222b6b593..8256c32170674 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1560,6 +1560,9 @@ def SyclAspectMismatch : DiagGroup<"sycl-aspect-mismatch">; def SyclNativeCPUTargets: DiagGroup<"sycl-native-cpu-targets">; def SyclPrivateAllocaPositiveSize : DiagGroup<"sycl-private-alloca-positive-size">; +// Common warnings for SYCL and OpenMP offloading +def OffloadTarget : DiagGroup<"offload-target">; + // OpenACC warnings. def SourceUsesOpenACC : DiagGroup<"source-uses-openacc">; def OpenACC : DiagGroup<"openacc", [SourceUsesOpenACC]>; diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index dc07d7c352ec1..aff4d8e753b95 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -360,6 +360,8 @@ class Driver { // handleArguments. phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL, llvm::opt::Arg **FinalPhaseArg = nullptr) const; + llvm::Expected> + executeProgram(llvm::ArrayRef Args) const; private: /// Certain options suppress the 'no input files' warning. @@ -549,8 +551,7 @@ class Driver { /// empty string. llvm::SmallVector getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC, - bool SpecificToolchain = true) const; + Action::OffloadKind Kind, const ToolChain &TC) const; /// Check that the file referenced by Value exists. If it doesn't, /// issue a diagnostic and return false. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 78e4fe84fc60c..266fcd97a05fc 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1236,10 +1236,12 @@ def offload_arch_EQ : CommaJoined<["--"], "offload-arch=">, "If 'native' is used the compiler will detect locally installed architectures. " "For HIP offloading, the device architecture can be followed by target ID features " "delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.">; -def no_offload_arch_EQ : Joined<["--"], "no-offload-arch=">, - Visibility<[ClangOption, FlangOption]>, - HelpText<"Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. " - "'all' resets the list to its default value.">; +def no_offload_arch_EQ + : CommaJoined<["--"], "no-offload-arch=">, + Visibility<[ClangOption, FlangOption]>, + HelpText<"Remove CUDA/HIP offloading device architecture (e.g. sm_35, " + "gfx906) from the list of devices to compile for. " + "'all' resets the list to its default value.">; def offload_new_driver : Flag<["--"], "offload-new-driver">, Visibility<[ClangOption, CC1Option]>, Group, @@ -1732,8 +1734,11 @@ defm auto_import : BoolFOption<"auto-import", // In the future this option will be supported by other offloading // languages and accept other values such as CPU/GPU architectures, // offload kinds and target aliases. -def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>, - HelpText<"Specify comma-separated list of offloading target triples (CUDA and HIP only)">; +def offload_EQ : CommaJoined<["--"], "offload=">, + Flags<[NoXarchOption]>, + Alias, + HelpText<"Specify comma-separated list of offloading target " + "triples (CUDA and HIP only)">; // C++ Coroutines defm coroutines : BoolFOption<"coroutines", @@ -7267,10 +7272,14 @@ defm sycl_rtc_mode: BoolFOption<"sycl-rtc-mode", def fno_sycl_esimd_build_host_code : Flag<["-"], "fno-sycl-esimd-build-host-code">, Visibility<[ClangOption, CLOption, CC1Option]>, Flags<[HelpHidden]>, HelpText<"Do not build the host implementation of ESIMD functions.">; -def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, - Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, CC1Option]>, - HelpText<"Specify comma-separated list of triples SYCL offloading targets " - "to be supported">; +def fsycl_targets_EQ + : CommaJoined<["-"], "fsycl-targets=">, + Alias, + Flags<[NoXarchOption]>, + Visibility<[ClangOption, CLOption, CC1Option]>, + HelpText< + "Specify comma-separated list of triples SYCL offloading targets " + "to be supported">; def fsycl_force_target_EQ : Joined<["-"], "fsycl-force-target=">, Flags<[NoXarchOption]>, HelpText<"Force the usage of the given triple when extracting device code " diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index a004f057846fa..58552a629f250 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -218,10 +218,6 @@ class ToolChain { ToolChain(const Driver &D, const llvm::Triple &T, const llvm::opt::ArgList &Args); - /// Executes the given \p Executable and returns the stdout. - llvm::Expected> - executeToolChainProgram(StringRef Executable) const; - void setTripleEnvironment(llvm::Triple::EnvironmentType Env); virtual Tool *buildAssembler() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 92a0857505916..f974c2233bb13 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -113,65 +113,6 @@ using namespace clang::driver; using namespace clang; using namespace llvm::opt; -static std::optional getOffloadTargetTriple(const Driver &D, - const ArgList &Args) { - auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); - // Offload compilation flow does not support multiple targets for now. We - // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) - // to support multiple tool chains first. - switch (OffloadTargets.size()) { - default: - D.Diag(diag::err_drv_only_one_offload_target_supported); - return std::nullopt; - case 0: - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; - return std::nullopt; - case 1: - break; - } - return llvm::Triple(OffloadTargets[0]); -} - -static std::optional -getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, - const llvm::Triple &HostTriple) { - if (!Args.hasArg(options::OPT_offload_EQ)) { - return llvm::Triple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"); - } - auto TT = getOffloadTargetTriple(D, Args); - if (TT && (TT->getArch() == llvm::Triple::spirv32 || - TT->getArch() == llvm::Triple::spirv64)) { - if (Args.hasArg(options::OPT_emit_llvm)) - return TT; - D.Diag(diag::err_drv_cuda_offload_only_emit_bc); - return std::nullopt; - } - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return std::nullopt; -} - -static std::optional -getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { - if (!Args.hasArg(options::OPT_offload_EQ)) { - auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ); - if (llvm::is_contained(OffloadArchs, "amdgcnspirv") && - OffloadArchs.size() == 1) - return llvm::Triple("spirv64-amd-amdhsa"); - return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. - } - auto TT = getOffloadTargetTriple(D, Args); - if (!TT) - return std::nullopt; - if (TT->isAMDGCN() && TT->getVendor() == llvm::Triple::AMD && - TT->getOS() == llvm::Triple::AMDHSA) - return TT; - if (TT->getArch() == llvm::Triple::spirv64) - return TT; - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return std::nullopt; -} - template static bool usesInput(const ArgList &Args, F &&Fn) { return llvm::any_of(Args, [&](Arg *A) { return (A->getOption().matches(options::OPT_x) && @@ -496,6 +437,44 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, return FinalPhase; } +llvm::Expected> +Driver::executeProgram(llvm::ArrayRef Args) const { + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("driver-program", "txt", OutputFile, + llvm::sys::fs::OF_Text); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + std::optional Redirects[] = { + {""}, + OutputFile.str(), + {""}, + }; + + std::string ErrorMessage; + int SecondsToWait = 60; + if (std::optional Str = + llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { + if (!llvm::to_integer(*Str, SecondsToWait)) + return llvm::createStringError(std::error_code(), + "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " + "an integer, got '" + + *Str + "'"); + SecondsToWait = std::max(SecondsToWait, 0); // infinite + } + StringRef Executable = Args[0]; + if (llvm::sys::ExecuteAndWait(Executable, Args, {}, Redirects, SecondsToWait, + /*MemoryLimit=*/0, &ErrorMessage)) + return llvm::createStringError(std::error_code(), + Executable + ": " + ErrorMessage); + + llvm::ErrorOr> OutputBuf = + llvm::MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return llvm::createStringError(OutputBuf.getError(), + "Failed to read stdout of " + Executable + + ": " + OutputBuf.getError().message()); + return std::move(*OutputBuf); +} + static Arg *MakeInputArg(DerivedArgList &Args, const OptTable &Opts, StringRef Value, bool Claim = true) { Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value, @@ -966,6 +945,150 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const { return RT; } +// Handles `native` offload architectures by using the 'offload-arch' utility. +static llvm::SmallVector +getSystemOffloadArchs(Compilation &C, Action::OffloadKind Kind) { + StringRef Program = C.getArgs().getLastArgValue( + options::OPT_offload_arch_tool_EQ, "offload-arch"); + + SmallVector GPUArchs; + if (llvm::ErrorOr Executable = + llvm::sys::findProgramByName(Program)) { + llvm::SmallVector Args{*Executable}; + if (Kind == Action::OFK_HIP) + Args.push_back("--only=amdgpu"); + else if (Kind == Action::OFK_Cuda) + Args.push_back("--only=nvptx"); + auto StdoutOrErr = C.getDriver().executeProgram(Args); + + if (!StdoutOrErr) { + C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << Action::GetOffloadKindName(Kind) << StdoutOrErr.takeError() + << "--offload-arch"; + return GPUArchs; + } + if ((*StdoutOrErr)->getBuffer().empty()) { + C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << Action::GetOffloadKindName(Kind) << "No GPU detected in the system" + << "--offload-arch"; + return GPUArchs; + } + + for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) + if (!Arch.empty()) + GPUArchs.push_back(Arch.str()); + } else { + C.getDriver().Diag(diag::err_drv_command_failure) << "offload-arch"; + } + return GPUArchs; +} + +// Attempts to infer the correct offloading toolchain triple by looking at the +// requested offloading kind and architectures. +static llvm::DenseSet +inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { + // SYCL offloading to AOT Targets with '--offload-arch' + // is currently enabled only with '--offload-new-driver' option. + // Emit a diagnostic if '--offload-arch' is invoked without + // '--offload-new driver' option. + if (Kind == Action::OFK_SYCL && + C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && + !C.getInputArgs().hasFlag(options::OPT_offload_new_driver, + options::OPT_no_offload_new_driver, false)) { + C.getDriver().Diag(clang::diag::err_drv_sycl_offload_arch_new_driver); + return llvm::DenseSet(); + ; + } + std::set Archs; + for (Arg *A : C.getInputArgs()) { + for (StringRef Arch : A->getValues()) { + if (A->getOption().matches(options::OPT_offload_arch_EQ)) { + if (Arch == "native") { + for (StringRef Str : getSystemOffloadArchs(C, Kind)) + Archs.insert(Str.str()); + } else { + Archs.insert(Arch.str()); + } + } else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) { + if (Arch == "all") + Archs.clear(); + else + Archs.erase(Arch.str()); + } + } + } + + llvm::DenseSet Triples; + for (llvm::StringRef Arch : Archs) { + OffloadArch ID = StringToOffloadArch(Arch); + if (ID == OffloadArch::UNKNOWN) + ID = StringToOffloadArch( + getProcessorFromTargetID(llvm::Triple("amdgcn-amd-amdhsa"), Arch)); + + if (Kind == Action::OFK_HIP && !IsAMDOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << Arch; + return llvm::DenseSet(); + } + if (Kind == Action::OFK_Cuda && !IsNVIDIAOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << Arch; + return llvm::DenseSet(); + } + + if (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "offload" << Arch; + return llvm::DenseSet(); + } + + StringRef Triple; + if (ID == OffloadArch::AMDGCNSPIRV) + Triple = "spirv64-amd-amdhsa"; + else if (IsNVIDIAOffloadArch(ID)) + Triple = C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"; + else if (IsAMDOffloadArch(ID)) + Triple = "amdgcn-amd-amdhsa"; + else if (IsIntelCPUOffloadArch(ID)) + Triple = "spir64_x86_64-unknown-unknown"; + else if (IsIntelGPUOffloadArch(ID)) + Triple = "spir64_gen-unknown-unknown"; + else + continue; + + // Make a new argument that dispatches this argument to the appropriate + // toolchain. This is required when we infer it and create potentially + // incompatible toolchains from the global option. + Option Opt = C.getDriver().getOpts().getOption(options::OPT_Xarch__); + unsigned Index = C.getArgs().getBaseArgs().MakeIndex("-Xarch_"); + Arg *A = new Arg(Opt, C.getArgs().getArgString(Index), Index, + C.getArgs().MakeArgString(Triple.split("-").first), + C.getArgs().MakeArgString("--offload-arch=" + Arch)); + C.getArgs().append(A); + Triples.insert(Triple); + } + + // Infer the default target triple if no specific architectures are given. + if (Archs.empty() && Kind == Action::OFK_HIP) + Triples.insert("amdgcn-amd-amdhsa"); + else if (Archs.empty() && Kind == Action::OFK_Cuda) + Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + else if (Archs.empty() && Kind == Action::OFK_SYCL) + Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "spir64-unknown-unknown" + : "spir-unknown-unknown"); + + // We need to dispatch these to the appropriate toolchain now. + C.getArgs().eraseArg(options::OPT_offload_arch_EQ); + C.getArgs().eraseArg(options::OPT_no_offload_arch_EQ); + + return Triples; +} + static bool isValidSYCLTriple(llvm::Triple T) { // 'nvptx64-nvidia-cuda' is the valid SYCL triple for NVidia GPUs. if (T.getArch() == llvm::Triple::nvptx64 && @@ -1017,8 +1140,11 @@ llvm::Triple Driver::getSYCLDeviceTriple(StringRef TargetArch, llvm::Triple TargetTriple(TargetArch); if (Arg && !Arg->isClaimed() && TargetTriple.isSPIR() && TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) { - Diag(diag::err_drv_unsupported_opt_removed) - << Arg->getSpelling().str() + TargetArch.str(); + SmallString<128> OptStr(Arg->getSpelling()); + if (Arg->getOption().matches(options::OPT_offload_targets_EQ)) + OptStr = "-fsycl-targets="; + OptStr += TargetArch.str(); + Diag(diag::err_drv_unsupported_opt_removed) << OptStr; Arg->claim(); } if (llvm::is_contained(SYCLAlias, TargetArch)) { @@ -1060,217 +1186,11 @@ static bool addSYCLDefaultTriple(Compilation &C, return true; } -void Driver::CreateOffloadingDeviceToolChains(Compilation &C, - InputList &Inputs) { - - // - // CUDA/HIP - // - // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA - // or HIP type. However, mixed CUDA/HIP compilation is not supported. - using namespace tools::SYCL; - bool IsCuda = - llvm::any_of(Inputs, [](std::pair &I) { - return types::isCuda(I.first); - }); - bool IsHIP = - llvm::any_of(Inputs, - [](std::pair &I) { - return types::isHIP(I.first); - }) || - C.getInputArgs().hasArg(options::OPT_hip_link) || - C.getInputArgs().hasArg(options::OPT_hipstdpar); - bool UseLLVMOffload = C.getInputArgs().hasArg( - options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); - if (IsCuda && IsHIP) { - Diag(clang::diag::err_drv_mix_cuda_hip); - return; - } - if (IsCuda && !UseLLVMOffload) { - auto CudaTriple = getNVIDIAOffloadTargetTriple( - *this, C.getInputArgs(), C.getDefaultToolChain().getTriple()); - if (!CudaTriple) - return; - - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_Cuda, *CudaTriple, - C.getDefaultToolChain().getTriple()); - - // Emit a warning if the detected CUDA version is too new. - const CudaInstallationDetector &CudaInstallation = - static_cast(TC).CudaInstallation; - if (CudaInstallation.isValid()) - CudaInstallation.WarnIfUnsupportedVersion(); - C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, &TC, - /*SpecificToolchain=*/true); - } else if (IsHIP && !UseLLVMOffload) { - if (auto *OMPTargetArg = - C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { - Diag(clang::diag::err_drv_unsupported_opt_for_language_mode) - << OMPTargetArg->getSpelling() << "HIP"; - return; - } - - auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); - if (!HIPTriple) - return; - - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP); - - // TODO: Fix 'amdgcnspirv' handling with the new driver. - if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_HIP, &TC, - /*SpecificToolchain=*/true); - } - - if (IsCuda || IsHIP) - CUIDOpts = CUIDOptions(C.getArgs(), *this); - - // - // OpenMP - // - // We need to generate an OpenMP toolchain if the user specified targets with - // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. - bool IsOpenMPOffloading = - ((IsCuda || IsHIP) && UseLLVMOffload) || - (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false) && - (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || - C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); - if (IsOpenMPOffloading) { - // We expect that -fopenmp-targets is always used in conjunction with the - // option -fopenmp specifying a valid runtime with offloading support, i.e. - // libomp or libiomp. - OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); - if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); - return; - } - - // If the user specified -fopenmp-targets= we create a toolchain for each - // valid triple. Otherwise, if only --offload-arch= was specified we instead - // attempt to derive the appropriate toolchains from the arguments. - if (Arg *OpenMPTargets = - C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { - if (OpenMPTargets && !OpenMPTargets->getNumValues()) { - Diag(clang::diag::warn_drv_empty_joined_argument) - << OpenMPTargets->getAsString(C.getInputArgs()); - return; - } - - // Make sure these show up in a deterministic order. - std::multiset OpenMPTriples; - for (StringRef T : OpenMPTargets->getValues()) - OpenMPTriples.insert(T); - - llvm::StringMap FoundNormalizedTriples; - for (StringRef T : OpenMPTriples) { - llvm::Triple TT(ToolChain::getOpenMPTriple(T)); - std::string NormalizedName = TT.normalize(); - - // Make sure we don't have a duplicate triple. - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, T); - if (!Inserted) { - Diag(clang::diag::warn_drv_omp_offload_target_duplicate) - << T << TripleIt->second; - continue; - } - - // If the specified target is invalid, emit a diagnostic. - if (TT.getArch() == llvm::Triple::UnknownArch) { - Diag(clang::diag::err_drv_invalid_omp_target) << T; - continue; - } - - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, - /*SpecificToolchain=*/true); - } - } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && - ((!IsHIP && !IsCuda) || UseLLVMOffload)) { - llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); - llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); - - for (StringRef Arch : - C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { - bool IsNVPTX = IsNVIDIAOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); - bool IsAMDGPU = IsAMDOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); - if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && - !Arch.equals_insensitive("native")) { - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; - return; - } - } - - // Attempt to deduce the offloading triple from the set of architectures. - // We can only correctly deduce NVPTX / AMDGPU triples currently. - for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - - llvm::SmallVector Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, - /*SpecificToolchain=*/false); - if (!Archs.empty()) { - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - OffloadArchs[&TC] = Archs; - } - } - - // If the set is empty then we failed to find a native architecture. - auto TCRange = C.getOffloadToolChains(Action::OFK_OpenMP); - if (TCRange.first == TCRange.second) - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) - << "native"; - } - } else if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); - return; - } - - // - // SYCL - // - // We need to generate a SYCL toolchain if the user specified -fsycl. - // If -fsycl is supplied without any of these we will assume SPIR-V. - // Use of -fsycl-device-only overrides -fsycl. - // Use of -fsyclbin enables SYCL device compilation. - bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, - options::OPT_fno_sycl, false) || - C.getInputArgs().hasArgNoClaim(options::OPT_fsycl_device_only, - options::OPT_fsyclbin_EQ); - - auto argSYCLIncompatible = [&](OptSpecifier OptId) { - if (!IsSYCL) - return; - if (Arg *IncompatArg = C.getInputArgs().getLastArg(OptId)) - Diag(clang::diag::err_drv_argument_not_allowed_with) - << IncompatArg->getSpelling() << "-fsycl"; - }; - // -static-libstdc++ is not compatible with -fsycl. - argSYCLIncompatible(options::OPT_static_libstdcxx); - // -ffreestanding cannot be used with -fsycl - argSYCLIncompatible(options::OPT_ffreestanding); - - llvm::SmallVector UniqueSYCLTriplesVec; - - // A mechanism for retrieving SYCL-specific options, erroring out - // if SYCL offloading wasn't enabled prior to that +static void diagnoseSYCLOptions(Compilation &C, bool IsSYCL) { auto getArgRequiringSYCLRuntime = [&](OptSpecifier OptId) -> Arg * { Arg *SYCLArg = C.getInputArgs().getLastArg(OptId); if (SYCLArg && !IsSYCL) { - Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) + C.getDriver().Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) // Dropping the '=' symbol, which would otherwise pollute // the diagnostics for the most of options << SYCLArg->getSpelling().split('=').first; @@ -1279,7 +1199,29 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, return SYCLArg; }; - Arg *SYCLTargets = getArgRequiringSYCLRuntime(options::OPT_fsycl_targets_EQ); + // Special check for -fsycl-targets. -fsycl-targets is an alias for + // --offload-targets. + const Arg *SYCLOffloadTargetsArg = + C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ); + Arg *SYCLForceTarget = + getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ); + if (SYCLOffloadTargetsArg && SYCLOffloadTargetsArg->getAlias()) { + const Arg *Alias = SYCLOffloadTargetsArg->getAlias(); + bool IsFsyclTargetsOption = Alias->getSpelling() == "-fsycl-targets="; + if (!IsSYCL && IsFsyclTargetsOption) + C.getDriver().Diag(clang::diag::err_drv_expecting_fsycl_with_sycl_opt) + // Dropping the '=' symbol, which would otherwise pollute + // the diagnostics for the most of options + << StringRef(SYCLOffloadTargetsArg->getAsString(C.getArgs())) + .split('=') + .first; + else if (IsFsyclTargetsOption && + SYCLOffloadTargetsArg->getNumValues() > 1 && SYCLForceTarget) + C.getDriver().Diag( + clang::diag::err_drv_multiple_target_with_forced_target) + << SYCLOffloadTargetsArg->getAsString(C.getInputArgs()) + << SYCLForceTarget->getAsString(C.getInputArgs()); + } // Check if -fsycl-host-compiler is used in conjunction with -fsycl. Arg *SYCLHostCompiler = @@ -1289,7 +1231,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // -fsycl-host-compiler-options cannot be used without -fsycl-host-compiler if (SYCLHostCompilerOptions && !SYCLHostCompiler) - Diag(clang::diag::warn_drv_opt_requires_opt) + C.getDriver().Diag(clang::diag::warn_drv_opt_requires_opt) << SYCLHostCompilerOptions->getSpelling().split('=').first << "-fsycl-host-compiler"; @@ -1308,7 +1250,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, for (const StringRef AllowedValue : AllowedValues) if (AllowedValue == ArgValue) return; - Diag(clang::diag::err_drv_invalid_argument_to_option) + C.getDriver().Diag(clang::diag::err_drv_invalid_argument_to_option) << ArgValue << A->getOption().getName(); }; @@ -1321,7 +1263,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // Use of -fsycl-link=early and -fsycl-link=image are not supported. if (SYCLLink && (SYCLLink->getValue() == StringRef("early") || SYCLLink->getValue() == StringRef("image"))) - Diag(diag::err_drv_unsupported_opt_removed) + C.getDriver().Diag(diag::err_drv_unsupported_opt_removed) << SYCLLink->getAsString(C.getInputArgs()); Arg *DeviceCodeSplit = @@ -1344,220 +1286,178 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, StringRef ArgValue(DeviceObj->getValue()); SmallVector DeviceObjValues = {"spirv", "llvmir", "asm"}; if (llvm::find(DeviceObjValues, ArgValue) == DeviceObjValues.end()) - Diag(clang::diag::warn_ignoring_value_using_default) + C.getDriver().Diag(clang::diag::warn_ignoring_value_using_default) << DeviceObj->getSpelling().split('=').first << ArgValue << "llvmir"; else if (ArgValue == "asm" && (!SYCLDeviceOnly || !EmitAsm)) - Diag(clang::diag::warn_drv_fsycl_device_obj_asm_device_only); + C.getDriver().Diag( + clang::diag::warn_drv_fsycl_device_obj_asm_device_only); } - Arg *SYCLForceTarget = - getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ); if (SYCLForceTarget) { StringRef Val(SYCLForceTarget->getValue()); - llvm::Triple TT(getSYCLDeviceTriple(Val, SYCLForceTarget)); + llvm::Triple TT(C.getDriver().getSYCLDeviceTriple(Val, SYCLForceTarget)); if (!isValidSYCLTriple(TT)) - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) << Val; } - bool HasSYCLTargetsOption = SYCLTargets; +} - llvm::StringMap> DerivedArchs; - llvm::StringMap FoundNormalizedTriples; - // StringSet to contain SYCL target triples. - llvm::StringSet<> SYCLTriples; - if (HasSYCLTargetsOption) { - // At this point, we know we have a valid combination - // of -fsycl*target options passed - Arg *SYCLTargetsValues = SYCLTargets; - if (SYCLTargetsValues) { - if (SYCLTargetsValues->getNumValues()) { - - // Multiple targets are currently not supported when using - // -fsycl-force-target as the bundler does not allow for multiple - // outputs of the same target. - if (SYCLForceTarget && SYCLTargetsValues->getNumValues() > 1) - Diag(clang::diag::err_drv_multiple_target_with_forced_target) - << SYCLTargetsValues->getAsString(C.getInputArgs()) - << SYCLForceTarget->getAsString(C.getInputArgs()); - - std::multiset SYCLTriples; - for (StringRef SYCLTargetTriple : SYCLTargetsValues->getValues()) - SYCLTriples.insert(SYCLTargetTriple); +void Driver::CreateOffloadingDeviceToolChains(Compilation &C, + InputList &Inputs) { + bool UseLLVMOffload = C.getInputArgs().hasArg( + options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); + bool IsCuda = + llvm::any_of(Inputs, + [](std::pair &I) { + return types::isCuda(I.first); + }) && + !UseLLVMOffload; + bool IsHIP = + (llvm::any_of(Inputs, + [](std::pair &I) { + return types::isHIP(I.first); + }) || + C.getInputArgs().hasArg(options::OPT_hip_link) || + C.getInputArgs().hasArg(options::OPT_hipstdpar)) && + !UseLLVMOffload; - llvm::StringMap FoundNormalizedTriples; - llvm::Triple TT; - for (StringRef Triple : SYCLTriples) { - - if (Triple.starts_with("intel_gpu_")) { - TT = getSYCLDeviceTriple("spir64_gen"); - } else if (Triple.starts_with("nvidia_gpu_")) { - TT = getSYCLDeviceTriple("nvptx64-nvidia-cuda"); - } else if (Triple.starts_with("amd_gpu_")) { - TT = getSYCLDeviceTriple("amdgcn-amd-amdhsa"); - } else - TT = getSYCLDeviceTriple(Triple); - - // For the new offloading model, we only want a single triple entry - // for each target, even if we have multiple intel_gpu* entries. We - // will track triples for new model and unique strings for the old - // model. - std::string NormalizedName; - bool UseNewOffload = - (C.getArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)); - NormalizedName = UseNewOffload - ? TT.normalize() - : getSYCLDeviceTriple(Triple).normalize(); - - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, Triple); - - if (!Inserted) { - // Only emit the diagnostic of duplicate targets with the new - // offloading model only when the found triple matches. For the - // old model, we always emit the diagnostic. - if (!UseNewOffload || (UseNewOffload && Triple == TripleIt->second)) - Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) - << Triple << TripleIt->second; - continue; - } + bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, + options::OPT_fno_sycl, false) || + C.getInputArgs().hasArgNoClaim(options::OPT_fsycl_device_only, + options::OPT_fsyclbin_EQ); + bool IsOpenMPOffloading = + UseLLVMOffload || + (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false) && + (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || + (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && + !(IsCuda || IsHIP)))); + + llvm::SmallSet Kinds; + const std::pair ActiveKinds[] = { + {IsCuda, Action::OFK_Cuda}, + {IsHIP, Action::OFK_HIP}, + {IsOpenMPOffloading, Action::OFK_OpenMP}, + {IsSYCL, Action::OFK_SYCL}}; + for (const auto &[Active, Kind] : ActiveKinds) + if (Active) + Kinds.insert(Kind); + + // We currently don't support any kind of mixed offloading. + if (Kinds.size() > 1 && !IsSYCL) { + Diag(clang::diag::err_drv_mix_offload) + << Action::GetOffloadKindName(*Kinds.begin()).upper() + << Action::GetOffloadKindName(*(++Kinds.begin())).upper(); + return; + } - // If the specified target is invalid, emit a diagnostic. - if (!isValidSYCLTriple(TT)) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Triple; - continue; - } + diagnoseSYCLOptions(C, IsSYCL); + // Initialize the compilation identifier used for unique CUDA / HIP names. + if (IsCuda || IsHIP) + CUIDOpts = CUIDOptions(C.getArgs(), *this); - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); - UniqueSYCLTriplesVec.push_back(TT); - } - if (addSYCLDefaultTriple(C, UniqueSYCLTriplesVec)) { - // Add the default triple (spir64) toolchain. - llvm::Triple DefaultTriple = - C.getDriver().getSYCLDeviceTriple(getDefaultSYCLArch(C)); - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, - DefaultTriple, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); - } - } else - Diag(clang::diag::warn_drv_empty_joined_argument) - << SYCLTargetsValues->getAsString(C.getInputArgs()); - } - } - // If the user specified --offload-arch, deduce the offloading - // target triple(s) from the set of architecture(s). - // Create a toolchain for each valid triple. - // We do not support SYCL offloading if any of the inputs is a - // .cu (for CUDA type) or .hip (for HIP type) file. - else if (IsSYCL && C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && - !IsHIP && !IsCuda) { - // SYCL offloading to AOT Targets with '--offload-arch' - // is currently enabled only with '--offload-new-driver' option. - // Emit a diagnostic if '--offload-arch' is invoked without - // '--offload-new driver' option. - if (!C.getInputArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) { - Diag(clang::diag::err_drv_sycl_offload_arch_new_driver); - return; - } - llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); - llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); - llvm::Triple IntelGPUTriple("spir64_gen-unknown-unknown"); - llvm::Triple IntelCPUTriple("spir64_x86_64-unknown-unknown"); - - // Attempt to deduce the offloading triple from the set of architectures. - // We need to temporarily create these toolchains so that we can access - // tools for inferring architectures. - - for (StringRef Arch : - C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { - bool IsNVPTX = IsSYCLSupportedNVidiaGPUArch( - StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); - bool IsAMDGPU = IsSYCLSupportedAMDGPUArch( - StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); - bool IsIntelGPU = IsIntelGPUOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(IntelGPUTriple, Arch))); - bool IsIntelCPU = IsIntelCPUOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(IntelCPUTriple, Arch))); - - if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !IsIntelGPU && - !IsIntelCPU && !Arch.equals_insensitive("native")) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; - return; + // Get the list of requested offloading toolchains. If they were not + // explicitly specified we will infer them based on the offloading language + // and requested architectures. + std::multiset Triples; + if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { + std::vector ArgValues = + C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ); + llvm::Triple TT; + for (llvm::StringRef Target : ArgValues) { + if(IsSYCL) { + if (Target.starts_with("intel_gpu_")) + Triples.insert( + C.getInputArgs().MakeArgString("spir64_gen-unknown-unknown")); + else if (Target.starts_with("nvidia_gpu_")) + Triples.insert(C.getInputArgs().MakeArgString("nvptx64-nvidia-cuda")); + else if (Target.starts_with("amd_gpu_")) + Triples.insert(C.getInputArgs().MakeArgString("amdgcn-amd-amdhsa")); + else + Triples.insert(C.getInputArgs().MakeArgString(Target)); } + else + Triples.insert(C.getInputArgs().MakeArgString(Target)); } - for (const llvm::Triple &TT : - {AMDTriple, NVPTXTriple, IntelGPUTriple, IntelCPUTriple}) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - C.getDefaultToolChain().getTriple()); + if (ArgValues.empty()) + Diag(clang::diag::warn_drv_empty_joined_argument) + << C.getInputArgs() + .getLastArg(options::OPT_offload_targets_EQ) + ->getAsString(C.getInputArgs()); + } else if (Kinds.size() > 0) { + for (Action::OffloadKind Kind : Kinds) { + llvm::DenseSet Derived = inferOffloadToolchains(C, Kind); + Triples.insert(Derived.begin(), Derived.end()); + } + } - llvm::SmallVector Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/false); - if (!Archs.empty()) { - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = Archs; + // Build an offloading toolchain for every requested target and kind. + llvm::StringMap FoundNormalizedTriples; + for (StringRef Target : Triples) { + // OpenMP offloading requires a compatible libomp. + if (Kinds.contains(Action::OFK_OpenMP)) { + OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); + if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); + return; } } - auto TCRange = C.getOffloadToolChains(Action::OFK_SYCL); - if (TCRange.first == TCRange.second) { - Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); - return; + // Certain options are not allowed when combined with SYCL compilation. + if (Kinds.contains(Action::OFK_SYCL)) { + for (auto ID : + {options::OPT_static_libstdcxx, options::OPT_ffreestanding}) + if (Arg *IncompatArg = C.getInputArgs().getLastArg(ID)) + Diag(clang::diag::err_drv_argument_not_allowed_with) + << IncompatArg->getSpelling() << "-fsycl"; } - } else { - // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V. - // For -fsycl-device-only, we also setup the implied triple as needed. - if (IsSYCL) { - StringRef SYCLTargetArch = getDefaultSYCLArch(C); - UniqueSYCLTriplesVec.push_back(getSYCLDeviceTriple(SYCLTargetArch)); - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); - } - } - - // -fsycl-fp64-conv-emu is valid only for AOT compilation with an Intel GPU - // target. For other scenarios, we emit a warning message. - if (C.getInputArgs().hasArg(options::OPT_fsycl_fp64_conv_emu)) { - bool HasIntelGPUAOTTarget = false; - for (auto &TT : UniqueSYCLTriplesVec) { - if (TT.isSPIRAOT() && TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) { - HasIntelGPUAOTTarget = true; - break; + // Create a device toolchain for every specified kind and triple. + for (Action::OffloadKind Kind : Kinds) { + llvm::Triple TT; + if (Kind == Action::OFK_OpenMP) + TT = ToolChain::getOpenMPTriple(Target); + else if (Kind == Action::OFK_SYCL) + TT = getSYCLDeviceTriple(Target); + else + TT = llvm::Triple(Target); + + if (C.getInputArgs().hasArg(options::OPT_fsycl_fp64_conv_emu) && + !(TT.isSPIRAOT() && + TT.getSubArch() == llvm::Triple::SPIRSubArch_gen)) { + Diag(diag::warn_unsupported_fsycl_fp64_conv_emu_use); } - } - if (!HasIntelGPUAOTTarget) - Diag(diag::warn_unsupported_fsycl_fp64_conv_emu_use); - } - // We'll need to use the SYCL and host triples as the key into - // getOffloadingDeviceToolChain, because the device toolchains we're - // going to create will depend on both. - if ((IsSYCL && !C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && - !HasSYCLTargetsOption) { - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - for (const auto &TT : UniqueSYCLTriplesVec) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - HostTC->getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + // Common diagnostic for both OpenMP and SYCL. + if (TT.getArch() == llvm::Triple::ArchType::UnknownArch || + (Kind == Action::OFK_SYCL && !isValidSYCLTriple(TT))) { + Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT.str(); + continue; + } + // Check for duplicate target triple strings. + std::string NormalizedName = TT.normalize(); + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Target); + if (!Inserted) { + Diag(clang::diag::warn_drv_offload_target_duplicate) + << Target << TripleIt->second; + continue; + } + + auto &TC = getOffloadToolChain(C.getInputArgs(), Kind, TT, + C.getDefaultToolChain().getTriple()); + + // Emit a warning if the detected CUDA version is too new. + if (Kind == Action::OFK_Cuda) { + auto &CudaInstallation = + static_cast(TC).CudaInstallation; + if (CudaInstallation.isValid()) + CudaInstallation.WarnIfUnsupportedVersion(); + } - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); + C.addOffloadDeviceToolChain(&TC, Kind); } } - - // - // TODO: Add support for other offloading programming models here. - // } bool Driver::loadZOSCustomizationFile(llvm::cl::ExpansionContext &ExpCtx) { @@ -3973,7 +3873,7 @@ bool Driver::checkForSYCLDefaultDevice(Compilation &C, // Do not do the check if the default device is passed in -fsycl-targets // or if -fsycl-targets isn't passed (that implies default device) - if (const Arg *A = Args.getLastArgNoClaim(options::OPT_fsycl_targets_EQ)) { + if (const Arg *A = Args.getLastArgNoClaim(options::OPT_offload_targets_EQ)) { for (const char *Val : A->getValues()) { llvm::Triple TT(C.getDriver().getSYCLDeviceTriple(Val, A)); if ((TT.isSPIROrSPIRV()) && TT.getSubArch() == llvm::Triple::NoSubArch) @@ -4197,9 +4097,6 @@ class OffloadingActionBuilder final { // architecture. If we are in host-only mode we return 'success' so that // the host uses the CUDA offload kind. if (auto *IA = dyn_cast(HostAction)) { - assert(!GpuArchList.empty() && - "We should have at least one GPU architecture."); - // If the host input is not CUDA or HIP, we don't need to bother about // this input. if (!(IA->getType() == types::TY_CUDA || @@ -4299,10 +4196,6 @@ class OffloadingActionBuilder final { CudaDeviceActions.clear(); } - /// Get canonicalized offload arch option. \returns empty StringRef if the - /// option is invalid. - virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0; - virtual std::optional> getConflictOffloadArchCombination(const std::set &GpuArchs) = 0; @@ -4331,91 +4224,25 @@ class OffloadingActionBuilder final { return true; } - ToolChains.push_back( - AssociatedOffloadKind == Action::OFK_Cuda - ? C.getSingleOffloadToolChain() - : C.getSingleOffloadToolChain()); - - CompileHostOnly = C.getDriver().offloadHostOnly(); - EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); - EmitAsm = Args.getLastArg(options::OPT_S); - - // --offload and --offload-arch options are mutually exclusive. - if (Args.hasArgNoClaim(options::OPT_offload_EQ) && - Args.hasArgNoClaim(options::OPT_offload_arch_EQ, - options::OPT_no_offload_arch_EQ)) { - C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch" - << "--offload"; - } - - // Collect all offload arch parameters, removing duplicates. std::set GpuArchs; - bool Error = false; - const ToolChain &TC = *ToolChains.front(); - for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"", - AssociatedOffloadKind)) { - if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || - A->getOption().matches(options::OPT_no_offload_arch_EQ))) - continue; - A->claim(); - - for (StringRef ArchStr : llvm::split(A->getValue(), ",")) { - if (A->getOption().matches(options::OPT_no_offload_arch_EQ) && - ArchStr == "all") { - GpuArchs.clear(); - } else if (ArchStr == "native") { - auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); - if (!GPUsOrErr) { - TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC.getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; - continue; - } + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_HIP}) { + for (auto &I : llvm::make_range(C.getOffloadToolChains(Kind))) { + ToolChains.push_back(I.second); - for (auto GPU : *GPUsOrErr) { - GpuArchs.insert(Args.MakeArgString(GPU)); - } - } else { - ArchStr = getCanonicalOffloadArch(ArchStr); - if (ArchStr.empty()) { - Error = true; - } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) - GpuArchs.insert(ArchStr); - else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) - GpuArchs.erase(ArchStr); - else - llvm_unreachable("Unexpected option."); - } + for (auto Arch : + C.getDriver().getOffloadArchs(C, C.getArgs(), Kind, *I.second)) + GpuArchs.insert(Arch); } } - auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs); - if (ConflictingArchs) { - C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) - << ConflictingArchs->first << ConflictingArchs->second; - C.setContainsError(); - return true; - } - - // Collect list of GPUs remaining in the set. for (auto Arch : GpuArchs) GpuArchList.push_back(Arch.data()); - // Default to sm_20 which is the lowest common denominator for - // supported GPUs. sm_20 code should work correctly, if - // suboptimally, on all newer GPUs. - if (GpuArchList.empty()) { - if (ToolChains.front()->getTriple().isSPIROrSPIRV()) { - if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD) - GpuArchList.push_back(OffloadArch::AMDGCNSPIRV); - else - GpuArchList.push_back(OffloadArch::Generic); - } else { - GpuArchList.push_back(DefaultOffloadArch); - } - } + CompileHostOnly = C.getDriver().offloadHostOnly(); + EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); + EmitAsm = Args.getLastArg(options::OPT_S); - return Error; + return false; } }; @@ -4430,15 +4257,6 @@ class OffloadingActionBuilder final { DefaultOffloadArch = OffloadArch::CudaDefault; } - StringRef getCanonicalOffloadArch(StringRef ArchStr) override { - OffloadArch Arch = StringToOffloadArch(ArchStr); - if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; - return StringRef(); - } - return OffloadArchToString(Arch); - } - std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { @@ -4615,24 +4433,6 @@ class OffloadingActionBuilder final { bool canUseBundlerUnbundler() const override { return true; } - StringRef getCanonicalOffloadArch(StringRef IdStr) override { - llvm::StringMap Features; - // getHIPOffloadTargetTriple() is known to return valid value as it has - // been called successfully in the CreateOffloadingDeviceToolChains(). - auto T = - (IdStr == "amdgcnspirv") - ? llvm::Triple("spirv64-amd-amdhsa") - : *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()); - auto ArchStr = parseTargetID(T, IdStr, &Features); - if (!ArchStr) { - C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr; - C.setContainsError(); - return StringRef(); - } - auto CanId = getCanonicalTargetID(*ArchStr, Features); - return Args.MakeArgStringRef(CanId); - }; - std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { @@ -5100,7 +4900,7 @@ class OffloadingActionBuilder final { JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( C, Args, phases::Backend, Input, AssociatedOffloadKind); - if (TT.getOS() != llvm::Triple::NVCL) { + if (TT.getOS() != llvm::Triple::NVCL && !TT.isSPIROrSPIRV()) { auto *AA = C.getDriver().ConstructPhaseAction( C, Args, phases::Assemble, BA, AssociatedOffloadKind); ActionList DeviceActions = {BA, AA}; @@ -5516,7 +5316,7 @@ class OffloadingActionBuilder final { bool IsNativeCPU = TargetTriple.isNativeCPU(); for (const auto &Input : ListIndex) { // No need for any conversion if we are coming in from the - // clang-offload-deps or regular compilation path. + // clang-offload-deps path or regular compilation path. if (IsNVPTX || IsAMDGCN || ContainsOffloadDepsAction(Input) || ContainsCompileOrAssembleAction(Input)) { LinkObjects.push_back(Input); @@ -6066,9 +5866,8 @@ class OffloadingActionBuilder final { ArchStr = OffloadArchToString(Arch); } else if (TargetBE->isAMDGCN()) { llvm::StringMap Features; - auto Arch = parseTargetID( - *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), - ArchStr, &Features); + auto Arch = parseTargetID(llvm::Triple("amdgcn-amd-amdhsa"), + ArchStr, &Features); if (!Arch) { C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr; continue; @@ -6270,7 +6069,7 @@ class OffloadingActionBuilder final { // Gather information about the SYCL Ahead of Time targets. The targets // are determined on the SubArch values passed along in the triple. Arg *SYCLTargets = - C.getInputArgs().getLastArg(options::OPT_fsycl_targets_EQ); + C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ); bool HasValidSYCLRuntime = C.getInputArgs().hasFlag( options::OPT_fsycl, options::OPT_fno_sycl, false); @@ -6281,27 +6080,33 @@ class OffloadingActionBuilder final { for (StringRef Val : SYCLTargetsValues->getValues()) { StringRef UserTargetName(Val); if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("spir64_gen"), ValidDevice->data()); UserTargetName = "spir64_gen"; } else if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("nvptx64-nvidia-cuda"), ValidDevice->data()); UserTargetName = "nvptx64-nvidia-cuda"; } else if (auto ValidDevice = gen::isGPUTarget(Val)) { - if (ValidDevice->empty()) - // Unrecognized, we have already diagnosed this earlier; skip. + if (ValidDevice->empty()) { + C.getDriver().Diag(clang::diag::err_drv_invalid_sycl_target) + << Val; continue; + } // Add the proper -device value to the list. GpuArchList.emplace_back( C.getDriver().getSYCLDeviceTriple("amdgcn-amd-amdhsa"), @@ -6322,8 +6127,10 @@ class OffloadingActionBuilder final { // the following iterations. FoundNormalizedTriples[NormalizedName] = Val; - SYCLTripleList.push_back( - C.getDriver().getSYCLDeviceTriple(UserTargetName)); + if (isValidSYCLTriple(llvm::Triple(UserTargetName))) + SYCLTripleList.push_back( + C.getDriver().getSYCLDeviceTriple(UserTargetName)); + // For user specified spir64_gen, add an empty device value as a // placeholder. if (TT.getSubArch() == llvm::Triple::SPIRSubArch_gen) @@ -7485,37 +7292,32 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - const llvm::Triple &Triple, - bool SpecificToolchain) { + const llvm::Triple &Triple) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. OffloadArch Arch = StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr)); if (Triple.isNVPTX() && (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "CUDA" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << ArchStr; return StringRef(); } else if (Triple.isAMDGPU() && (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "HIP" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << ArchStr; return StringRef(); } else if (Triple.isSPIRAOT() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && (Arch == OffloadArch::UNKNOWN || !IsIntelGPUOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "spir64_gen" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_gen" << ArchStr; return StringRef(); } else if (Triple.isSPIRAOT() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64 && (Arch == OffloadArch::UNKNOWN || !IsIntelCPUOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "spir64_x86_64" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_x86_64" << ArchStr; return StringRef(); } if (IsNVIDIAOffloadArch(Arch)) @@ -7557,11 +7359,7 @@ getConflictOffloadArchCombination(const llvm::DenseSet &Archs, llvm::SmallVector Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC, - bool SpecificToolchain) const { - if (!TC) - TC = &C.getDefaultToolChain(); - + Action::OffloadKind Kind, const ToolChain &TC) const { // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && Args.hasArgNoClaim(options::OPT_offload_arch_EQ, @@ -7574,89 +7372,42 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } llvm::DenseSet Archs; - StringRef Arch; - for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { + std::unique_ptr ExtractedArg = nullptr; + for (auto *Arg : C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. - std::unique_ptr ExtractedArg = nullptr; if (Kind == Action::OFK_SYCL) { - // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" - // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" - if (TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen && - (Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) || - Arg->getOption().matches(options::OPT_Xsycl_backend))) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - auto DeviceTC = std::make_unique( - *this, TC->getTriple(), *HostTC, C.getInputArgs()); - assert(DeviceTC && "Device toolchain not defined."); - ArgStringList TargetArgs; - DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), - C.getInputArgs(), TargetArgs); - // Look for -device and use that as the known - // arch to be associated with the current spir64_gen entry. Grab - // the right most entry. - for (int i = TargetArgs.size() - 2; i >= 0; --i) { - if (StringRef(TargetArgs[i]) == "-device") { - Arch = TargetArgs[i + 1]; - if (!Arch.empty()) - Archs.insert(Arch); - break; - } - } - } // For SYCL based offloading, we allow for -Xsycl-target-backend // and -Xsycl-target-backend=amdgcn-amd-hsa --offload-arch=gfx908 for // specifying options. - if (!(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) && - Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && - llvm::Triple(Arg->getValue(0)) == TC->getTriple()) { + if (Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && + llvm::Triple(Arg->getValue(0)) == TC.getTriple()) { Arg->claim(); unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); // -Xsycl-target-backend --offload-arch=gfx1150 - } else if (!(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_gen) && - Arg->getOption().matches(options::OPT_Xsycl_backend)) { + } else if (Arg->getOption().matches(options::OPT_Xsycl_backend)) { unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); } - } else { - if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && - ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { - Arg->claim(); - unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); - unsigned Prev = Index; - ExtractedArg = getOpts().ParseOneArg(Args, Index); - if (!ExtractedArg || Index > Prev + 1) { - TC->getDriver().Diag(diag::err_drv_invalid_Xopenmp_target_with_args) - << Arg->getAsString(Args); - continue; - } - Arg = ExtractedArg.get(); - } } - if (Kind == Action::OFK_SYCL && - Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { + Arg->getOption().matches(options::OPT_offload_targets_EQ)) { for (StringRef SYCLTargetValue : Arg->getValues()) { + StringRef Arch; if (auto Device = tools::SYCL::gen::isGPUTarget( SYCLTargetValue)) { - if (SpecificToolchain && - !(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)) + if (!(TC.getTriple().isSPIRAOT() && + TC.getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)) continue; if (Device->empty()) { Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; continue; } if (IsIntelGPUOffloadArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else if (auto Device = tools::SYCL::gen::isGPUTarget< tools::SYCL::gen::NvidiaGPU>(SYCLTargetValue)) { @@ -7665,7 +7416,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, continue; } if (IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else if (auto Device = tools::SYCL::gen::isGPUTarget< clang::driver::tools::SYCL::gen::AmdGPU>( @@ -7675,7 +7426,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, continue; } if (IsSYCLSupportedAMDGPUArch(StringToOffloadArch( - getProcessorFromTargetID(TC->getTriple(), Device->data())))) + getProcessorFromTargetID(TC.getTriple(), Device->data())))) Arch = Device->data(); } else { Arch = StringRef(); @@ -7684,75 +7435,110 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(Arch); } } - // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { for (StringRef Arch : Arg->getValues()) { if (Arch == "native" || Arch.empty()) { - auto GPUsOrErr = TC->getSystemGPUArchs(Args); + auto GPUsOrErr = TC.getSystemGPUArchs(Args); if (!GPUsOrErr) { - if (!SpecificToolchain) - llvm::consumeError(GPUsOrErr.takeError()); - else - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC.getArch()) + << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } for (auto ArchStr : *GPUsOrErr) { - StringRef CanonicalStr = - getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr), - TC->getTriple(), SpecificToolchain); + StringRef CanonicalStr = getCanonicalArchString( + C, Args, Args.MakeArgString(ArchStr), TC.getTriple()); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else return llvm::SmallVector(); } } else { - StringRef CanonicalStr = getCanonicalArchString( - C, Args, Arch, TC->getTriple(), SpecificToolchain); + StringRef CanonicalStr = + getCanonicalArchString(C, Args, Arch, TC.getTriple()); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else return llvm::SmallVector(); } } } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { - for (StringRef Arch : llvm::split(Arg->getValue(), ",")) { + for (StringRef Arch : Arg->getValues()) { if (Arch == "all") { Archs.clear(); } else { - StringRef ArchStr = getCanonicalArchString( - C, Args, Arch, TC->getTriple(), SpecificToolchain); + StringRef ArchStr = + getCanonicalArchString(C, Args, Arch, TC.getTriple()); Archs.erase(ArchStr); } } } } + if (Kind == Action::OFK_SYCL) { + // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" + // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" + if (TC.getTriple().isSPIRAOT() && + TC.getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) { + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + auto DeviceTC = std::make_unique( + *this, TC.getTriple(), *HostTC, C.getInputArgs()); + assert(DeviceTC && "Device toolchain not defined."); + ArgStringList TargetArgs; + DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), + C.getInputArgs(), TargetArgs); + // Look for -device and use that as the known + // arch to be associated with the current spir64_gen entry. Grab + // the right most entry. + for (int i = TargetArgs.size() - 2; i >= 0; --i) { + if (StringRef(TargetArgs[i]) == "-device") { + StringRef Arch; + Arch = TargetArgs[i + 1]; + if (!Arch.empty()) + Archs.insert(Arch); + break; + } + } + } + } + if (auto ConflictingArchs = - getConflictOffloadArchCombination(Archs, TC->getTriple())) + getConflictOffloadArchCombination(Archs, TC.getTriple())) C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) << ConflictingArchs->first << ConflictingArchs->second; - // Skip filling defaults if we're just querying what is availible. - if (SpecificToolchain && Archs.empty()) { + // Fill in the default architectures if not provided explicitly. + if (Archs.empty()) { if (Kind == Action::OFK_Cuda) { Archs.insert(OffloadArchToString(OffloadArch::CudaDefault)); } else if (Kind == Action::OFK_HIP) { - Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); + Archs.insert(OffloadArchToString(TC.getTriple().isSPIRV() + ? OffloadArch::Generic + : OffloadArch::HIPDefault)); + } else if (Kind == Action::OFK_SYCL) { + // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. + // The default arch is set for NVPTX if not provided. For AMDGPU, emit + // an error as the user is responsible to set the arch. + if (TC.getTriple().isNVPTX()) + Archs.insert(OffloadArchToString(OffloadArch::SM_50)); + else if (TC.getTriple().isAMDGPU()) + C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch) + << 1 << TC.getTriple().str(); + else + Archs.insert(StringRef()); } else if (Kind == Action::OFK_OpenMP) { // Accept legacy `-march` device arguments for OpenMP. - if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind) + if (auto *Arg = C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind) .getLastArg(options::OPT_march_EQ)) { Archs.insert(Arg->getValue()); } else { - auto ArchsOrErr = TC->getSystemGPUArchs(Args); + auto ArchsOrErr = TC.getSystemGPUArchs(Args); if (!ArchsOrErr) { - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC.getArch()) << llvm::toString(ArchsOrErr.takeError()) << "--offload-arch"; } else if (!ArchsOrErr->empty()) { for (auto Arch : *ArchsOrErr) @@ -7761,17 +7547,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(StringRef()); } } - } else if (Kind == Action::OFK_SYCL) { - // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. - // The default arch is set for NVPTX if not provided. For AMDGPU, emit - // an error as the user is responsible to set the arch. - if (TC->getTriple().isNVPTX()) - Archs.insert(OffloadArchToString(OffloadArch::SM_50)); - else if (TC->getTriple().isAMDGPU()) - C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch) - << 1 << TC->getTriple().str(); - else - Archs.insert(StringRef()); } } Args.ClaimAllArgs(options::OPT_offload_arch_EQ); @@ -7866,7 +7641,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // Get the product of all bound architectures and toolchains. SmallVector> TCAndArchs; for (const ToolChain *TC : ToolChains) { - for (StringRef Arch : OffloadArchs.lookup(TC)) { + for (StringRef Arch : getOffloadArchs(C, C.getArgs(), Kind, *TC)) { TCAndArchs.push_back(std::make_pair(TC, Arch)); DeviceActions.push_back( C.MakeAction(*InputArg, InputType, CUID)); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index db9b9f572647d..c70c0203eb594 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -105,44 +105,6 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, addIfExists(getFilePaths(), Path); } -llvm::Expected> -ToolChain::executeToolChainProgram(StringRef Executable) const { - llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile, - llvm::sys::fs::OF_Text); - llvm::FileRemover OutputRemover(OutputFile.c_str()); - std::optional Redirects[] = { - {""}, - OutputFile.str(), - {""}, - }; - - std::string ErrorMessage; - int SecondsToWait = 60; - if (std::optional Str = - llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { - if (!llvm::to_integer(*Str, SecondsToWait)) - return llvm::createStringError(std::error_code(), - "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " - "an integer, got '" + - *Str + "'"); - SecondsToWait = std::max(SecondsToWait, 0); // infinite - } - if (llvm::sys::ExecuteAndWait(Executable, {Executable}, {}, Redirects, - SecondsToWait, - /*MemoryLimit=*/0, &ErrorMessage)) - return llvm::createStringError(std::error_code(), - Executable + ": " + ErrorMessage); - - llvm::ErrorOr> OutputBuf = - llvm::MemoryBuffer::getFile(OutputFile.c_str()); - if (!OutputBuf) - return llvm::createStringError(OutputBuf.getError(), - "Failed to read stdout of " + Executable + - ": " + OutputBuf.getError().message()); - return std::move(*OutputBuf); -} - void ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) { Triple.setEnvironment(Env); if (EffectiveTriple != llvm::Triple()) @@ -1941,7 +1903,7 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs( getDriver().Diag(diag::err_drv_Xopenmp_target_missing_triple); continue; } - if (IsSYCL && !SingleTargetTripleCount(options::OPT_fsycl_targets_EQ)) { + if (IsSYCL && !SingleTargetTripleCount(options::OPT_offload_targets_EQ)) { getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple) << A->getSpelling(); continue; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 16fbd42a48156..5e463b9c98687 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -906,7 +906,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("amdgpu-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = getDriver().executeProgram({Program}); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 44ab37667c298..1454860b10c1f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -106,32 +106,15 @@ forAllAssociatedToolChains(Compilation &C, const JobAction &JA, // Apply Work on all the offloading tool chains associated with the current // action. - if (JA.isHostOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isDeviceOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isHostOffloading(Action::OFK_HIP)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isDeviceOffloading(Action::OFK_HIP)) - Work(*C.getSingleOffloadToolChain()); - - if (JA.isHostOffloading(Action::OFK_OpenMP)) { - auto TCs = C.getOffloadToolChains(); - for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) - Work(*II->second); - } else if (JA.isDeviceOffloading(Action::OFK_OpenMP)) - Work(*C.getSingleOffloadToolChain()); - - if (JA.isHostOffloading(Action::OFK_SYCL)) { - auto TCs = C.getOffloadToolChains(); - for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) - Work(*II->second); - } else if (JA.isDeviceOffloading(Action::OFK_SYCL)) - Work(*C.getSingleOffloadToolChain()); - - // - // TODO: Add support for other offloading programming models here. - // + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP, + Action::OFK_HIP, Action::OFK_SYCL}) { + if (JA.isHostOffloading(Kind)) { + auto TCs = C.getOffloadToolChains(Kind); + for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) + Work(*II->second); + } else if (JA.isDeviceOffloading(Kind)) + Work(*C.getSingleOffloadToolChain()); + } } static bool @@ -5434,8 +5417,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else { // Host-side compilation. NormalizedTriple = - (IsCuda ? C.getSingleOffloadToolChain() - : C.getSingleOffloadToolChain()) + (IsCuda ? C.getOffloadToolChains(Action::OFK_Cuda).first->second + : C.getOffloadToolChains(Action::OFK_HIP).first->second) ->getTriple() .normalize(); if (IsCuda) { @@ -8683,7 +8666,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (isa(JA) && JA.isHostOffloading(Action::OFK_SYCL)) { SmallString<128> TargetInfo("-fsycl-targets="); - if (Arg *Tgts = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *Tgts = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { if (i) TargetInfo += ','; diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 558172026a209..bc148b83d8ecb 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -900,7 +900,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("nvptx-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = getDriver().executeProgram({Program}); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 5c8ac5585648e..61c79bb4df32a 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -312,7 +312,7 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // add -fsycl-targets=intel_gpu_pvc..., native bfloat16 devicelib can // only be linked when all GPU types specified support. // We need to filter CPU target here and only focus on GPU device. - if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *SYCLTarget = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (auto TargetsV : SYCLTarget->getValues()) { if (!checkSpirvJIT(StringRef(TargetsV)) && !StringRef(TargetsV).starts_with("spir64_gen") && @@ -333,7 +333,7 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // bfloat16 native conversion. UseNative = true; - if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + if (Arg *SYCLTarget = Args.getLastArg(options::OPT_offload_targets_EQ)) { for (auto TargetsV : SYCLTarget->getValues()) { if (!checkSpirvJIT(StringRef(TargetsV)) && !GPUArchsWithNBF16.contains(StringRef(TargetsV))) { @@ -641,7 +641,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, if (!IsSpirvAOT) return JIT; - llvm::opt::Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ); + llvm::opt::Arg *SYCLTarget = + Args.getLastArg(options::OPT_offload_targets_EQ); if (!SYCLTarget || (SYCLTarget->getValues().size() != 1)) return JIT; @@ -1667,7 +1668,7 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::Triple &Triple, if (OptNoTriple) { // With multiple -fsycl-targets, a triple is required so we know where // the options should go. - const Arg *TargetArg = Args.getLastArg(options::OPT_fsycl_targets_EQ); + const Arg *TargetArg = Args.getLastArg(options::OPT_offload_targets_EQ); if (TargetArg && TargetArg->getValues().size() != 1) { getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple) << A->getSpelling(); @@ -1906,7 +1907,7 @@ void SYCLToolChain::TranslateBackendTargetArgs( // Handle -Xsycl-target-backend. TranslateTargetOpt(Triple, Args, CmdArgs, options::OPT_Xsycl_backend, options::OPT_Xsycl_backend_EQ, Device); - TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ); + TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_offload_targets_EQ); } void SYCLToolChain::TranslateLinkerTargetArgs(const llvm::Triple &Triple, diff --git a/clang/test/Driver/amdgpu-hip-system-arch.c b/clang/test/Driver/amdgpu-hip-system-arch.c index 9c27bc09fb36c..609155901c71b 100644 --- a/clang/test/Driver/amdgpu-hip-system-arch.c +++ b/clang/test/Driver/amdgpu-hip-system-arch.c @@ -14,14 +14,14 @@ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_fail -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: cannot determine amdgcn architecture{{.*}}; consider passing it via '--offload-arch' +// NO-OUTPUT-ERROR: error: cannot determine hip architecture{{.*}}; consider passing it via '--offload-arch' // case when amdgpu-arch does not return anything with successful execution // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT -// EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '--offload-arch' +// EMPTY-OUTPUT: error: cannot determine hip architecture: No GPU detected in the system; consider passing it via '--offload-arch' // case when amdgpu-arch returns a gfx906 GPU. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \ @@ -36,4 +36,5 @@ // RUN: --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 \ // RUN: -x hip %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD-TIMEOUT -// BAD-TIMEOUT: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) +// BAD-TIMEOUT: clang: error: cannot determine hip architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) + diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu index 8b91a1d5a7fcf..265eb6f64ccd8 100644 --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -324,8 +324,9 @@ // RUN: -ccc-print-phases --offload-arch=sm_999 -fgpu-rdc -c %s 2>&1 \ // RUN: | FileCheck -check-prefix=INVALID-ARCH %s // INVALID-ARCH: error: unsupported CUDA gpu architecture: sm_999 -// INVALID-ARCH-NEXT: 0: input, "[[INPUT:.+]]", cuda, (host-cuda) -// INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) -// INVALID-ARCH-NEXT: 2: compiler, {1}, ir, (host-cuda) -// INVALID-ARCH-NEXT: 3: backend, {2}, assembler, (host-cuda) -// INVALID-ARCH-NEXT: 4: assembler, {3}, object, (host-cuda) +// INVALID-ARCH: 0: input, "[[INPUT:.+]]", cuda +// INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output +// INVALID-ARCH-NEXT: 2: compiler, {1}, ir +// INVALID-ARCH-NEXT: 3: backend, {2}, assembler +// INVALID-ARCH-NEXT: 4: assembler, {3}, object + diff --git a/clang/test/Driver/hip-inputs.hip b/clang/test/Driver/hip-inputs.hip index 2d4cc3103c5ec..4417c43c2f55c 100644 --- a/clang/test/Driver/hip-inputs.hip +++ b/clang/test/Driver/hip-inputs.hip @@ -15,5 +15,6 @@ // RUN: --hip-link %S/Inputs/hip_multiple_inputs/a.cu 2>&1 \ // RUN: | FileCheck -check-prefix=MIX %s -// CHECK-NOT: error: mixed CUDA and HIP compilation is not supported -// MIX: error: mixed CUDA and HIP compilation is not supported +// CHECK-NOT: error: mixed CUDA and HIP offloading compilation is not supported +// MIX: error: mixed CUDA and HIP offloading compilation is not supported + diff --git a/clang/test/Driver/hip-invalid-target-id.hip b/clang/test/Driver/hip-invalid-target-id.hip index 555043facb2a3..e41981b902138 100644 --- a/clang/test/Driver/hip-invalid-target-id.hip +++ b/clang/test/Driver/hip-invalid-target-id.hip @@ -4,7 +4,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s -// NOPLUS: error: invalid target ID 'gfx908xnack' +// NOPLUS: error: unsupported HIP gpu architecture: gfx908xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx900 \ @@ -22,7 +22,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s -// UNK: error: invalid target ID 'gfx908:unknown+' +// UNK: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ @@ -31,7 +31,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s -// MIXED: error: invalid target ID 'gfx908:sramecc+:unknown+' +// MIXED: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ @@ -55,7 +55,7 @@ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s -// NOCOLON: error: invalid target ID 'gfx900+xnack' +// NOCOLON: error: unsupported HIP gpu architecture: gfx900+xnack // RUN: not %clang -### --target=x86_64-linux-gnu \ // RUN: -x hip --offload-arch=gfx908 \ @@ -64,3 +64,4 @@ // RUN: %s 2>&1 | FileCheck -check-prefix=COMBO %s // COMBO: error: invalid offload arch combinations: 'gfx908' and 'gfx908:xnack+' + diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip index af99b4a4550f9..44f93746d94b0 100644 --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -92,7 +92,7 @@ // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" // HIPTHINLTO: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables" // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" -// HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}"-plugin-opt=thinlto"{{.*}}"-plugin-opt=-force-import-all" +// HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all" // Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables. // @@ -115,11 +115,6 @@ // OMP-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp" // OMP: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp" -// RUN: not %clang --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ -// RUN: --offload-arch=gfx906 -fopenmp=libomp -fopenmp-targets=amdgcn %s 2>&1 \ -// RUN: | FileCheck -check-prefix=OMPTGT %s -// OMPTGT: unsupported option '--offload-targets=' for language mode 'HIP' - // Check -Xoffload-linker option is passed to lld. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ @@ -259,3 +254,4 @@ // RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=0x4 %s 2>&1 | \ // RUN: FileCheck -check-prefix=INVJOBS %s // INVJOBS: clang: error: invalid integral value '0x4' in '--offload-jobs=0x4' + diff --git a/clang/test/Driver/invalid-offload-options.cpp b/clang/test/Driver/invalid-offload-options.cpp index 48d5310538a3c..52ab604d5e0d2 100644 --- a/clang/test/Driver/invalid-offload-options.cpp +++ b/clang/test/Driver/invalid-offload-options.cpp @@ -1,29 +1,8 @@ // UNSUPPORTED: system-windows -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload= \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s // RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload=foo \ // RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ // RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s // INVALID-TARGET: error: invalid or unsupported offload target: '{{.*}}' -// In the future we should be able to specify multiple targets for HIP -// compilation but currently it is not supported. -// -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload=foo,bar \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu \ -// RUN: --offload=foo --offload=bar \ -// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \ -// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s - -// TOO-MANY-TARGETS: error: only one offload target is supported - -// RUN: not %clang -### -x hip --target=x86_64-linux-gnu -nogpuinc -nogpulib \ -// RUN: --offload=amdgcn-amd-amdhsa --offload-arch=gfx900 %s \ -// RUN: 2>&1 | FileCheck --check-prefix=OFFLOAD-ARCH-MIX %s - -// OFFLOAD-ARCH-MIX: error: option '--offload-arch' cannot be specified with '--offload' diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c index c54eeac73f73b..2d4eca8c43bc3 100644 --- a/clang/test/Driver/nvptx-cuda-system-arch.c +++ b/clang/test/Driver/nvptx-cuda-system-arch.c @@ -16,14 +16,14 @@ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_fail -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: cannot determine nvptx64 architecture{{.*}}; consider passing it via '--offload-arch' +// NO-OUTPUT-ERROR: error: cannot determine cuda architecture{{.*}}; consider passing it via '--offload-arch' // case when nvptx-arch does not return anything with successful execution // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT -// EMPTY-OUTPUT: error: cannot determine nvptx64 architecture: No NVIDIA GPU detected in the system; consider passing it via '--offload-arch' +// EMPTY-OUTPUT: error: cannot determine cuda architecture: No GPU detected in the system; consider passing it via '--offload-arch' // case when nvptx-arch does not return anything with successful execution // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \ @@ -49,4 +49,4 @@ // RUN: --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \ // RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda -x cuda %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD-TIMEOUT -// BAD-TIMEOUT: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) +// BAD-TIMEOUT: clang: error: cannot determine cuda architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite) diff --git a/clang/test/Driver/offload-target.c b/clang/test/Driver/offload-target.c new file mode 100644 index 0000000000000..b0b3660bfad71 --- /dev/null +++ b/clang/test/Driver/offload-target.c @@ -0,0 +1,23 @@ +// RUN: %clang -### -fsycl --offload-targets=spirv64 -nogpuinc %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL +// SYCL: "spirv64-unknown-unknown" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[SYCL_BC:.+]]" + +// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=HIP +// HIP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc -x cuda %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CUDA +// CUDA: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[NV_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa,nvptx64-nvidia-cuda -fopenmp \ +// RUN: -Xarch_amdgcn --offload-arch=gfx90a -Xarch_nvptx64 --offload-arch=sm_89 \ +// RUN: -nogpulib -nogpuinc %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=OPENMP +// OPENMP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" +// OPENMP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[NV_OBJ:.+]]" + +// RUN: %clang -### --offload-targets=spirv64-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \ +// RUN: | FileCheck %s -check-prefix=HIPSPIRV +// HIPSPIRV: "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]" + diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c index 516c74b190885..1b460a0ddca43 100644 --- a/clang/test/Driver/openmp-offload.c +++ b/clang/test/Driver/openmp-offload.c @@ -7,7 +7,7 @@ /// Check whether an invalid OpenMP target is specified: // RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s -// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' +// CHK-INVALID-TARGET: error: invalid or unsupported offload target: 'aaa-bbb-ccc-ddd' /// ########################################################################### @@ -16,21 +16,10 @@ // RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s // CHK-EMPTY-OMPTARGETS: warning: joined argument expects additional value: '-fopenmp-targets=' -/// ########################################################################### - -/// Check error for no -fopenmp option -// RUN: not %clang -### -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s -// RUN: not %clang -### -fopenmp=libgomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s -// CHK-NO-FOPENMP: error: '-fopenmp-targets' must be used in conjunction with a '-fopenmp' option compatible with offloading; e.g., '-fopenmp=libomp' or '-fopenmp=libiomp5' - -/// ########################################################################### - /// Check warning for duplicate offloading targets. // RUN: %clang -### -ccc-print-phases -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s -// CHK-DUPLICATES: warning: OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified; will be ignored +// CHK-DUPLICATES: warning: offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified; will be ignored /// ########################################################################### diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c index b18ecf3ec474b..167b07a23f512 100644 --- a/clang/test/Driver/openmp-system-arch.c +++ b/clang/test/Driver/openmp-system-arch.c @@ -24,13 +24,7 @@ // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \ // RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch= \ -// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ -// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch= \ -// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ -// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR -// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead +// NO-OUTPUT-ERROR: error: cannot determine openmp architecture // case when amdgpu-arch succeeds. // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \ diff --git a/clang/test/Driver/sycl-offload-aot.cpp b/clang/test/Driver/sycl-offload-aot.cpp index 50b484b1f874d..031d0db530d15 100644 --- a/clang/test/Driver/sycl-offload-aot.cpp +++ b/clang/test/Driver/sycl-offload-aot.cpp @@ -7,7 +7,7 @@ // RUN: | FileCheck -check-prefix=CHK-SYCL-BAD-TRIPLE %s // RUN: not %clang_cl -### -fsycl-targets=spir64_bad-unknown-unknown -fsycl -- %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-SYCL-BAD-TRIPLE %s -// CHK-SYCL-BAD-TRIPLE: error: SYCL target is invalid: 'spir64_bad-unknown-unknown' +// CHK-SYCL-BAD-TRIPLE: error: invalid or unsupported offload target: 'spir64_bad-unknown-unknown' /// Check no error for -fsycl-targets with good triple // RUN: %clang -### -fsycl-targets=spir64_x86_64-unknown-unknown -fsycl %s 2>&1 \ diff --git a/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp index d733618744d71..f728e620ab7f9 100644 --- a/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp +++ b/clang/test/Driver/sycl-offload-arch-intel-cpu.cpp @@ -54,18 +54,19 @@ // RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=graniterapids %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-CPU,CLANG-OFFLOAD-PACKAGER-CPU -DDEV_STR=graniterapids +// Tests for handling a missing architecture. +// +// RUN: %clangxx --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ +// RUN: | FileCheck --check-prefixes=TARGET-TRIPLE-DEFAULT,CLANG-OFFLOAD-PACKAGER-DEFAULT %s +// RUN: %clang_cl --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ +// RUN: | FileCheck --check-prefixes=TARGET-TRIPLE-DEFAULT,CLANG-OFFLOAD-PACKAGER-DEFAULT %s + // TARGET-TRIPLE-CPU: clang{{.*}} "-triple" "spir64_x86_64-unknown-unknown" // TARGET-TRIPLE-CPU: "-D__SYCL_TARGET_INTEL_X86_64__" // CLANG-OFFLOAD-PACKAGER-CPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_x86_64-unknown-unknown,arch=[[DEV_STR]],kind=sycl" -// Tests for handling a missing architecture. -// -// RUN: not %clangxx --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ -// RUN: | FileCheck -check-prefix=MISSING %s -// RUN: not %clang_cl --offload-new-driver -fsycl --offload-arch= %s -### 2>&1 \ -// RUN: | FileCheck -check-prefix=MISSING %s - -// MISSING: error: must pass in an explicit cpu or gpu architecture to '--offload-arch' +// TARGET-TRIPLE-DEFAULT: clang{{.*}} "-triple" "spir64-unknown-unknown" +// CLANG-OFFLOAD-PACKAGER-DEFAULT: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64-unknown-unknown,arch=generic,kind=sycl{{.*}}" // Tests for handling a incorrect architecture. // @@ -74,6 +75,6 @@ // RUN: not %clang_cl --offload-new-driver -fsycl --offload-arch=badArch %s -### 2>&1 \ // RUN: | FileCheck -check-prefix=BAD-ARCH %s -// BAD-ARCH: error: SYCL target is invalid: 'badArch' +// BAD-ARCH: error: unsupported offload gpu architecture: badArch diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index 5e446369a4181..cf8783648034c 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -114,8 +114,9 @@ // RUN: --offload-new-driver %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK_PACKAGER_OPTS %s // CHK_PACKAGER_OPTS: clang-offload-packager{{.*}} "-o" -// CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64_gen-unknown-unknown,arch=pvc,kind=sycl,compile-opts={{.*}}-spir64_gen-opt,link-opts=-spir64_gen-link-opt // CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64-unknown-unknown,arch=generic,kind=sycl,compile-opts={{.*}}-spir64-opt,link-opts=-spir64-link-opt +// CHK_PACKAGER_OPTS-SAME: {{.*}}triple=spir64_gen-unknown-unknown,arch=pvc,kind=sycl,compile-opts={{.*}}-spir64_gen-opt,link-opts=-spir64_gen-link-opt + /// Check phases with multiple intel_gpu settings // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl \ diff --git a/clang/test/Driver/sycl-offload-old-model.c b/clang/test/Driver/sycl-offload-old-model.c index e5a7d67145a61..1ca44f2bb3b39 100644 --- a/clang/test/Driver/sycl-offload-old-model.c +++ b/clang/test/Driver/sycl-offload-old-model.c @@ -11,7 +11,7 @@ // RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s // RUN: not %clang_cl -### -fsycl --no-offload-new-driver -fsycl-targets=aaa-bbb-ccc-ddd %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s -// CHK-INVALID-TARGET: error: SYCL target is invalid: 'aaa-bbb-ccc-ddd' +// CHK-INVALID-TARGET: error: invalid or unsupported offload target: 'aaa-bbb-ccc-ddd' /// ########################################################################### @@ -20,7 +20,7 @@ // RUN: | FileCheck -check-prefix=CHK-INVALID-REAL-TARGET %s // RUN: not %clang_cl -### -fsycl --no-offload-new-driver -fsycl-targets=x86_64 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-INVALID-REAL-TARGET %s -// CHK-INVALID-REAL-TARGET: error: SYCL target is invalid: 'x86_64' +// CHK-INVALID-REAL-TARGET: error: invalid or unsupported offload target: 'x86_64' /// ########################################################################### @@ -74,7 +74,7 @@ /// Check warning for duplicate offloading targets. // RUN: %clang -### -ccc-print-phases -fsycl --no-offload-new-driver -fsycl-targets=spir64-unknown-unknown,spir64-unknown-unknown %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s -// CHK-DUPLICATES: warning: SYCL offloading target 'spir64-unknown-unknown' is similar to target 'spir64-unknown-unknown' already specified; will be ignored +// CHK-DUPLICATES: warning: offloading target 'spir64-unknown-unknown' is similar to target 'spir64-unknown-unknown' already specified; will be ignored // RUN: %clang -### -ccc-print-phases -fsycl --no-offload-new-driver -fsycl-targets=intel_gpu_pvc,intel_gpu_pvc %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES-GPU %s diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index e845add455f9e..92a09c7d8c0d6 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -11,7 +11,7 @@ // RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s // RUN: not %clang_cl -### -fsycl --offload-new-driver -fsycl-targets=aaa-bbb-ccc-ddd %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s -// CHK-INVALID-TARGET: error: SYCL target is invalid: 'aaa-bbb-ccc-ddd' +// CHK-INVALID-TARGET: error: invalid or unsupported offload target: 'aaa-bbb-ccc-ddd' /// ########################################################################### @@ -20,7 +20,7 @@ // RUN: | FileCheck -check-prefix=CHK-INVALID-REAL-TARGET %s // RUN: not %clang_cl -### -fsycl --offload-new-driver -fsycl-targets=x86_64 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-INVALID-REAL-TARGET %s -// CHK-INVALID-REAL-TARGET: error: SYCL target is invalid: 'x86_64' +// CHK-INVALID-REAL-TARGET: error: invalid or unsupported offload target: 'x86_64' /// ########################################################################### @@ -81,7 +81,7 @@ /// Check warning for duplicate offloading targets. // RUN: %clang -### -ccc-print-phases -fsycl --offload-new-driver -fsycl-targets=spir64-unknown-unknown,spir64-unknown-unknown %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s -// CHK-DUPLICATES: warning: SYCL offloading target 'spir64-unknown-unknown' is similar to target 'spir64-unknown-unknown' already specified; will be ignored +// CHK-DUPLICATES: warning: offloading target 'spir64-unknown-unknown' is similar to target 'spir64-unknown-unknown' already specified; will be ignored // RUN: %clang -### -ccc-print-phases -fsycl --offload-new-driver -fsycl-targets=intel_gpu_pvc,intel_gpu_pvc %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES-GPU %s diff --git a/clang/test/Driver/sycl-oneapi-gpu-amdgpu.cpp b/clang/test/Driver/sycl-oneapi-gpu-amdgpu.cpp index b8beeeeda65db..7bfff859d9222 100644 --- a/clang/test/Driver/sycl-oneapi-gpu-amdgpu.cpp +++ b/clang/test/Driver/sycl-oneapi-gpu-amdgpu.cpp @@ -142,14 +142,14 @@ // RUN: FileCheck %s --check-prefix=BAD_TARGET_TRIPLE_ENV // RUN: not %clang_cl -c -fsycl -fsycl-targets=amdgcn-amd-amdhsa-sycl -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD_TARGET_TRIPLE_ENV -// BAD_TARGET_TRIPLE_ENV: error: SYCL target is invalid: 'amdgcn-amd-amdhsa-sycl' +// BAD_TARGET_TRIPLE_ENV: error: invalid or unsupported offload target: 'amdgcn-amd-amdhsa-sycl' // Check for invalid SYCL triple for AMD GPUs. // RUN: not %clangxx -c -fsycl -nogpulib -fsycl-targets=amdgcn -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD_TARGET_TRIPLE // RUN: not %clang_cl -c -fsycl -fsycl-targets=amdgcn-amd -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD_TARGET_TRIPLE -// BAD_TARGET_TRIPLE: error: SYCL target is invalid: 'amdgcn{{.*}}' +// BAD_TARGET_TRIPLE: error: invalid or unsupported offload target: 'amdgcn{{.*}}' /// Test for proper creation of fat object // RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx700 \ diff --git a/clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp b/clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp index 26b0cf8812ffa..af3c57b1f958f 100644 --- a/clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp +++ b/clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp @@ -53,14 +53,14 @@ // RUN: FileCheck %s --check-prefix=BAD_TARGET_TRIPLE_ENV // RUN: not %clang_cl -c -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycl -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD_TARGET_TRIPLE_ENV -// BAD_TARGET_TRIPLE_ENV: error: SYCL target is invalid: 'nvptx64-nvidia-cuda-sycl' +// BAD_TARGET_TRIPLE_ENV: error: invalid or unsupported offload target: 'nvptx64-nvidia-cuda-sycl' // Check for invalid SYCL triple for NVidia GPUs. // RUN: not %clangxx -c -fsycl -fsycl-targets=nvptx-nvidia-cuda -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD_TARGET_TRIPLE // RUN: not %clang_cl -c -fsycl -fsycl-targets=nvptx-nvidia-cuda -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=BAD_TARGET_TRIPLE -// BAD_TARGET_TRIPLE: error: SYCL target is invalid: 'nvptx-nvidia-cuda' +// BAD_TARGET_TRIPLE: error: invalid or unsupported offload target: 'nvptx-nvidia-cuda' /// Test for proper creation of fat object // RUN: %clangxx -c -fsycl -nocudalib -fsycl-targets=nvidia_gpu_sm_50 \ diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index fd4e55c2d5f14..b4237b1117c84 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -650,7 +650,7 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, C->getSingleOffloadToolChain(); InputArgList EmptyArgList; auto Archs = - D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, OffloadTC); + D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, *OffloadTC); assert(Archs.size() == 1 && "Offload toolchain should be configured to single architecture"); StringRef CPU = *Archs.begin();