diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0ab923fcdd583..55d10ed8e974a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -932,7 +932,9 @@ def W_Joined : Joined<["-"], "W">, Group, def Xanalyzer : Separate<["-"], "Xanalyzer">, HelpText<"Pass to the static analyzer">, MetaVarName<"">, Group; -def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>; +def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, + HelpText<"Pass to the compilation if the target matches ">, + MetaVarName<" ">; def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[NoXarchOption]>, HelpText<"Pass to the CUDA/HIP host compilation">, MetaVarName<"">; def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[NoXarchOption]>, @@ -1115,8 +1117,8 @@ def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">, // Common offloading options let Group = offload_Group in { -def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>, - Visibility<[ClangOption, FlangOption]>, +def offload_arch_EQ : Joined<["--"], "offload-arch=">, + Visibility<[ClangOption, FlangOption]>, Flags<[NoXarchOption]>, HelpText<"Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). " "If 'native' is used the compiler will detect locally installed architectures. " "For HIP offloading, the device architecture can be followed by target ID features " diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 912777a9808b4..5a4737fb381e6 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3409,7 +3409,9 @@ class OffloadingActionBuilder final { // Collect all offload arch parameters, removing duplicates. 
std::set GpuArchs; bool Error = false; - for (Arg *A : Args) { + const ToolChain &TC = *ToolChains.front(); + for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"", + AssociatedOffloadKind)) { if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || A->getOption().matches(options::OPT_no_offload_arch_EQ))) continue; @@ -3420,7 +3422,6 @@ class OffloadingActionBuilder final { ArchStr == "all") { GpuArchs.clear(); } else if (ArchStr == "native") { - const ToolChain &TC = *ToolChains.front(); auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); if (!GPUsOrErr) { TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index ebc982096595e..c25d1b6be14b5 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1648,7 +1648,8 @@ void ToolChain::TranslateXarchArgs( A->getOption().matches(options::OPT_Xarch_host)) ValuePos = 0; - unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos)); + const InputArgList &BaseArgs = Args.getBaseArgs(); + unsigned Index = BaseArgs.MakeIndex(A->getValue(ValuePos)); unsigned Prev = Index; std::unique_ptr XarchArg(Opts.ParseOneArg(Args, Index)); @@ -1672,8 +1673,31 @@ void ToolChain::TranslateXarchArgs( Diags.Report(DiagID) << A->getAsString(Args); return; } + XarchArg->setBaseArg(A); A = XarchArg.release(); + + // Linker input arguments require custom handling. The problem is that we + // have already constructed the phase actions, so we can not treat them as + // "input arguments". + if (A->getOption().hasFlag(options::LinkerInput)) { + // Convert the argument into individual Zlinker_input_args. Need to do this + // manually to avoid memory leaks with the allocated arguments. 
+ for (const char *Value : A->getValues()) { + auto Opt = Opts.getOption(options::OPT_Zlinker_input); + unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value); + auto NewArg = + new Arg(Opt, BaseArgs.MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index + 1), A); + + DAL->append(NewArg); + if (!AllocatedArgs) + DAL->AddSynthesizedArg(NewArg); + else + AllocatedArgs->push_back(NewArg); + } + } + if (!AllocatedArgs) DAL->AddSynthesizedArg(A); else @@ -1697,19 +1721,17 @@ llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs( } else if (A->getOption().matches(options::OPT_Xarch_host)) { NeedTrans = !IsDevice; Skip = IsDevice; - } else if (A->getOption().matches(options::OPT_Xarch__) && IsDevice) { - // Do not translate -Xarch_ options for non CUDA/HIP toolchain since - // they may need special translation. - // Skip this argument unless the architecture matches BoundArch - if (BoundArch.empty() || A->getValue(0) != BoundArch) - Skip = true; - else - NeedTrans = true; + } else if (A->getOption().matches(options::OPT_Xarch__)) { + NeedTrans = A->getValue() == getArchName() || + (!BoundArch.empty() && A->getValue() == BoundArch); + Skip = !NeedTrans; } if (NeedTrans || Skip) Modified = true; - if (NeedTrans) + if (NeedTrans) { + A->claim(); TranslateXarchArgs(Args, A, DAL, AllocatedArgs); + } if (!Skip) DAL->append(A); } diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 9a276c55bf7bc..b26c5bf1a909e 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -2777,30 +2777,6 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, // and try to push it down into tool specific logic. for (Arg *A : Args) { - if (A->getOption().matches(options::OPT_Xarch__)) { - // Skip this argument unless the architecture matches either the toolchain - // triple arch, or the arch being bound. 
- StringRef XarchArch = A->getValue(0); - if (!(XarchArch == getArchName() || - (!BoundArch.empty() && XarchArch == BoundArch))) - continue; - - Arg *OriginalArg = A; - TranslateXarchArgs(Args, A, DAL); - - // Linker input arguments require custom handling. The problem is that we - // have already constructed the phase actions, so we can not treat them as - // "input arguments". - if (A->getOption().hasFlag(options::LinkerInput)) { - // Convert the argument into individual Zlinker_input_args. - for (const char *Value : A->getValues()) { - DAL->AddSeparateArg( - OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value); - } - continue; - } - } - // Sob. These is strictly gcc compatible for the time being. Apple // gcc translates options twice, which means that self-expanding // options add duplicates. diff --git a/clang/test/Driver/Xarch.c b/clang/test/Driver/Xarch.c index f7693fb689d58..f35e2926f9c8d 100644 --- a/clang/test/Driver/Xarch.c +++ b/clang/test/Driver/Xarch.c @@ -1,8 +1,13 @@ // RUN: %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target x86_64-unknown-windows-msvc -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target aarch64-unknown-linux-gnu -Xarch_aarch64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target powerpc64le-unknown-linux-gnu -Xarch_powerpc64le -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s // O3ONCE: "-O3" // O3ONCE-NOT: "-O3" // RUN: %clang -target i386-apple-darwin11 -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s // O3NONE-NOT: "-O3" // O3NONE: argument unused during compilation: '-Xarch_i386 -O3' @@ -10,3 +15,6 @@ // 
INVALID: error: invalid Xarch argument: '-Xarch_i386 -o' // INVALID: error: invalid Xarch argument: '-Xarch_i386 -S' // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o' + +// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -Wl,foo %s -### 2>&1 | FileCheck -check-prefix=LINKER %s +// LINKER: "foo" diff --git a/clang/test/Driver/offload-Xarch.c b/clang/test/Driver/offload-Xarch.c new file mode 100644 index 0000000000000..978e6763fbdbc --- /dev/null +++ b/clang/test/Driver/offload-Xarch.c @@ -0,0 +1,34 @@ +// RUN: %clang -x cuda %s -Xarch_nvptx64 -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -x cuda %s -Xarch_device -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -x hip %s -Xarch_amdgcn -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc \ +// RUN: -Xarch_amdgcn -march=gfx90a -Xarch_amdgcn -O3 -S -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \ +// RUN: -Xarch_nvptx64 -march=sm_52 -Xarch_nvptx64 -O3 -S -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=O3ONCE %s +// O3ONCE: "-O3" +// O3ONCE-NOT: "-O3" + +// RUN: %clang -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -nogpulib \ +// RUN: --target=x86_64-unknown-linux-gnu -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_60 -nogpuinc \ +// RUN: -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx90a,gfx1030 -ccc-print-bindings -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=OPENMP %s +// +// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX1030_BC:.+]]" +// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: 
"[[GFX90A_BC:.+]]" +// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM52_PTX:.+]]" +// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM52_PTX]]"], output: "[[SM52_CUBIN:.+]]" +// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM60_PTX:.+]]" +// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM60_PTX]]"], output: "[[SM60_CUBIN:.+]]" +// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[GFX1030_BC]]", "[[GFX90A_BC]]", "[[SM52_CUBIN]]", "[[SM60_CUBIN]]"], output: "[[BINARY:.+]]" +// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" +// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + +// RUN: %clang -x cuda %s --offload-arch=sm_52,sm_60 -Xarch_sm_52 -O3 -Xarch_sm_60 -O0 \ +// RUN: --target=x86_64-unknown-linux-gnu -Xarch_host -O3 -S -nogpulib -nogpuinc -### 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA %s +// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_52" {{.*}}"-O3" +// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_60" {{.*}}"-O0" +// CUDA: "-cc1" "-triple" "x86_64-unknown-linux-gnu" {{.*}}"-O3"