Skip to content

Commit 6cefad7

Browse files
committed
[Offload] Move HIP and CUDA to new driver by default
Summary: This patch updates the `--offload-new-driver` flag to be default for all current offloading languages. This mostly just required updating a lot of tests to use the old format. I tried to update them where possible, but some were directly checking the old format. This is not intended to be landed immediately, but to allow for greater testing. One potential issue I've discovered is the lack of SPIR-V support or handling for `--offload`.
1 parent 4f791a3 commit 6cefad7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+268
-280
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4115,9 +4115,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
41154115
handleArguments(C, Args, Inputs, Actions);
41164116

41174117
bool UseNewOffloadingDriver =
4118-
C.isOffloadingHostKind(Action::OFK_OpenMP) ||
4118+
C.getActiveOffloadKinds() != Action::OFK_None &&
41194119
Args.hasFlag(options::OPT_offload_new_driver,
4120-
options::OPT_no_offload_new_driver, false);
4120+
options::OPT_no_offload_new_driver, true);
41214121

41224122
// Builder to be used to build offloading actions.
41234123
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -4802,7 +4802,7 @@ Action *Driver::ConstructPhaseAction(
48024802
offloadDeviceOnly() ||
48034803
(TargetDeviceOffloadKind == Action::OFK_HIP &&
48044804
!Args.hasFlag(options::OPT_offload_new_driver,
4805-
options::OPT_no_offload_new_driver, false)))
4805+
options::OPT_no_offload_new_driver, true)))
48064806
? types::TY_LLVM_IR
48074807
: types::TY_LLVM_BC;
48084808
return C.MakeAction<BackendJobAction>(Input, Output);

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4687,8 +4687,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
46874687
bool IsHostOffloadingAction =
46884688
JA.isHostOffloading(Action::OFK_OpenMP) ||
46894689
(JA.isHostOffloading(C.getActiveOffloadKinds()) &&
4690+
C.getActiveOffloadKinds() != Action::OFK_None &&
46904691
Args.hasFlag(options::OPT_offload_new_driver,
4691-
options::OPT_no_offload_new_driver, false));
4692+
options::OPT_no_offload_new_driver, true));
46924693

46934694
bool IsRDCMode =
46944695
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
@@ -4997,7 +4998,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
49974998
if (IsUsingLTO) {
49984999
if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
49995000
!Args.hasFlag(options::OPT_offload_new_driver,
5000-
options::OPT_no_offload_new_driver, false) &&
5001+
options::OPT_no_offload_new_driver, true) &&
50015002
!Triple.isAMDGPU()) {
50025003
D.Diag(diag::err_drv_unsupported_opt_for_target)
50035004
<< Args.getLastArg(options::OPT_foffload_lto,
@@ -6521,8 +6522,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
65216522
}
65226523

65236524
// Forward the new driver to change offloading code generation.
6524-
if (Args.hasFlag(options::OPT_offload_new_driver,
6525-
options::OPT_no_offload_new_driver, false))
6525+
if (C.getActiveOffloadKinds() != Action::OFK_None &&
6526+
Args.hasFlag(options::OPT_offload_new_driver,
6527+
options::OPT_no_offload_new_driver, true))
65266528
CmdArgs.push_back("--offload-new-driver");
65276529

65286530
SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);

clang/test/Driver/cl-offload.cu

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,10 @@
1818
// CUDA-SAME: "-Weverything"
1919
// CUDA: link
2020

21-
// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
22-
// HIP-SAME: "-Weverything"
2321
// HIP: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-pc-windows-msvc"
2422
// HIP-SAME: "-Weverything"
25-
// HIP: {{lld.* "-flavor" "gnu" "-m" "elf64_amdgpu"}}
23+
// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
24+
// HIP-SAME: "-Weverything"
2625
// HIP: {{link.* "amdhip64.lib"}}
2726

2827
// CMake uses this option when finding packages for HIP, so

clang/test/Driver/cuda-arch-translation.cu

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,19 @@
6969

7070
// HIP: clang-offload-bundler
7171

72-
// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
73-
// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
74-
// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30
75-
// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32
76-
// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35
77-
// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37
78-
// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
79-
// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
80-
// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
81-
// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
82-
// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
83-
// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
84-
// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
72+
// SM20:--image=profile=sm_20{{.*}}
73+
// SM21:--image=profile=sm_21{{.*}}
74+
// SM30:--image=profile=sm_30{{.*}}
75+
// SM32:--image=profile=sm_32{{.*}}
76+
// SM35:--image=profile=sm_35{{.*}}
77+
// SM37:--image=profile=sm_37{{.*}}
78+
// SM50:--image=profile=sm_50{{.*}}
79+
// SM52:--image=profile=sm_52{{.*}}
80+
// SM53:--image=profile=sm_53{{.*}}
81+
// SM60:--image=profile=sm_60{{.*}}
82+
// SM61:--image=profile=sm_61{{.*}}
83+
// SM62:--image=profile=sm_62{{.*}}
84+
// SM70:--image=profile=sm_70{{.*}}
8585
// GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600
8686
// GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601
8787
// GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602

clang/test/Driver/cuda-bad-arch.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
// RUN: | FileCheck -check-prefix OK %s
3131

3232
// We don't allow using NVPTX/AMDGCN for host compilation.
33-
// RUN: not %clang -### --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
33+
// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
3434
// RUN: | FileCheck -check-prefix HOST_NVPTX %s
35-
// RUN: not %clang -### --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
35+
// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
3636
// RUN: | FileCheck -check-prefix HOST_AMDGCN %s
3737

3838
// OK-NOT: error: Unsupported CUDA gpu architecture

clang/test/Driver/cuda-bindings.cu

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@
2626
// BIN-NOT: cuda-bindings-device-cuda-nvptx64
2727
// BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
2828
// BIN-NOT: cuda-bindings-device-cuda-nvptx64
29-
// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
29+
// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
3030

3131
//
3232
// Test single gpu architecture up to the assemble phase.
3333
//
3434
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
3535
// RUN: | FileCheck -check-prefix=ASM %s
36-
// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
36+
// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[BINDINGS:.+.s]]"
3737
// ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
3838

3939
//
@@ -64,8 +64,8 @@
6464
// BIN2-NOT: cuda-bindings-device-cuda-nvptx64
6565
// BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
6666
// BIN2-NOT: cuda-bindings-device-cuda-nvptx64
67-
// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
68-
// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
67+
// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
68+
// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out"
6969

7070
// .. same, but with -fsyntax-only
7171
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
@@ -81,9 +81,9 @@
8181
// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
8282
// RUN: | FileCheck -check-prefix=SYN %s
8383
// SYN-NOT: inputs:
84-
// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
85-
// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
84+
// SYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
8685
// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
86+
// SYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
8787
// SYN-NOT: inputs
8888

8989
// .. and with --offload-new-driver
@@ -100,7 +100,7 @@
100100
// RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
101101
// RUN: | FileCheck -check-prefix=NDSYN %s
102102
// NDSYN-NOT: inputs:
103-
// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
103+
// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
104104
// NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
105105
// NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
106106
// NDSYN-NOT: inputs:
@@ -112,8 +112,8 @@
112112
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
113113
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
114114
// RUN: | FileCheck -check-prefix=ASM2 %s
115-
// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
116-
// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
115+
// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+.s]]"
116+
// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+.s]]"
117117
// ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
118118

119119
//
@@ -128,7 +128,7 @@
128128
// RUN: | FileCheck -check-prefix=HBIN %s
129129
// HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
130130
// HBIN-NOT: cuda-bindings-device-cuda-nvptx64
131-
// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
131+
// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
132132

133133
//
134134
// Test one or more gpu architecture up to the assemble phase in host-only
@@ -166,7 +166,7 @@
166166
// Test two gpu architectures with complete compilation in device-only
167167
// compilation mode.
168168
//
169-
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
169+
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
170170
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
171171
// RUN: | FileCheck -check-prefix=DBIN2 %s
172172
// DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
@@ -180,7 +180,7 @@
180180
// Test two gpu architectures up to the assemble phase in device-only
181181
// compilation mode.
182182
//
183-
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
183+
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
184184
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
185185
// RUN: | FileCheck -check-prefix=DASM2 %s
186186
// DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"

clang/test/Driver/cuda-external-tools.cu

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
2626
// Generating relocatable device code
2727
// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \
28-
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
28+
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
2929
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
3030

3131
// With debugging enabled, ptxas should be run with no ptxas optimizations.
@@ -59,7 +59,7 @@
5959
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
6060
// Separate compilation targeting sm_35.
6161
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
62-
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
62+
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
6363
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
6464

6565
// 32-bit compile.
@@ -68,7 +68,7 @@
6868
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s
6969
// 32-bit compile when generating relocatable device code.
7070
// RUN: %clang -### --target=i386-linux-gnu -fgpu-rdc -c %s 2>&1 \
71-
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
71+
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
7272
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
7373

7474
// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
@@ -77,7 +77,7 @@
7777
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
7878
// Check that we still pass -c when generating relocatable device code.
7979
// RUN: %clang -### --target=x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
80-
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
80+
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
8181
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
8282

8383
// Check -Xcuda-ptxas and -Xcuda-fatbinary
@@ -99,13 +99,13 @@
9999

100100
// Check relocatable device code generation on MacOS.
101101
// RUN: %clang -### --target=x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \
102-
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
102+
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
103103
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
104104
// RUN: %clang -### --target=x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
105-
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
105+
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
106106
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
107107
// RUN: %clang -### --target=i386-apple-macosx -fgpu-rdc -c %s 2>&1 \
108-
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
108+
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
109109
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
110110

111111
// Check that CLANG forwards the -v flag to PTXAS.

clang/test/Driver/cuda-options.cu

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44

55
// Simple compilation case. Compile device-side to PTX assembly and make sure
66
// we use it on the host side.
7-
// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
7+
// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
88
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
99
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
1010
// RUN: -check-prefix NOLINK %s
1111

1212
// Typical compilation + link case.
13-
// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
13+
// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
1414
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
1515
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
1616
// RUN: -check-prefix LINK %s
@@ -35,7 +35,7 @@
3535
// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
3636

3737
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
38-
// RUN: --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
38+
// RUN: --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
3939
// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
4040
// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
4141

@@ -50,27 +50,27 @@
5050
// RUN: -check-prefix NOHOST -check-prefix NOLINK %s
5151

5252
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
53-
// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
53+
// RUN: --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
5454
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
5555
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
5656
// RUN: -check-prefix LINK %s
5757

5858
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
59-
// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
59+
// RUN: --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
6060
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
6161
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
6262
// RUN: -check-prefix LINK %s
6363

6464
// Verify that --cuda-gpu-arch option passes the correct GPU architecture to
6565
// device compilation.
66-
// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
66+
// RUN: %clang -### --cuda-include-ptx=all -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
6767
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
6868
// RUN: -check-prefix DEVICE-SM52 -check-prefix HOST \
6969
// RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
7070

7171
// Verify that there is one device-side compilation per --cuda-gpu-arch args
7272
// and that all results are included on the host side.
73-
// RUN: %clang -### --target=x86_64-linux-gnu \
73+
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
7474
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
7575
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
7676
// RUN: -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
@@ -130,9 +130,9 @@
130130
// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
131131
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
132132
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
133-
// RUN: --no-cuda-gpu-arch=all \
133+
// RUN: --no-cuda-version-check --no-cuda-gpu-arch=all \
134134
// RUN: --cuda-gpu-arch=sm_70 \
135-
// RUN: -c -nogpulib -nogpuinc %s 2>&1 \
135+
// RUN: -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
136136
// RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
137137

138138
// g) There's no --cuda-gpu-arch=all
@@ -143,7 +143,7 @@
143143

144144

145145
// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
146-
// a) by default we're including PTX for all GPUs.
146+
// a) by default we're not including PTX for any GPU.
147147
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
148148
// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
149149
// RUN: -c %s 2>&1 \
@@ -185,7 +185,8 @@
185185
// Verify -flto=thin -fwhole-program-vtables handling. This should result in
186186
// both options being passed to the host compilation, with neither passed to
187187
// the device compilation.
188-
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
188+
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
189+
// RUN: -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
189190
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
190191
// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
191192

clang/test/Driver/cuda-output-asm.cu

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,9 @@
2020
// SM30-DAG: "-cc1" "-triple" "nvptx64-nvidia-cuda"
2121
// SM30-same: "-target-cpu" "sm_30"
2222

23-
// RUN: not %clang -### -S --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
24-
// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
2523
// RUN: not %clang -### -S --target=x86_64-linux-gnu --cuda-device-only \
2624
// RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \
2725
// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
28-
// RUN: not %clang -### -emit-llvm -c --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
29-
// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
3026
// MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files
3127
// Make sure we do not get duplicate diagnostics.
3228
// MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files

0 commit comments

Comments
 (0)