Skip to content

Commit 953beb9

Browse files
authored
[CUDA] Move CUDA to new driver by default (#122312)
Summary: This patch makes the --offload-new-driver flag the default for CUDA. This mostly just required updating a lot of tests to use the old format. I tried to update them where possible, but some were directly checking the old format. See the RFC: https://discourse.llvm.org/t/rfc-use-the-new-offloding-driver-for-cuda-and-hip-compilation-by-default/77468/18
1 parent b43c97c commit 953beb9

File tree

8 files changed

+57
-69
lines changed

8 files changed

+57
-69
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,12 @@ CUDA Support
10831083
- Clang now supports CUDA SDK up to 12.6
10841084
- Added support for sm_100
10851085
- Added support for `__grid_constant__` attribute.
1086+
- CUDA now uses the new offloading driver by default. The new driver supports
1087+
device-side LTO, interoperability with OpenMP and other languages, and native ``-fgpu-rdc``
1088+
support with static libraries. The old behavior can be restored using the
1089+
``--no-offload-new-driver`` flag. The binary format is no longer compatible
1090+
with the NVIDIA compiler's RDC-mode support. More information can be found at:
1091+
https://clang.llvm.org/docs/OffloadingDesign.html
10861092

10871093
AIX Support
10881094
^^^^^^^^^^^

clang/lib/Driver/Driver.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4339,7 +4339,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
43394339
Args.hasFlag(options::OPT_foffload_via_llvm,
43404340
options::OPT_fno_offload_via_llvm, false) ||
43414341
Args.hasFlag(options::OPT_offload_new_driver,
4342-
options::OPT_no_offload_new_driver, false);
4342+
options::OPT_no_offload_new_driver,
4343+
C.isOffloadingHostKind(Action::OFK_Cuda));
43434344

43444345
// Builder to be used to build offloading actions.
43454346
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -5089,7 +5090,8 @@ Action *Driver::ConstructPhaseAction(
50895090
offloadDeviceOnly() ||
50905091
(TargetDeviceOffloadKind == Action::OFK_HIP &&
50915092
!Args.hasFlag(options::OPT_offload_new_driver,
5092-
options::OPT_no_offload_new_driver, false)))
5093+
options::OPT_no_offload_new_driver,
5094+
C.isOffloadingHostKind(Action::OFK_Cuda))))
50935095
? types::TY_LLVM_IR
50945096
: types::TY_LLVM_BC;
50955097
return C.MakeAction<BackendJobAction>(Input, Output);

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5064,7 +5064,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
50645064
JA.isHostOffloading(Action::OFK_SYCL) ||
50655065
(JA.isHostOffloading(C.getActiveOffloadKinds()) &&
50665066
Args.hasFlag(options::OPT_offload_new_driver,
5067-
options::OPT_no_offload_new_driver, false));
5067+
options::OPT_no_offload_new_driver,
5068+
C.isOffloadingHostKind(Action::OFK_Cuda)));
50685069

50695070
bool IsRDCMode =
50705071
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
@@ -5419,7 +5420,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
54195420
if (IsUsingLTO) {
54205421
if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
54215422
!Args.hasFlag(options::OPT_offload_new_driver,
5422-
options::OPT_no_offload_new_driver, false) &&
5423+
options::OPT_no_offload_new_driver,
5424+
C.isOffloadingHostKind(Action::OFK_Cuda)) &&
54235425
!Triple.isAMDGPU()) {
54245426
D.Diag(diag::err_drv_unsupported_opt_for_target)
54255427
<< Args.getLastArg(options::OPT_foffload_lto,
@@ -6896,7 +6898,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
68966898
options::OPT_fno_offload_via_llvm, false)) {
68976899
CmdArgs.append({"--offload-new-driver", "-foffload-via-llvm"});
68986900
} else if (Args.hasFlag(options::OPT_offload_new_driver,
6899-
options::OPT_no_offload_new_driver, false)) {
6901+
options::OPT_no_offload_new_driver,
6902+
C.isOffloadingHostKind(Action::OFK_Cuda))) {
69006903
CmdArgs.push_back("--offload-new-driver");
69016904
}
69026905

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
506506
static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
507507
// The new driver does not include PTX by default to avoid overhead.
508508
bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
509-
options::OPT_no_offload_new_driver, false);
509+
options::OPT_no_offload_new_driver, true);
510510
for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ,
511511
options::OPT_no_cuda_include_ptx_EQ)) {
512512
A->claim();

clang/test/Driver/cuda-arch-translation.cu

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,19 @@
6868

6969
// HIP: clang-offload-bundler
7070

71-
// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
72-
// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
73-
// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30
74-
// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32
75-
// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35
76-
// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37
77-
// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
78-
// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
79-
// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
80-
// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
81-
// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
82-
// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
83-
// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
71+
// SM20:--image=profile=sm_20{{.*}}
72+
// SM21:--image=profile=sm_21{{.*}}
73+
// SM30:--image=profile=sm_30{{.*}}
74+
// SM32:--image=profile=sm_32{{.*}}
75+
// SM35:--image=profile=sm_35{{.*}}
76+
// SM37:--image=profile=sm_37{{.*}}
77+
// SM50:--image=profile=sm_50{{.*}}
78+
// SM52:--image=profile=sm_52{{.*}}
79+
// SM53:--image=profile=sm_53{{.*}}
80+
// SM60:--image=profile=sm_60{{.*}}
81+
// SM61:--image=profile=sm_61{{.*}}
82+
// SM62:--image=profile=sm_62{{.*}}
83+
// SM70:--image=profile=sm_70{{.*}}
8484
// GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600
8585
// GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601
8686
// GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602

clang/test/Driver/cuda-bindings.cu

Lines changed: 12 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@
2323
// BIN-NOT: cuda-bindings-device-cuda-nvptx64
2424
// BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
2525
// BIN-NOT: cuda-bindings-device-cuda-nvptx64
26-
// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
26+
// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
2727

2828
//
2929
// Test single gpu architecture up to the assemble phase.
3030
//
3131
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
3232
// RUN: | FileCheck -check-prefix=ASM %s
33-
// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
33+
// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[PTX:.+]].s"
3434
// ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
3535

3636
//
@@ -61,40 +61,21 @@
6161
// BIN2-NOT: cuda-bindings-device-cuda-nvptx64
6262
// BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
6363
// BIN2-NOT: cuda-bindings-device-cuda-nvptx64
64-
// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
65-
// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
64+
// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
65+
// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out"
6666

6767
// .. same, but with -fsyntax-only
6868
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
6969
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
70-
// RUN: | FileCheck -check-prefix=SYN %s
71-
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
72-
// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
73-
// RUN: | FileCheck -check-prefix=SYN %s
74-
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
75-
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
76-
// RUN: | FileCheck -check-prefix=SYN %s
77-
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
78-
// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
79-
// RUN: | FileCheck -check-prefix=SYN %s
80-
// SYN-NOT: inputs:
81-
// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
82-
// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
83-
// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
84-
// SYN-NOT: inputs
85-
86-
// .. and with --offload-new-driver
87-
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
88-
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 --offload-new-driver %s 2>&1 \
8970
// RUN: | FileCheck -check-prefix=NDSYN %s
9071
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
91-
// RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
72+
// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
9273
// RUN: | FileCheck -check-prefix=NDSYN %s
9374
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
94-
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --offload-new-driver 2>&1 \
75+
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
9576
// RUN: | FileCheck -check-prefix=NDSYN %s
9677
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
97-
// RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
78+
// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
9879
// RUN: | FileCheck -check-prefix=NDSYN %s
9980
// NDSYN-NOT: inputs:
10081
// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
@@ -109,8 +90,8 @@
10990
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
11091
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
11192
// RUN: | FileCheck -check-prefix=ASM2 %s
112-
// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
113-
// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
93+
// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+]].s"
94+
// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+]].s"
11495
// ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
11596

11697
//
@@ -125,7 +106,7 @@
125106
// RUN: | FileCheck -check-prefix=HBIN %s
126107
// HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
127108
// HBIN-NOT: cuda-bindings-device-cuda-nvptx64
128-
// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
109+
// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
129110

130111
//
131112
// Test one or more gpu architecture up to the assemble phase in host-only
@@ -163,7 +144,7 @@
163144
// Test two gpu architectures with complete compilation in device-only
164145
// compilation mode.
165146
//
166-
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
147+
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
167148
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
168149
// RUN: | FileCheck -check-prefix=DBIN2 %s
169150
// DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
@@ -177,7 +158,7 @@
177158
// Test two gpu architectures up to the assemble phase in device-only
178159
// compilation mode.
179160
//
180-
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
161+
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
181162
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
182163
// RUN: | FileCheck -check-prefix=DASM2 %s
183164
// DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"

clang/test/Driver/cuda-options.cu

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22

33
// Simple compilation case. Compile device-side to PTX assembly and make sure
44
// we use it on the host side.
5-
// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
5+
// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
66
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
77
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
88
// RUN: -check-prefix NOLINK %s
99

1010
// Typical compilation + link case.
11-
// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
11+
// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
1212
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
1313
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
1414
// RUN: -check-prefix LINK %s
@@ -33,7 +33,7 @@
3333
// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
3434

3535
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
36-
// RUN: --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
36+
// RUN: --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
3737
// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
3838
// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
3939

@@ -47,28 +47,28 @@
4747
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
4848
// RUN: -check-prefix NOHOST -check-prefix NOLINK %s
4949

50-
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
50+
// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-host-only \
5151
// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
5252
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
5353
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
5454
// RUN: -check-prefix LINK %s
5555

56-
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
56+
// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-device-only \
5757
// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
5858
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
5959
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
6060
// RUN: -check-prefix LINK %s
6161

6262
// Verify that --cuda-gpu-arch option passes the correct GPU architecture to
6363
// device compilation.
64-
// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
64+
// RUN: %clang -### -nogpulib -nogpuinc --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
6565
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
6666
// RUN: -check-prefix DEVICE-SM52 -check-prefix HOST \
6767
// RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
6868

6969
// Verify that there is one device-side compilation per --cuda-gpu-arch args
7070
// and that all results are included on the host side.
71-
// RUN: %clang -### --target=x86_64-linux-gnu \
71+
// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu \
7272
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
7373
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
7474
// RUN: -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
@@ -128,9 +128,9 @@
128128
// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
129129
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
130130
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
131-
// RUN: --no-cuda-gpu-arch=all \
131+
// RUN: --no-cuda-version-check --no-cuda-gpu-arch=all \
132132
// RUN: --cuda-gpu-arch=sm_70 \
133-
// RUN: -c -nogpulib -nogpuinc %s 2>&1 \
133+
// RUN: -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
134134
// RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
135135

136136
// g) There's no --cuda-gpu-arch=all
@@ -141,9 +141,9 @@
141141

142142

143143
// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
144-
// a) by default we're including PTX for all GPUs.
144+
// a) by default we're not including PTX for any GPU.
145145
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
146-
// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
146+
// RUN: --cuda-include-ptx=all --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
147147
// RUN: -c %s 2>&1 \
148148
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s
149149

@@ -157,12 +157,12 @@
157157
// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
158158
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
159159
// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
160-
// RUN: --no-cuda-include-ptx=sm_60 \
160+
// RUN: --no-cuda-include-ptx=sm_60 --cuda-include-ptx=sm_52 \
161161
// RUN: -c %s 2>&1 \
162162
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,PTX-SM52 %s
163163
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
164164
// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
165-
// RUN: --no-cuda-include-ptx=sm_52 \
165+
// RUN: --no-cuda-include-ptx=sm_52 --cuda-include-ptx=sm_60 \
166166
// RUN: -c %s 2>&1 \
167167
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,NOPTX-SM52 %s
168168

@@ -183,8 +183,8 @@
183183
// Verify -flto=thin -fwhole-program-vtables handling. This should result in
184184
// both options being passed to the host compilation, with neither passed to
185185
// the device compilation.
186-
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
187-
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
186+
// RUN: %clang -### --cuda-include-ptx=sm_60 --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
187+
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,NOLINK,THINLTOWPD %s
188188
// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
189189

190190
// ARCH-SM52: "-cc1"{{.*}}"-target-cpu" "sm_52"

clang/test/Driver/cuda-output-asm.cu

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,9 @@
1717
// SM30-DAG: "-cc1" "-triple" "nvptx64-nvidia-cuda"
1818
// SM30-same: "-target-cpu" "sm_30"
1919

20-
// RUN: not %clang -### -S --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
21-
// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
2220
// RUN: not %clang -### -S --target=x86_64-linux-gnu --cuda-device-only \
2321
// RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \
2422
// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
25-
// RUN: not %clang -### -emit-llvm -c --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
26-
// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
2723
// MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files
2824
// Make sure we do not get duplicate diagnostics.
2925
// MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files

0 commit comments

Comments
 (0)