llvm
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
Lines changed: 6 additions & 4 deletions
diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
Lines changed: 2 additions & 3 deletions
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
Lines changed: 13 additions & 13 deletions
diff --git a/clang/test/Driver/cuda-bad-arch.cu b/clang/test/Driver/cuda-bad-arch.cu
Lines changed: 2 additions & 2 deletions
diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu
Lines changed: 12 additions & 12 deletions
diff --git a/clang/test/Driver/cuda-external-tools.cu b/clang/test/Driver/cuda-external-tools.cu
Lines changed: 7 additions & 7 deletions
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
Lines changed: 12 additions & 11 deletions
diff --git a/clang/test/Driver/cuda-output-asm.cu b/clang/test/Driver/cuda-output-asm.cu
Lines changed: 0 additions & 4 deletions
@@ -4115,9 +4115,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
   handleArguments(C, Args, Inputs, Actions);
 
   bool UseNewOffloadingDriver =
-      C.isOffloadingHostKind(Action::OFK_OpenMP) ||
+      C.getActiveOffloadKinds() != Action::OFK_None &&
       Args.hasFlag(options::OPT_offload_new_driver,
-                   options::OPT_no_offload_new_driver, false);
+                   options::OPT_no_offload_new_driver, true);
 
   // Builder to be used to build offloading actions.
   std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -4802,7 +4802,7 @@ Action *Driver::ConstructPhaseAction(
                    offloadDeviceOnly() ||
                    (TargetDeviceOffloadKind == Action::OFK_HIP &&
                     !Args.hasFlag(options::OPT_offload_new_driver,
-                                  options::OPT_no_offload_new_driver, false)))
+                                  options::OPT_no_offload_new_driver, true)))
               ? types::TY_LLVM_IR
               : types::TY_LLVM_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
 
@@ -4687,8 +4687,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool IsHostOffloadingAction =
       JA.isHostOffloading(Action::OFK_OpenMP) ||
       (JA.isHostOffloading(C.getActiveOffloadKinds()) &&
+       C.getActiveOffloadKinds() != Action::OFK_None &&
        Args.hasFlag(options::OPT_offload_new_driver,
-                    options::OPT_no_offload_new_driver, false));
+                    options::OPT_no_offload_new_driver, true));
 
   bool IsRDCMode =
       Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
@@ -4997,7 +4998,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     if (IsUsingLTO) {
       if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
           !Args.hasFlag(options::OPT_offload_new_driver,
-                        options::OPT_no_offload_new_driver, false) &&
+                        options::OPT_no_offload_new_driver, true) &&
           !Triple.isAMDGPU()) {
         D.Diag(diag::err_drv_unsupported_opt_for_target)
             << Args.getLastArg(options::OPT_foffload_lto,
@@ -6521,8 +6522,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   }
 
   // Forward the new driver to change offloading code generation.
-  if (Args.hasFlag(options::OPT_offload_new_driver,
-                   options::OPT_no_offload_new_driver, false))
+  if (C.getActiveOffloadKinds() != Action::OFK_None &&
+      Args.hasFlag(options::OPT_offload_new_driver,
+                   options::OPT_no_offload_new_driver, true))
     CmdArgs.push_back("--offload-new-driver");
 
   SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);
 
@@ -18,11 +18,10 @@
 // CUDA-SAME: "-Weverything"
 // CUDA: link
 
-// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
-// HIP-SAME: "-Weverything"
 // HIP: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-pc-windows-msvc"
 // HIP-SAME: "-Weverything"
-// HIP: {{lld.* "-flavor" "gnu" "-m" "elf64_amdgpu"}}
+// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
+// HIP-SAME: "-Weverything"
 // HIP: {{link.* "amdhip64.lib"}}
 
 // CMake uses this option when finding packages for HIP, so
 
@@ -69,19 +69,19 @@
 
 // HIP: clang-offload-bundler
 
-// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
-// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
-// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30
-// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32
-// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35
-// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37
-// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
-// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
-// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
-// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
-// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
-// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
-// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
+// SM20:--image=profile=sm_20{{.*}}
+// SM21:--image=profile=sm_21{{.*}}
+// SM30:--image=profile=sm_30{{.*}}
+// SM32:--image=profile=sm_32{{.*}}
+// SM35:--image=profile=sm_35{{.*}}
+// SM37:--image=profile=sm_37{{.*}}
+// SM50:--image=profile=sm_50{{.*}}
+// SM52:--image=profile=sm_52{{.*}}
+// SM53:--image=profile=sm_53{{.*}}
+// SM60:--image=profile=sm_60{{.*}}
+// SM61:--image=profile=sm_61{{.*}}
+// SM62:--image=profile=sm_62{{.*}}
+// SM70:--image=profile=sm_70{{.*}}
 // GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600
 // GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601
 // GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602
 
@@ -30,9 +30,9 @@
 // RUN: | FileCheck -check-prefix OK %s
 
 // We don't allow using NVPTX/AMDGCN for host compilation.
-// RUN: not %clang -### --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
+// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix HOST_NVPTX %s
-// RUN: not %clang -### --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
+// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix HOST_AMDGCN %s
 
 // OK-NOT: error: Unsupported CUDA gpu architecture
 
@@ -26,14 +26,14 @@
 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
-// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
 
 //
 // Test single gpu architecture up to the assemble phase.
 //
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefix=ASM %s
-// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
+// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[BINDINGS:.+.s]]"
 // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 
 //
@@ -64,8 +64,8 @@
 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
-// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
-// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
+// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
+// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out"
 
 // .. same, but with -fsyntax-only
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
@@ -81,9 +81,9 @@
 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
 // RUN: | FileCheck -check-prefix=SYN %s
 // SYN-NOT: inputs:
-// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
-// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+//      SYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+// SYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 // SYN-NOT: inputs
 
 // .. and with --offload-new-driver
@@ -100,7 +100,7 @@
 // RUN:        --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
 // RUN: | FileCheck -check-prefix=NDSYN %s
 // NDSYN-NOT: inputs:
-// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+//      NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 // NDSYN-NOT: inputs:
@@ -112,8 +112,8 @@
 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefix=ASM2 %s
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+.s]]"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+.s]]"
 // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 
 //
@@ -128,7 +128,7 @@
 // RUN: | FileCheck -check-prefix=HBIN %s
 // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 // HBIN-NOT: cuda-bindings-device-cuda-nvptx64
-// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
 
 //
 // Test one or more gpu architecture up to the assemble phase in host-only
@@ -166,7 +166,7 @@
 // Test two gpu architectures with complete compilation in device-only
 // compilation mode.
 //
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
 // RUN: | FileCheck -check-prefix=DBIN2 %s
 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
@@ -180,7 +180,7 @@
 // Test two gpu architectures up to the assemble phase in device-only
 // compilation mode.
 //
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
 // RUN: | FileCheck -check-prefix=DASM2 %s
 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
 
@@ -25,7 +25,7 @@
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
 // Generating relocatable device code
 // RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \
-// RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN:   --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 
 // With debugging enabled, ptxas should be run with with no ptxas optimizations.
@@ -59,7 +59,7 @@
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
 // Separate compilation targeting sm_35.
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
-// RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN:   --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 
 // 32-bit compile.
@@ -68,7 +68,7 @@
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s
 // 32-bit compile when generating relocatable device code.
 // RUN: %clang -### --target=i386-linux-gnu -fgpu-rdc -c %s 2>&1 \
-// RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN:   --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
 
 // Compile with -fintegrated-as.  This should still cause us to invoke ptxas.
@@ -77,7 +77,7 @@
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
 // Check that we still pass -c when generating relocatable device code.
 // RUN: %clang -### --target=x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
-// RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN:   --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 
 // Check -Xcuda-ptxas and -Xcuda-fatbinary
@@ -99,13 +99,13 @@
 
 // Check relocatable device code generation on MacOS.
 // RUN: %clang -### --target=x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \
-// RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN:   --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 // RUN: %clang -### --target=x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
-// RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN:   --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
 // RUN: %clang -### --target=i386-apple-macosx -fgpu-rdc -c %s 2>&1 \
-// RUN:   --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN:   --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
 
 // Check that CLANG forwards the -v flag to PTXAS.
 
@@ -4,13 +4,13 @@
 
 // Simple compilation case. Compile device-side to PTX assembly and make sure
 // we use it on the host side.
-// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix NOLINK %s
 
 // Typical compilation + link case.
-// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
@@ -35,7 +35,7 @@
 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
-// RUN:    --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
+// RUN:    --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
 
@@ -50,27 +50,27 @@
 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
-// RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN:   --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
-// RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN:   --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
 // RUN:    -check-prefix LINK %s
 
 // Verify that --cuda-gpu-arch option passes the correct GPU architecture to
 // device compilation.
-// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
 // RUN:    -check-prefix DEVICE-SM52 -check-prefix HOST \
 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
 
 // Verify that there is one device-side compilation per --cuda-gpu-arch args
 // and that all results are included on the host side.
-// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
 // RUN:             -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
@@ -130,9 +130,9 @@
 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
-// RUN:   --no-cuda-gpu-arch=all \
+// RUN:   --no-cuda-version-check --no-cuda-gpu-arch=all \
 // RUN:   --cuda-gpu-arch=sm_70 \
-// RUN:   -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN:   -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
 // RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
 
 // g) There's no --cuda-gpu-arch=all
@@ -143,7 +143,7 @@
 
 
 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
-// a) by default we're including PTX for all GPUs.
+// a) by default we're not including PTX for all GPUs.
 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 // RUN:   -c %s 2>&1 \
@@ -185,7 +185,8 @@
 // Verify -flto=thin -fwhole-program-vtables handling. This should result in
 // both options being passed to the host compilation, with neither passed to
 // the device compilation.
-// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
+// RUN:   -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
 // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 
 
@@ -20,13 +20,9 @@
 // SM30-DAG: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // SM30-same: "-target-cpu" "sm_30"
 
-// RUN: not %clang -### -S --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
 // RUN: not %clang -### -S --target=x86_64-linux-gnu --cuda-device-only \
 // RUN:   --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \
 // RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
-// RUN: not %clang -### -emit-llvm -c --target=x86_64-linux-gnu -o foo.s %s 2>&1 \
-// RUN:   | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s
 // MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files
 // Make sure we do not get duplicate diagnostics.
 // MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files