Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions clang/lib/CodeGen/CGCUDANV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1280,8 +1280,7 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
return nullptr;
}
if (CGM.getLangOpts().OffloadViaLLVM ||
(CGM.getLangOpts().OffloadingNewDriver &&
(CGM.getLangOpts().HIP || RelocatableDeviceCode)))
(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
createOffloadingEntries();
else
return makeModuleCtorFunction();
Expand Down
46 changes: 26 additions & 20 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4413,10 +4413,6 @@ void Driver::BuildDefaultActions(Compilation &C, DerivedArgList &Args,
options::OPT_no_offload_new_driver,
C.isOffloadingHostKind(Action::OFK_Cuda));

bool HIPNoRDC =
C.isOffloadingHostKind(Action::OFK_HIP) &&
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);

// Builder to be used to build offloading actions.
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
!UseNewOffloadingDriver
Expand Down Expand Up @@ -4550,7 +4546,7 @@ void Driver::BuildDefaultActions(Compilation &C, DerivedArgList &Args,
// Check if this Linker Job should emit a static library.
if (ShouldEmitStaticLibrary(Args)) {
LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image);
} else if ((UseNewOffloadingDriver && !HIPNoRDC) ||
} else if (UseNewOffloadingDriver ||
Args.hasArg(options::OPT_offload_link)) {
LA = C.MakeAction<LinkerWrapperJobAction>(LinkerInputs, types::TY_Image);
LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
Expand Down Expand Up @@ -4887,20 +4883,6 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
<< "-fhip-emit-relocatable"
<< "--offload-device-only";

// For HIP non-rdc non-device-only compilation, create a linker wrapper
// action for each host object to link, bundle and wrap device files in
// it.
if ((isa<AssembleJobAction>(HostAction) ||
(isa<BackendJobAction>(HostAction) &&
HostAction->getType() == types::TY_LTO_BC)) &&
HIPNoRDC && !offloadDeviceOnly()) {
ActionList AL{HostAction};
HostAction = C.MakeAction<LinkerWrapperJobAction>(AL, types::TY_Object);
HostAction->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
/*BoundArch=*/nullptr);
return HostAction;
}

// Don't build offloading actions if we do not have a compile action. If
// preprocessing only ignore embedding.
if (!(isa<CompileJobAction>(HostAction) ||
Expand Down Expand Up @@ -5065,6 +5047,21 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
DDep.add(*FatbinAction,
*C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr,
Action::OFK_HIP);
} else if (HIPNoRDC) {
// Package all the offloading actions into a single output that can be
// embedded in the host and linked.
Action *PackagerAction =
C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image);

// For HIP non-RDC compilation, wrap the device binary with linker wrapper
// before bundling with host code. Do not bind a specific GPU arch here,
// as the packaged image may contain entries for multiple GPUs.
ActionList AL{PackagerAction};
PackagerAction =
C.MakeAction<LinkerWrapperJobAction>(AL, types::TY_HIP_FATBIN);
DDep.add(*PackagerAction,
*C.getOffloadToolChains<Action::OFK_HIP>().first->second,
/*BoundArch=*/nullptr, Action::OFK_HIP);
} else {
// Package all the offloading actions into a single output that can be
// embedded in the host and linked.
Expand Down Expand Up @@ -5194,6 +5191,14 @@ Action *Driver::ConstructPhaseAction(
return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
}
case phases::Backend: {
// Skip a redundant Backend phase for HIP device code when using the new
// offload driver, where mid-end is done in linker wrapper.
if (TargetDeviceOffloadKind == Action::OFK_HIP &&
Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false) &&
!offloadDeviceOnly())
return Input;

if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
types::ID Output;
if (Args.hasArg(options::OPT_ffat_lto_objects) &&
Expand All @@ -5213,7 +5218,8 @@ Action *Driver::ConstructPhaseAction(
if (Args.hasArg(options::OPT_emit_llvm) ||
TargetDeviceOffloadKind == Action::OFK_SYCL ||
(((Input->getOffloadingToolChain() &&
Input->getOffloadingToolChain()->getTriple().isAMDGPU()) ||
Input->getOffloadingToolChain()->getTriple().isAMDGPU() &&
TargetDeviceOffloadKind != Action::OFK_None) ||
TargetDeviceOffloadKind == Action::OFK_HIP) &&
((Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false) ||
Expand Down
16 changes: 8 additions & 8 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7636,7 +7636,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(CudaDeviceInput->getFilename());
} else if (!HostOffloadingInputs.empty()) {
if (IsCuda && !IsRDCMode) {
if ((IsCuda || IsHIP) && !IsRDCMode) {
assert(HostOffloadingInputs.size() == 1 && "Only one input expected");
CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(HostOffloadingInputs.front().getFilename());
Expand Down Expand Up @@ -9093,7 +9093,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
auto ShouldForward = [&](const llvm::DenseSet<unsigned> &Set, Arg *A,
const ToolChain &TC) {
// CMake hack to avoid printing verbose information for HIP non-RDC mode.
if (A->getOption().matches(OPT_v) && JA.getType() == types::TY_Object)
if (A->getOption().matches(OPT_v) && JA.getType() == types::TY_HIP_FATBIN)
return false;
return (Set.contains(A->getOption().getID()) ||
(A->getOption().getGroup().isValid() &&
Expand Down Expand Up @@ -9175,7 +9175,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
// non-RDC mode compilation. This confuses default CMake implicit linker
// argument parsing when the language is set to HIP and the system linker is
// also `ld.lld`.
if (Args.hasArg(options::OPT_v) && JA.getType() != types::TY_Object)
if (Args.hasArg(options::OPT_v) && JA.getType() != types::TY_HIP_FATBIN)
CmdArgs.push_back("--wrapper-verbose");
if (Arg *A = Args.getLastArg(options::OPT_cuda_path_EQ))
CmdArgs.push_back(
Expand Down Expand Up @@ -9247,14 +9247,14 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,

// We use action type to differentiate two use cases of the linker wrapper.
// TY_Image for normal linker wrapper work.
// TY_Object for HIP fno-gpu-rdc embedding device binary in a relocatable
// object.
assert(JA.getType() == types::TY_Object || JA.getType() == types::TY_Image);
if (JA.getType() == types::TY_Object) {
// TY_HIP_FATBIN for HIP fno-gpu-rdc emitting a fat binary without wrapping.
assert(JA.getType() == types::TY_HIP_FATBIN ||
JA.getType() == types::TY_Image);
if (JA.getType() == types::TY_HIP_FATBIN) {
CmdArgs.push_back("--emit-fatbin-only");
CmdArgs.append({"-o", Output.getFilename()});
for (auto Input : Inputs)
CmdArgs.push_back(Input.getFilename());
CmdArgs.push_back("-r");
} else
for (const char *LinkArg : LinkCommand->getArguments())
CmdArgs.push_back(LinkArg);
Expand Down
6 changes: 3 additions & 3 deletions clang/test/Driver/hip-binding.hip
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,6 @@
// RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
// LTO-NO-RDC: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[LTO_908:.+]]"
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[LTO_90A:.+]]"
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[LTO_908]]", "[[LTO_90A]]"], output: "[[PKG:.+]]"
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[PKG]]"], output: "[[OBJ:.+]]"
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OBJ]]"], output: "hip-binding.o"
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "Offload::Packager", inputs: ["[[LTO_908]]", "[[LTO_90A]]"], output: "[[PKG:.+]]"
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "Offload::Linker", inputs: ["[[PKG]]"], output: "[[HIPFB:.+]]"
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[HIPFB]]"], output: "hip-binding.o"
48 changes: 23 additions & 25 deletions clang/test/Driver/hip-phases.hip
Original file line number Diff line number Diff line change
Expand Up @@ -33,32 +33,33 @@
// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
// OLDN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
// NEW-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
// OLDN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
// OLDR-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
// OLD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
// OLD-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
// NEW-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, ir
// NEW-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, ir
// OLDN-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
// NEW-DAG: [[P10:[0-9]+]]: llvm-offload-binary, {[[P9]]}, image, (device-[[T]])
// NEW-DAG: [[P7:[0-9]+]]: llvm-offload-binary, {[[P6]]}, image, (device-[[T]])
// NEWN-DAG: [[P8:[0-9]+]]: clang-linker-wrapper, {[[P7]]}, hip-fatbin, (device-[[T]])
// NEWLTO-DAG: [[P8:[0-9]+]]: clang-linker-wrapper, {[[P7]]}, hip-fatbin, (device-[[T]])
// OLDR-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])

// OLDN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
// NEW-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (x86_64-unknown-linux-gnu)" {[[P10]]}, ir
// NEWN-DAG: [[P9:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, ir
// NEWLTO-DAG: [[P9:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, ir
// NEWR-DAG: [[P8:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (x86_64-unknown-linux-gnu)" {[[P7]]}, ir
// OLDR-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
// OLDN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
// OLDN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// NEWN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
// NEWN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// NEWLTO-DAG: [[P13:[0-9]+]]: backend, {[[P11]]}, lto-bc, (host-hip)
// NEWR-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
// NEWR-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
// NEWN-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (host-[[T]])
// NEWN-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (host-[[T]])
// NEWLTO-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, lto-bc, (host-hip)
// NEWR-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (host-[[T]])
// NEWR-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (host-[[T]])
// OLDN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
// NEWN-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, object, (host-[[T]])
// NEWLTO-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, object, (host-[[T]])
// NEWN-DAG: [[P12:[0-9]+]]: clang-linker-wrapper, {[[P11]]}, image, (host-[[T]])
// OLDR-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
// NEWR-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]])
// NEWN-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image
// NEWR-DAG: [[P11:[0-9]+]]: clang-linker-wrapper, {[[P10]]}, image, (host-[[T]])

//
// Test single gpu architecture up to the assemble phase.
Expand Down Expand Up @@ -613,7 +614,6 @@
// MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
// MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
// MIXED-DAG: input, "{{.*}}empty.cpp", c++
// MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (host-hip)
// MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (device-hip

// MIXED2-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
Expand Down Expand Up @@ -658,17 +658,15 @@
// LTO-NEXT: 3: input, "[[INPUT]]", hip, (device-hip, gfx908)
// LTO-NEXT: 4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx908)
// LTO-NEXT: 5: compiler, {4}, ir, (device-hip, gfx908)
// LTO-NEXT: 6: backend, {5}, lto-bc, (device-hip, gfx908)
// LTO-NEXT: 7: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {6}, lto-bc
// LTO-NEXT: 8: input, "[[INPUT]]", hip, (device-hip, gfx90a)
// LTO-NEXT: 9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx90a)
// LTO-NEXT: 10: compiler, {9}, ir, (device-hip, gfx90a)
// LTO-NEXT: 11: backend, {10}, lto-bc, (device-hip, gfx90a)
// LTO-NEXT: 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {11}, lto-bc
// LTO-NEXT: 13: llvm-offload-binary, {7, 12}, image, (device-hip)
// LTO-NEXT: 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (x86_64-unknown-linux-gnu)" {13}, ir
// LTO-NEXT: 15: backend, {14}, assembler, (host-hip)
// LTO-NEXT: 16: assembler, {15}, object, (host-hip)
// LTO-NEXT: 6: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {5}, ir
// LTO-NEXT: 7: input, "[[INPUT]]", hip, (device-hip, gfx90a)
// LTO-NEXT: 8: preprocessor, {7}, hip-cpp-output, (device-hip, gfx90a)
// LTO-NEXT: 9: compiler, {8}, ir, (device-hip, gfx90a)
// LTO-NEXT: 10: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {9}, ir
// LTO-NEXT: 11: llvm-offload-binary, {6, 10}, image, (device-hip)
// LTO-NEXT: 12: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (x86_64-unknown-linux-gnu)" {11}, ir
// LTO-NEXT: 13: backend, {12}, assembler, (host-hip)
// LTO-NEXT: 14: assembler, {13}, object, (host-hip)

//
// Test the new driver when not bundling
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/hip-spirv-translator-new-driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
// RUN: | FileCheck %s

// CHECK-NOT: {{".*clang-linker-wrapper.*"}} {{.*}} "-o" "[[OUTPUT_FILE:.*.o]]" {{.*}}"[[OUTPUT_FILE]]"
// CHECK: {{".*clang-linker-wrapper.*"}} {{.*}} "-o" {{".*.tmp.o"}}
// CHECK: {{".*clang-linker-wrapper.*"}} {{.*}} "-o" {{".*.hipfb"}}
28 changes: 14 additions & 14 deletions clang/test/Driver/hip-toolchain-no-rdc.hip
Original file line number Diff line number Diff line change
Expand Up @@ -101,19 +101,19 @@
// NEW-SAME: "--image=file=[[OBJ_DEV_A_803]],triple=amdgcn-amd-amdhsa,arch=gfx803,kind=hip"
// NEW-SAME: "--image=file=[[OBJ_DEV_A_900]],triple=amdgcn-amd-amdhsa,arch=gfx900,kind=hip"

// NEW: [[WRAPPER:".*clang-linker-wrapper]]"
// NEW-SAME: "--emit-fatbin-only"
// NEW-SAME: "-o" "[[HIPFB_A:.*.hipfb]]" "[[PACKAGE_A]]"

// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
// OLD-SAME: {{.*}} "-fcuda-include-gpubinary" "[[BUNDLE_A]]"
// NEW-SAME: {{.*}} "-fembed-offload-object=[[PACKAGE_A]]"
// OLD-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip"
// NEW-SAME: {{.*}} "-o" [[A_OBJ_HOST_TMP:".*o"]] "-x" "hip"
// NEW-SAME: {{.*}} "-fcuda-include-gpubinary" "[[HIPFB_A]]"
// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip"
// CHECK-SAME: {{.*}} [[A_SRC]]

// NEW: [[WRAPPER:".*clang-linker-wrapper]]" {{.*}}"--host-triple=x86_64-unknown-linux-gnu"
// NEW: "--linker-path={{.*}}" "-o" [[A_OBJ_HOST:".*o"]] [[A_OBJ_HOST_TMP]] "-r"

//
// Compile device code in b.hip to code object for gfx803.
//
Expand Down Expand Up @@ -173,19 +173,19 @@
// NEW-SAME: "--image=file=[[OBJ_DEV_B_803]],triple=amdgcn-amd-amdhsa,arch=gfx803,kind=hip"
// NEW-SAME: "--image=file=[[OBJ_DEV_B_900]],triple=amdgcn-amd-amdhsa,arch=gfx900,kind=hip"

// NEW: [[WRAPPER:".*clang-linker-wrapper]]"
// NEW-SAME: "--emit-fatbin-only"
// NEW-SAME: "-o" "[[HIPFB_B:.*.hipfb]]" "[[PACKAGE_B]]"

// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
// OLD-SAME: {{.*}} "-fcuda-include-gpubinary" "[[BUNDLE_B]]"
// NEW-SAME: {{.*}} "-fembed-offload-object=[[PACKAGE_B]]"
// OLD-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip"
// NEW-SAME: {{.*}} "-o" [[B_OBJ_HOST_TMP:".*o"]] "-x" "hip"
// NEW-SAME: {{.*}} "-fcuda-include-gpubinary" "[[HIPFB_B]]"
// CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip"
// CHECK-SAME: {{.*}} [[B_SRC]]

// NEW: [[WRAPPER:".*clang-linker-wrapper]]" {{.*}}"--host-triple=x86_64-unknown-linux-gnu"
// NEW: "--linker-path={{.*}}" "-o" [[B_OBJ_HOST:".*o"]] [[B_OBJ_HOST_TMP]] "-r"

//
// Link host objects.
//
Expand Down Expand Up @@ -219,5 +219,5 @@
// RUN: %clang -### --target=x86_64-linux-gnu -fno-gpu-rdc -nogpulib -nogpuinc \
// RUN: --offload-new-driver --offload-arch=gfx908 -v %s 2>&1 | FileCheck %s --check-prefix=VERBOSE
// VERBOSE: clang-linker-wrapper
// VERBOSE-NOT: --device-compiler=amdgcn-amd-amdhsa=-v
// VERBOSE-NOT: --wrapper-verbose
// VERBOSE-NOT: --device-compiler=amdgcn-amd-amdhsa=-v {{.*}}-o {{.*}}.hipfb
// VERBOSE-NOT: --wrapper-verbose {{.*}}-o {{.*}}.hipfb
40 changes: 40 additions & 0 deletions clang/test/Driver/linker-wrapper-hip-no-rdc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// UNSUPPORTED: system-windows
// REQUIRES: amdgpu-registered-target

// Test HIP non-RDC linker wrapper behavior with new offload driver.
// The linker wrapper should output .hipfb files directly without using the -r option.

// An externally visible variable so static libraries extract.
__attribute__((visibility("protected"), used)) int x;

// Create device binaries and package them
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc
// RUN: llvm-offload-binary -o %t.out \
// RUN: --image=file=%t.amdgpu.bc,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx1100 \
// RUN: --image=file=%t.amdgpu.bc,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx1200

// Test that the linker wrapper outputs a .hipfb file without the -r option for HIP non-RDC.
// The linker wrapper is called directly with the packaged device binary (not embedded in host object)
// Note: When called directly (not through the driver), the linker wrapper processes architectures
// from the packaged binary. The test verifies it can process at least one architecture correctly.
// RUN: clang-linker-wrapper --emit-fatbin-only --linker-path=/usr/bin/ld %t.out -o %t.hipfb 2>&1

// Verify the fat binary was created
// RUN: test -f %t.hipfb

// List code objects in the fat binary
// RUN: clang-offload-bundler -type=o -input=%t.hipfb -list | FileCheck %s --check-prefix=HIP-FATBIN-LIST

// HIP-FATBIN-LIST-DAG: hip-amdgcn-amd-amdhsa--gfx1100
// HIP-FATBIN-LIST-DAG: hip-amdgcn-amd-amdhsa--gfx1200
// HIP-FATBIN-LIST-DAG: host-x86_64-unknown-linux-gnu-

// Extract code objects for both architectures from the fat binary
// RUN: clang-offload-bundler -type=o -targets=hip-amdgcn-amd-amdhsa--gfx1100,hip-amdgcn-amd-amdhsa--gfx1200 \
// RUN: -output=%t.gfx1100.co -output=%t.gfx1200.co -input=%t.hipfb -unbundle

// Verify extracted code objects exist and are not empty
// RUN: test -f %t.gfx1100.co
// RUN: test -s %t.gfx1100.co
// RUN: test -f %t.gfx1200.co
// RUN: test -s %t.gfx1200.co
Loading
Loading