Skip to content

Commit 92591c0

Browse files
committed
merge main into amd-staging
2 parents cf409fe + 07dad4e commit 92591c0

File tree

27 files changed

+323
-153
lines changed

27 files changed

+323
-153
lines changed

clang/lib/CIR/CodeGen/CIRGenValue.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ class AggValueSlot {
271271
/// destructor for the slot. Otherwise the code which constructs it should
272272
/// push the appropriate cleanup.
273273
LLVM_PREFERRED_TYPE(bool)
274-
[[maybe_unused]] unsigned destructedFlag : 1;
274+
LLVM_ATTRIBUTE_UNUSED unsigned destructedFlag : 1;
275275

276276
/// This is set to true if the memory in the slot is known to be zero before
277277
/// the assignment into it. This means that zero fields don't need to be set.
@@ -290,15 +290,15 @@ class AggValueSlot {
290290
/// object, it's important that this flag never be set when
291291
/// evaluating an expression which constructs such an object.
292292
LLVM_PREFERRED_TYPE(bool)
293-
[[maybe_unused]] unsigned aliasedFlag : 1;
293+
LLVM_ATTRIBUTE_UNUSED unsigned aliasedFlag : 1;
294294

295295
/// This is set to true if the tail padding of this slot might overlap
296296
/// another object that may have already been initialized (and whose
297297
/// value must be preserved by this initialization). If so, we may only
298298
/// store up to the dsize of the type. Otherwise we can widen stores to
299299
/// the size of the type.
300300
LLVM_PREFERRED_TYPE(bool)
301-
[[maybe_unused]] unsigned overlapFlag : 1;
301+
LLVM_ATTRIBUTE_UNUSED unsigned overlapFlag : 1;
302302

303303
public:
304304
enum IsDestructed_t { IsNotDestructed, IsDestructed };

clang/lib/CodeGen/CGCUDANV.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1280,7 +1280,8 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
12801280
return nullptr;
12811281
}
12821282
if (CGM.getLangOpts().OffloadViaLLVM ||
1283-
(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
1283+
(CGM.getLangOpts().OffloadingNewDriver &&
1284+
(CGM.getLangOpts().HIP || RelocatableDeviceCode)))
12841285
createOffloadingEntries();
12851286
else
12861287
return makeModuleCtorFunction();

clang/lib/Driver/Driver.cpp

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4445,6 +4445,10 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
44454445
A->claim();
44464446
}
44474447

4448+
bool HIPNoRDC =
4449+
C.isOffloadingHostKind(Action::OFK_HIP) &&
4450+
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
4451+
44484452
// Builder to be used to build offloading actions.
44494453
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
44504454
!UseNewOffloadingDriver
@@ -4582,7 +4586,7 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
45824586
// Check if this Linker Job should emit a static library.
45834587
if (ShouldEmitStaticLibrary(Args)) {
45844588
LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image);
4585-
} else if (UseNewOffloadingDriver ||
4589+
} else if ((UseNewOffloadingDriver && !HIPNoRDC) ||
45864590
Args.hasArg(options::OPT_offload_link)) {
45874591
LA = C.MakeAction<LinkerWrapperJobAction>(LinkerInputs, types::TY_Image);
45884592
LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
@@ -4899,10 +4903,31 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
48994903
const InputTy &Input, StringRef CUID,
49004904
Action *HostAction) const {
49014905
// Don't build offloading actions if explicitly disabled or we do not have a
4902-
// valid source input and compile action to embed it in. If preprocessing only
4903-
// ignore embedding.
4904-
if (offloadHostOnly() || !types::isSrcFile(Input.first) ||
4905-
!(isa<CompileJobAction>(HostAction) ||
4906+
// valid source input.
4907+
if (offloadHostOnly() || !types::isSrcFile(Input.first))
4908+
return HostAction;
4909+
4910+
bool HIPNoRDC =
4911+
C.isOffloadingHostKind(Action::OFK_HIP) &&
4912+
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
4913+
4914+
// For HIP non-rdc non-device-only compilation, create a linker wrapper
4915+
// action for each host object to link, bundle and wrap device files in
4916+
// it.
4917+
if ((isa<AssembleJobAction>(HostAction) ||
4918+
(isa<BackendJobAction>(HostAction) &&
4919+
HostAction->getType() == types::TY_LTO_BC)) &&
4920+
HIPNoRDC && !offloadDeviceOnly()) {
4921+
ActionList AL{HostAction};
4922+
HostAction = C.MakeAction<LinkerWrapperJobAction>(AL, types::TY_Object);
4923+
HostAction->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
4924+
/*BoundArch=*/nullptr);
4925+
return HostAction;
4926+
}
4927+
4928+
// Don't build offloading actions if we do not have a compile action. If
4929+
// preprocessing only, ignore embedding.
4930+
if (!(isa<CompileJobAction>(HostAction) ||
49064931
getFinalPhase(Args) == phases::Preprocess))
49074932
return HostAction;
49084933

@@ -4998,12 +5023,12 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49985023
}
49995024
}
50005025

5001-
// Compiling HIP in non-RDC mode requires linking each action individually.
5026+
// Compiling HIP in device-only non-RDC mode requires linking each action
5027+
// individually.
50025028
for (Action *&A : DeviceActions) {
50035029
if ((A->getType() != types::TY_Object &&
50045030
A->getType() != types::TY_LTO_BC) ||
5005-
Kind != Action::OFK_HIP ||
5006-
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
5031+
!HIPNoRDC || !offloadDeviceOnly())
50075032
continue;
50085033
ActionList LinkerInput = {A};
50095034
A = C.MakeAction<LinkJobAction>(LinkerInput, types::TY_Image);
@@ -5027,12 +5052,12 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
50275052
}
50285053
}
50295054

5030-
// HIP code in non-RDC mode will bundle the output if it invoked the linker.
5055+
// HIP code in device-only non-RDC mode will bundle the output if it invoked
5056+
// the linker.
50315057
bool ShouldBundleHIP =
5032-
C.isOffloadingHostKind(Action::OFK_HIP) &&
5058+
HIPNoRDC && offloadDeviceOnly() &&
50335059
Args.hasFlag(options::OPT_gpu_bundle_output,
50345060
options::OPT_no_gpu_bundle_output, true) &&
5035-
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) &&
50365061
!llvm::any_of(OffloadActions,
50375062
[](Action *A) { return A->getType() != types::TY_Image; });
50385063

@@ -5052,11 +5077,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
50525077
C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN);
50535078
DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
50545079
nullptr, Action::OFK_Cuda);
5055-
} else if (C.isOffloadingHostKind(Action::OFK_HIP) &&
5056-
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
5057-
false)) {
5058-
// If we are not in RDC-mode we just emit the final HIP fatbinary for each
5059-
// translation unit, linking each input individually.
5080+
} else if (HIPNoRDC && offloadDeviceOnly()) {
5081+
// If we are in device-only non-RDC mode we just emit the final HIP
5082+
// fatbinary for each translation unit, linking each input individually.
50605083
Action *FatbinAction =
50615084
C.MakeAction<LinkJobAction>(OffloadActions, types::TY_HIP_FATBIN);
50625085
DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_HIP>(),
@@ -5211,8 +5234,11 @@ Action *Driver::ConstructPhaseAction(
52115234
(((Input->getOffloadingToolChain() &&
52125235
Input->getOffloadingToolChain()->getTriple().isAMDGPU()) ||
52135236
TargetDeviceOffloadKind == Action::OFK_HIP) &&
5214-
(Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
5215-
false) ||
5237+
((Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
5238+
false) ||
5239+
(Args.hasFlag(options::OPT_offload_new_driver,
5240+
options::OPT_no_offload_new_driver, false) &&
5241+
!offloadDeviceOnly())) ||
52165242
TargetDeviceOffloadKind == Action::OFK_OpenMP))) {
52175243
types::ID Output =
52185244
Args.hasArg(options::OPT_S) &&

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7890,7 +7890,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
78907890
CmdArgs.push_back("-fcuda-include-gpubinary");
78917891
CmdArgs.push_back(CudaDeviceInput->getFilename());
78927892
} else if (!HostOffloadingInputs.empty()) {
7893-
if ((IsCuda || IsHIP) && !IsRDCMode) {
7893+
if (IsCuda && !IsRDCMode) {
78947894
assert(HostOffloadingInputs.size() == 1 && "Only one input expected");
78957895
CmdArgs.push_back("-fcuda-include-gpubinary");
78967896
CmdArgs.push_back(HostOffloadingInputs.front().getFilename());
@@ -9477,8 +9477,20 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
94779477
// Add the linker arguments to be forwarded by the wrapper.
94789478
CmdArgs.push_back(Args.MakeArgString(Twine("--linker-path=") +
94799479
LinkCommand->getExecutable()));
9480-
for (const char *LinkArg : LinkCommand->getArguments())
9481-
CmdArgs.push_back(LinkArg);
9480+
9481+
// We use action type to differentiate two use cases of the linker wrapper.
9482+
// TY_Image for normal linker wrapper work.
9483+
// TY_Object for HIP -fno-gpu-rdc, embedding the device binary in a relocatable
9484+
// object.
9485+
assert(JA.getType() == types::TY_Object || JA.getType() == types::TY_Image);
9486+
if (JA.getType() == types::TY_Object) {
9487+
CmdArgs.append({"-o", Output.getFilename()});
9488+
for (auto Input : Inputs)
9489+
CmdArgs.push_back(Input.getFilename());
9490+
CmdArgs.push_back("-r");
9491+
} else
9492+
for (const char *LinkArg : LinkCommand->getArguments())
9493+
CmdArgs.push_back(LinkArg);
94829494

94839495
addOffloadCompressArgs(Args, CmdArgs);
94849496

clang/test/Driver/hip-binding.hip

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@
9393
// RUN: -nogpulib -nogpuinc -foffload-lto --offload-arch=gfx90a --offload-arch=gfx908 -c %s 2>&1 \
9494
// RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
9595
// LTO-NO-RDC: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[LTO_908:.+]]"
96-
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_908]]"], output: "[[OBJ_908:.+]]"
9796
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[LTO_90A:.+]]"
98-
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[LTO_90A]]"], output: "[[OBJ_90A:.+]]"
99-
// LTO-NO-RDC-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ_908]]", "[[OBJ_90A]]"], output: "[[HIPFB:.+]]"
97+
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[LTO_908]]", "[[LTO_90A]]"], output: "[[PKG:.+]]"
98+
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[PKG]]"], output: "[[OBJ:.+]]"
99+
// LTO-NO-RDC-NEXT: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OBJ]]"], output: "hip-binding.o"

clang/test/Driver/hip-phases.hip

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,57 @@
88
//
99
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
1010
// RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
11-
// RUN: | FileCheck -check-prefixes=BIN,NRD,OLD %s
11+
// RUN: | FileCheck -check-prefixes=BIN,OLD,OLDN %s
1212
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
1313
// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
14-
// RUN: | FileCheck -check-prefixes=BIN,NRD,NEW %s
14+
// RUN: | FileCheck -check-prefixes=BIN,NEW,NEWN %s
15+
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
16+
// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 -flto -c %s 2>&1 \
17+
// RUN: | FileCheck -check-prefixes=BIN,NEW,NEWLTO %s
1518
//
1619
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
1720
// RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
18-
// RUN: | FileCheck -check-prefixes=BIN,RDC %s
21+
// RUN: | FileCheck -check-prefixes=BIN,OLD,OLDR %s
22+
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
23+
// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
24+
// RUN: | FileCheck -check-prefixes=BIN,NEW,NEWR %s
1925
//
2026
// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
2127
// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
2228
// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
23-
// RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
24-
// RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
29+
// OLDR-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
30+
// OLDR-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
2531

2632
// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]])
2733
// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
2834
// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
29-
// NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
30-
// NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
31-
// RDC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
32-
// BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
33-
// BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
34-
// NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
35-
// RDC-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])
36-
37-
// NRD-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
38-
// RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
39-
// NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
40-
// NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
41-
// OLD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
42-
// NEW-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]])
43-
// RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
35+
// OLDN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
36+
// NEW-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
37+
// OLDN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
38+
// OLDR-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
39+
// OLD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
40+
// OLD-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
41+
// NEW-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, ir
42+
// OLDN-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
43+
// NEW-DAG: [[P10:[0-9]+]]: clang-offload-packager, {[[P9]]}, image, (device-[[T]])
44+
// OLDR-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])
45+
46+
// OLDN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
47+
// NEW-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (x86_64-unknown-linux-gnu)" {[[P10]]}, ir
48+
// OLDR-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
49+
// OLDN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
50+
// OLDN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
51+
// NEWN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
52+
// NEWN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
53+
// NEWLTO-DAG: [[P13:[0-9]+]]: backend, {[[P11]]}, lto-bc, (host-hip)
54+
// NEWR-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
55+
// NEWR-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
56+
// OLDN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
57+
// NEWN-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, object, (host-[[T]])
58+
// NEWLTO-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, object, (host-[[T]])
59+
// OLDR-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
60+
// NEWR-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]])
61+
// NEWN-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image
4462

4563
//
4664
// Test single gpu architecture up to the assemble phase.

0 commit comments

Comments
 (0)