Skip to content

Commit 0bb1bbb

Browse files
committed
Merge from 'main' to 'sycl-web' (5 commits)
CONFLICT (content): Merge conflict in clang/lib/Driver/Driver.cpp
2 parents 83d5646 + efa25c4 commit 0bb1bbb

File tree

8 files changed

+382
-214
lines changed

8 files changed

+382
-214
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7909,7 +7909,13 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
79097909
// Compiling HIP in device-only non-RDC mode requires linking each action
79107910
// individually.
79117911
for (Action *&A : DeviceActions) {
7912-
if ((A->getType() != types::TY_Object &&
7912+
// Special handling for the HIP SPIR-V toolchain because it doesn't use
7913+
// the SPIR-V backend yet doesn't report the output as an object.
7914+
bool IsAMDGCNSPIRV = A->getOffloadingToolChain() &&
7915+
A->getOffloadingToolChain()->getTriple().getOS() ==
7916+
llvm::Triple::OSType::AMDHSA &&
7917+
A->getOffloadingToolChain()->getTriple().isSPIRV();
7918+
if ((A->getType() != types::TY_Object && !IsAMDGCNSPIRV &&
79137919
A->getType() != types::TY_LTO_BC) ||
79147920
!HIPNoRDC || !offloadDeviceOnly())
79157921
continue;
@@ -7980,8 +7986,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
79807986
// fatbinary for each translation unit, linking each input individually.
79817987
Action *FatbinAction =
79827988
C.MakeAction<LinkJobAction>(OffloadActions, types::TY_HIP_FATBIN);
7983-
DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_HIP>(),
7984-
nullptr, Action::OFK_HIP);
7989+
DDep.add(*FatbinAction,
7990+
*C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr,
7991+
Action::OFK_HIP);
79857992
} else if (C.isOffloadingHostKind(Action::OFK_SYCL) &&
79867993
Args.hasArg(options::OPT_fsyclbin_EQ)) {
79877994
// With '-fsyclbin', package all the offloading actions into a single output

clang/test/Driver/hip-phases.hip

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,3 +675,26 @@
675675
// DEVICE-ONLY-NEXT: 2: compiler, {1}, ir, (device-hip, gfx90a)
676676
// DEVICE-ONLY-NEXT: 3: backend, {2}, ir, (device-hip, gfx90a)
677677
// DEVICE-ONLY-NEXT: 4: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {3}, none
678+
679+
//
680+
// Test the new driver when not bundling
681+
//
682+
// RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-phases \
683+
// RUN: --offload-device-only --offload-arch=amdgcnspirv,gfx1030 %s 2>&1 \
684+
// RUN: | FileCheck -check-prefix=SPIRV-ONLY %s
685+
// SPIRV-ONLY: 0: input, "[[INPUT:.+]]", hip, (device-hip, gfx1030)
686+
// SPIRV-ONLY-NEXT: 1: preprocessor, {0}, hip-cpp-output, (device-hip, gfx1030)
687+
// SPIRV-ONLY-NEXT: 2: compiler, {1}, ir, (device-hip, gfx1030)
688+
// SPIRV-ONLY-NEXT: 3: backend, {2}, assembler, (device-hip, gfx1030)
689+
// SPIRV-ONLY-NEXT: 4: assembler, {3}, object, (device-hip, gfx1030)
690+
// SPIRV-ONLY-NEXT: 5: linker, {4}, image, (device-hip, gfx1030)
691+
// SPIRV-ONLY-NEXT: 6: offload, "device-hip (amdgcn-amd-amdhsa:gfx1030)" {5}, image
692+
// SPIRV-ONLY-NEXT: 7: input, "[[INPUT]]", hip, (device-hip, amdgcnspirv)
693+
// SPIRV-ONLY-NEXT: 8: preprocessor, {7}, hip-cpp-output, (device-hip, amdgcnspirv)
694+
// SPIRV-ONLY-NEXT: 9: compiler, {8}, ir, (device-hip, amdgcnspirv)
695+
// SPIRV-ONLY-NEXT: 10: backend, {9}, assembler, (device-hip, amdgcnspirv)
696+
// SPIRV-ONLY-NEXT: 11: assembler, {10}, object, (device-hip, amdgcnspirv)
697+
// SPIRV-ONLY-NEXT: 12: linker, {11}, image, (device-hip, amdgcnspirv)
698+
// SPIRV-ONLY-NEXT: 13: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" {12}, image
699+
// SPIRV-ONLY-NEXT: 14: linker, {6, 13}, hip-fatbin, (device-hip)
700+
// SPIRV-ONLY-NEXT: 15: offload, "device-hip (amdgcn-amd-amdhsa)" {14}, none

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -482,16 +482,11 @@ bool llvm::wouldInstructionBeTriviallyDead(const Instruction *I,
482482

483483
if (II->isLifetimeStartOrEnd()) {
484484
auto *Arg = II->getArgOperand(1);
485-
// Lifetime intrinsics are dead when their right-hand is undef.
486-
if (isa<UndefValue>(Arg))
487-
return true;
488-
// If the right-hand is an alloc, global, or argument and the only uses
489-
// are lifetime intrinsics then the intrinsics are dead.
490-
if (isa<AllocaInst>(Arg) || isa<GlobalValue>(Arg) || isa<Argument>(Arg))
491-
return llvm::all_of(Arg->uses(), [](Use &Use) {
492-
return isa<LifetimeIntrinsic>(Use.getUser());
493-
});
494-
return false;
485+
// If the only uses of the alloca are lifetime intrinsics, then the
486+
// intrinsics are dead.
487+
return llvm::all_of(Arg->uses(), [](Use &Use) {
488+
return isa<LifetimeIntrinsic>(Use.getUser());
489+
});
495490
}
496491

497492
// Assumptions are dead if their condition is trivially true.

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2335,8 +2335,9 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
23352335
return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
23362336
}
23372337

2338-
/// Generate the phi/select nodes.
2339-
void execute(VPTransformState &State) override;
2338+
void execute(VPTransformState &State) override {
2339+
llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2340+
}
23402341

23412342
/// Return the cost of this VPBlendRecipe.
23422343
InstructionCost computeCost(ElementCount VF,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2411,42 +2411,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
24112411
}
24122412
#endif
24132413

2414-
void VPBlendRecipe::execute(VPTransformState &State) {
2415-
assert(isNormalized() && "Expected blend to be normalized!");
2416-
// We know that all PHIs in non-header blocks are converted into
2417-
// selects, so we don't have to worry about the insertion order and we
2418-
// can just use the builder.
2419-
// At this point we generate the predication tree. There may be
2420-
// duplications since this is a simple recursive scan, but future
2421-
// optimizations will clean it up.
2422-
2423-
unsigned NumIncoming = getNumIncomingValues();
2424-
2425-
// Generate a sequence of selects of the form:
2426-
// SELECT(Mask3, In3,
2427-
// SELECT(Mask2, In2,
2428-
// SELECT(Mask1, In1,
2429-
// In0)))
2430-
// Note that Mask0 is never used: lanes for which no path reaches this phi and
2431-
// are essentially undef are taken from In0.
2432-
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2433-
Value *Result = nullptr;
2434-
for (unsigned In = 0; In < NumIncoming; ++In) {
2435-
// We might have single edge PHIs (blocks) - use an identity
2436-
// 'select' for the first PHI operand.
2437-
Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2438-
if (In == 0)
2439-
Result = In0; // Initialize with the first incoming value.
2440-
else {
2441-
// Select between the current value and the previous incoming edge
2442-
// based on the incoming mask.
2443-
Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2444-
Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2445-
}
2446-
}
2447-
State.set(this, Result, OnlyFirstLaneUsed);
2448-
}
2449-
24502414
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
24512415
VPCostContext &Ctx) const {
24522416
// Handle cases where only the first lane is used the same way as the legacy

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2711,6 +2711,18 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
27112711
continue;
27122712
}
27132713

2714+
// Expand VPBlendRecipe into VPInstruction::Select.
2715+
VPBuilder Builder(&R);
2716+
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
2717+
VPValue *Select = Blend->getIncomingValue(0);
2718+
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
2719+
Select = Builder.createSelect(Blend->getMask(I),
2720+
Blend->getIncomingValue(I), Select,
2721+
R.getDebugLoc(), "predphi");
2722+
Blend->replaceAllUsesWith(Select);
2723+
ToRemove.push_back(Blend);
2724+
}
2725+
27142726
if (auto *Expr = dyn_cast<VPExpressionRecipe>(&R)) {
27152727
Expr->decompose();
27162728
ToRemove.push_back(Expr);
@@ -2724,7 +2736,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
27242736

27252737
// Expand WideIVStep.
27262738
auto *VPI = cast<VPInstruction>(&R);
2727-
VPBuilder Builder(VPI);
27282739
Type *IVTy = TypeInfo.inferScalarType(VPI);
27292740
if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
27302741
Instruction::CastOps CastOp = IVTy->isFloatingPointTy()

llvm/test/CodeGen/AMDGPU/wait-xcnt.mir

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,3 +966,45 @@ body: |
966966
$vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
967967
$sgpr0 = S_MOV_B32 0
968968
...
969+
970+
# TODO: Unnecessary wait before overwriting vgpr0.
971+
---
972+
name: overwrite_vgpr_after_smem
973+
tracksRegLiveness: true
974+
machineFunctionInfo:
975+
isEntryFunction: true
976+
body: |
977+
bb.0:
978+
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
979+
; GCN-LABEL: name: overwrite_vgpr_after_smem
980+
; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
981+
; GCN-NEXT: {{ $}}
982+
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
983+
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
984+
; GCN-NEXT: S_WAIT_XCNT 0
985+
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
986+
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
987+
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
988+
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
989+
...
990+
991+
# TODO: Unnecessary wait before overwriting sgpr0.
992+
---
993+
name: overwrite_sgpr_after_vmem
994+
tracksRegLiveness: true
995+
machineFunctionInfo:
996+
isEntryFunction: true
997+
body: |
998+
bb.0:
999+
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
1000+
; GCN-LABEL: name: overwrite_sgpr_after_vmem
1001+
; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
1002+
; GCN-NEXT: {{ $}}
1003+
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1004+
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1005+
; GCN-NEXT: S_WAIT_XCNT 0
1006+
; GCN-NEXT: $sgpr0 = S_MOV_B32 0
1007+
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
1008+
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1009+
$sgpr0 = S_MOV_B32 0
1010+
...

0 commit comments

Comments
 (0)