Skip to content

Commit f7cd51c

Browse files
[LLVM][CGP] Allow finer control for sinking compares.
Compare sinking is selectable based on the result of hasMultipleConditionRegisters. This function is too coarse grained by not taking into account the differences between scalar and vector compares. This PR extends the interface to take an EVT to allow finer control. The new interface is used by AArch64 to disable sinking of scalable vector compares, but with isProfitableToSinkOperands updated to maintain the cases that are specifically tested.
1 parent 195c01c commit f7cd51c

File tree

10 files changed

+41
-38
lines changed

10 files changed

+41
-38
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -518,10 +518,12 @@ class LLVM_ABI TargetLoweringBase {
518518
return true;
519519
}
520520

521-
/// Return true if multiple condition registers are available.
522-
bool hasMultipleConditionRegisters() const {
523-
return HasMultipleConditionRegisters;
524-
}
521+
/// Does the target have multiple (allocatable) condition registers that
522+
/// can be used to store the results of comparisons for use by selects
523+
/// and conditional branches. With multiple condition registers, the code
524+
/// generator will not aggressively sink comparisons into the blocks of their
525+
/// users.
526+
virtual bool hasMultipleConditionRegisters(EVT VT) const { return false; }
525527

526528
/// Return true if the target has BitExtract instructions.
527529
bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
@@ -2453,7 +2455,7 @@ class LLVM_ABI TargetLoweringBase {
24532455
EVT VT) const {
24542456
// If a target has multiple condition registers, then it likely has logical
24552457
// operations on those registers.
2456-
if (hasMultipleConditionRegisters())
2458+
if (hasMultipleConditionRegisters(VT))
24572459
return false;
24582460
// Only do the transform if the value won't be split into multiple
24592461
// registers.
@@ -2560,15 +2562,6 @@ class LLVM_ABI TargetLoweringBase {
25602562
StackPointerRegisterToSaveRestore = R;
25612563
}
25622564

2563-
/// Tells the code generator that the target has multiple (allocatable)
2564-
/// condition registers that can be used to store the results of comparisons
2565-
/// for use by selects and conditional branches. With multiple condition
2566-
/// registers, the code generator will not aggressively sink comparisons into
2567-
/// the blocks of their users.
2568-
void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
2569-
HasMultipleConditionRegisters = hasManyRegs;
2570-
}
2571-
25722565
/// Tells the code generator that the target has BitExtract instructions.
25732566
/// The code generator will aggressively sink "shift"s into the blocks of
25742567
/// their users if the users will generate "and" instructions which can be
@@ -3604,13 +3597,6 @@ class LLVM_ABI TargetLoweringBase {
36043597
private:
36053598
const TargetMachine &TM;
36063599

3607-
/// Tells the code generator that the target has multiple (allocatable)
3608-
/// condition registers that can be used to store the results of comparisons
3609-
/// for use by selects and conditional branches. With multiple condition
3610-
/// registers, the code generator will not aggressively sink comparisons into
3611-
/// the blocks of their users.
3612-
bool HasMultipleConditionRegisters;
3613-
36143600
/// Tells the code generator that the target has BitExtract instructions.
36153601
/// The code generator will aggressively sink "shift"s into the blocks of
36163602
/// their users if the users will generate "and" instructions which can be

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,7 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
18341834
///
18351835
/// Return true if any changes are made.
18361836
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1837-
if (TLI.hasMultipleConditionRegisters())
1837+
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
18381838
return false;
18391839

18401840
// Avoid sinking soft-FP comparisons, since this can move them into a loop.

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -697,7 +697,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
697697
MaxGluedStoresPerMemcpy = 0;
698698
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
699699
MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
700-
HasMultipleConditionRegisters = false;
701700
HasExtractBitsInsn = false;
702701
JumpIsExpensive = JumpIsExpensiveOverride;
703702
PredictableSelectIsExpensive = false;

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,10 @@ class AArch64TargetLowering : public TargetLowering {
887887
bool shouldScalarizeBinop(SDValue VecOp) const override {
888888
return VecOp.getOpcode() == ISD::SETCC;
889889
}
890+
891+
bool hasMultipleConditionRegisters(EVT VT) const override {
892+
return VT.isScalableVector();
893+
}
890894
};
891895

892896
namespace AArch64 {

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6225,10 +6225,17 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
62256225
}
62266226
}
62276227

6228-
auto ShouldSinkCondition = [](Value *Cond) -> bool {
6228+
auto ShouldSinkCondition = [](Value *Cond,
6229+
SmallVectorImpl<Use *> &Ops) -> bool {
6230+
if (!isa<IntrinsicInst>(Cond))
6231+
return false;
62296232
auto *II = dyn_cast<IntrinsicInst>(Cond);
6230-
return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
6231-
isa<ScalableVectorType>(II->getOperand(0)->getType());
6233+
if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6234+
!isa<ScalableVectorType>(II->getOperand(0)->getType()))
6235+
return false;
6236+
if (isa<CmpInst>(II->getOperand(0)))
6237+
Ops.push_back(&II->getOperandUse(0));
6238+
return true;
62326239
};
62336240

62346241
switch (I->getOpcode()) {
@@ -6244,7 +6251,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
62446251
}
62456252
break;
62466253
case Instruction::Select: {
6247-
if (!ShouldSinkCondition(I->getOperand(0)))
6254+
if (!ShouldSinkCondition(I->getOperand(0), Ops))
62486255
return false;
62496256

62506257
Ops.push_back(&I->getOperandUse(0));
@@ -6254,7 +6261,7 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
62546261
if (cast<BranchInst>(I)->isUnconditional())
62556262
return false;
62566263

6257-
if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
6264+
if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition(), Ops))
62586265
return false;
62596266

62606267
Ops.push_back(&I->getOperandUse(0));

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -589,14 +589,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
589589
setSchedulingPreference(Sched::RegPressure);
590590
setJumpIsExpensive(true);
591591

592-
// FIXME: This is only partially true. If we have to do vector compares, any
593-
// SGPR pair can be a condition register. If we have a uniform condition, we
594-
// are better off doing SALU operations, where there is only one SCC. For now,
595-
// we don't have a way of knowing during instruction selection if a condition
596-
// will be uniform and we always use vector compares. Assume we are using
597-
// vector compares until that is fixed.
598-
setHasMultipleConditionRegisters(true);
599-
600592
setMinCmpXchgSizeInBits(32);
601593
setSupportsUnalignedAtomics(false);
602594

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,16 @@ class AMDGPUTargetLowering : public TargetLowering {
388388
MVT getFenceOperandTy(const DataLayout &DL) const override {
389389
return MVT::i32;
390390
}
391+
392+
bool hasMultipleConditionRegisters(EVT VT) const override {
393+
// FIXME: This is only partially true. If we have to do vector compares, any
394+
// SGPR pair can be a condition register. If we have a uniform condition, we
395+
// are better off doing SALU operations, where there is only one SCC. For
396+
// now, we don't have a way of knowing during instruction selection if a
397+
// condition will be uniform and we always use vector compares. Assume we
398+
// are using vector compares until that is fixed.
399+
return true;
400+
}
391401
};
392402

393403
namespace AMDGPUISD {

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1433,7 +1433,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
14331433
// With 32 condition bits, we don't need to sink (and duplicate) compares
14341434
// aggressively in CodeGenPrep.
14351435
if (Subtarget.useCRBits()) {
1436-
setHasMultipleConditionRegisters();
14371436
setJumpIsExpensive();
14381437
}
14391438

@@ -19848,3 +19847,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
1984819847
return Builder.CreateOr(
1984919848
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
1985019849
}
19850+
19851+
bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
19852+
return Subtarget.useCRBits();
19853+
}

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,8 @@ namespace llvm {
12071207
bool IsVarArg) const;
12081208
bool supportsTailCallFor(const CallBase *CB) const;
12091209

1210+
bool hasMultipleConditionRegisters(EVT VT) const override;
1211+
12101212
private:
12111213
struct ReuseLoadInfo {
12121214
SDValue Ptr;

llvm/test/Transforms/CodeGenPrepare/dont-sink-scalable-vector-compare.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ define void @do_not_sink_scalable_vector_compare(ptr %a, ptr %b) {
66
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: [[ENTRY:.*]]:
88
; CHECK-NEXT: [[STEP_VECTOR:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
9+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <vscale x 4 x i32> [[STEP_VECTOR]], splat (i32 16)
910
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1011
; CHECK: [[VECTOR_BODY]]:
1112
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
12-
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <vscale x 4 x i32> [[STEP_VECTOR]], splat (i32 16)
1313
; CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[INDEX]]
1414
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[SRC]], i32 4, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> poison)
1515
; CHECK-NEXT: [[DST:%.*]] = getelementptr inbounds ptr, ptr [[B]], i64 [[INDEX]]

0 commit comments

Comments
 (0)