Skip to content

Commit e909c0c

Browse files
authored
[SelectOpt] Add support for AShr/LShr operands (llvm#118495)
For conditional increments with sign-check conditions such as X < 0 or X >= 0, the compiler may generate code like this: `%cmp = icmp sgt i64 %1, -1`; `%shift = ashr i64 %1, 63`; `%j.next = add nsw i64 %j, %shift`; `%sel = select i1 %cmp ...`. Here %cmp does not take part in the increment computation itself; it is only used by some other implicit or regular select expressions. This patch allows the SelectOptimize pass to recognise these cases.
1 parent 10ef20f commit e909c0c

File tree

2 files changed

+190
-24
lines changed

2 files changed

+190
-24
lines changed

llvm/lib/CodeGen/SelectOptimize.cpp

Lines changed: 66 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/CodeGen/SelectOptimize.h"
14+
#include "llvm/ADT/SetVector.h"
1415
#include "llvm/ADT/SmallVector.h"
1516
#include "llvm/ADT/Statistic.h"
1617
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -218,7 +219,7 @@ class SelectOptimizeImpl {
218219
private:
219220
// Select groups consist of consecutive select-like instructions with the same
220221
// condition. Between select-likes could be any number of auxiliary
221-
// instructions related to the condition like not, zext
222+
// instructions related to the condition like not, zext, ashr/lshr
222223
struct SelectGroup {
223224
Value *Condition;
224225
SmallVector<SelectLike, 2> Selects;
@@ -496,7 +497,13 @@ static Value *getTrueOrFalseValue(
496497

497498
auto *CBO = BO->clone();
498499
auto CondIdx = SI.getConditionOpIndex();
499-
CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
500+
auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
501+
if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
502+
CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
503+
} else {
504+
assert(isa<AShrOperator>(AuxI) && "Unexpected opcode");
505+
CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));
506+
}
500507

501508
unsigned OtherIdx = 1 - CondIdx;
502509
if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
@@ -755,6 +762,9 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
755762
// zero or some constant value on True/False branch, such as:
756763
// * ZExt(1bit)
757764
// * Not(1bit)
765+
// * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val < 0`
766+
// earlier in the BB. For conditions that check the sign of Val, the compiler
767+
// may generate shifts instead of ZExt/SExt.
758768
struct SelectLikeInfo {
759769
Value *Cond;
760770
bool IsAuxiliary;
@@ -763,11 +773,19 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
763773
};
764774

765775
DenseMap<Value *, SelectLikeInfo> SelectInfo;
776+
// Keeps visited comparisons to help identify AShr/LShr variants of auxiliary
777+
// instructions.
778+
SmallSetVector<CmpInst *, 4> SeenCmp;
766779

767780
// Check if the instruction is SelectLike or might be part of SelectLike
768781
// expression, put information into SelectInfo and return the iterator to the
769782
// inserted position.
770-
auto ProcessSelectInfo = [&SelectInfo](Instruction *I) {
783+
auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
784+
if (auto *Cmp = dyn_cast<CmpInst>(I)) {
785+
SeenCmp.insert(Cmp);
786+
return SelectInfo.end();
787+
}
788+
771789
Value *Cond;
772790
if (match(I, m_OneUse(m_ZExt(m_Value(Cond)))) &&
773791
Cond->getType()->isIntegerTy(1)) {
@@ -784,35 +802,59 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
784802
bool Inverted = match(Cond, m_Not(m_Value(Cond)));
785803
return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
786804
}
805+
Value *Val;
806+
ConstantInt *Shift;
807+
if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) &&
808+
I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
809+
for (auto *CmpI : SeenCmp) {
810+
auto Pred = CmpI->getPredicate();
811+
if (Val != CmpI->getOperand(0))
812+
continue;
813+
if ((Pred == CmpInst::ICMP_SGT &&
814+
match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
815+
(Pred == CmpInst::ICMP_SGE &&
816+
match(CmpI->getOperand(1), m_Zero())) ||
817+
(Pred == CmpInst::ICMP_SLT &&
818+
match(CmpI->getOperand(1), m_Zero())) ||
819+
(Pred == CmpInst::ICMP_SLE &&
820+
match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
821+
bool Inverted =
822+
Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
823+
return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
824+
}
825+
}
826+
return SelectInfo.end();
827+
}
787828

788-
// An Or(zext(i1 X), Y) can also be treated like a select, with condition X
829+
// A BinOp(Aux(X), Y) can also be treated like a select, with condition X
789830
// and values Y|1 and Y.
790-
if (auto *BO = dyn_cast<BinaryOperator>(I)) {
791-
switch (I->getOpcode()) {
792-
case Instruction::Add:
793-
case Instruction::Sub: {
794-
Value *X;
795-
if (!((PatternMatch::match(I->getOperand(0),
796-
m_OneUse(m_ZExt(m_Value(X)))) ||
797-
PatternMatch::match(I->getOperand(1),
798-
m_OneUse(m_ZExt(m_Value(X))))) &&
799-
X->getType()->isIntegerTy(1)))
800-
return SelectInfo.end();
801-
break;
802-
}
803-
case Instruction::Or:
804-
if (BO->getType()->isIntegerTy(1) || BO->getOpcode() != Instruction::Or)
805-
return SelectInfo.end();
806-
break;
807-
}
831+
// `Aux` can be either `ZExt(1bit)` or `XShr(Val), ValBitSize - 1`
832+
// `BinOp` can be Add, Sub, Or
833+
Value *X;
834+
auto MatchZExtPattern = m_c_BinOp(m_Value(), m_OneUse(m_ZExt(m_Value(X))));
835+
auto MatchShiftPattern =
836+
m_c_BinOp(m_Value(), m_OneUse(m_Shr(m_Value(X), m_ConstantInt(Shift))));
837+
838+
// This check is unnecessary, but it prevents costly access to the
839+
// SelectInfo map.
840+
if ((match(I, MatchZExtPattern) && X->getType()->isIntegerTy(1)) ||
841+
(match(I, MatchShiftPattern) &&
842+
X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
843+
if (I->getOpcode() != Instruction::Add &&
844+
I->getOpcode() != Instruction::Sub &&
845+
I->getOpcode() != Instruction::Or)
846+
return SelectInfo.end();
847+
848+
if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
849+
return SelectInfo.end();
808850

809851
// Iterate through operands and find those dependent on recognised sign
810852
// extending auxiliary select-like instructions. The operand index does
811853
// not matter for Add and Or. However, for Sub, we can only safely
812854
// transform when the operand is second.
813-
unsigned Idx = BO->getOpcode() == Instruction::Sub ? 1 : 0;
855+
unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
814856
for (; Idx < 2; Idx++) {
815-
auto *Op = BO->getOperand(Idx);
857+
auto *Op = I->getOperand(Idx);
816858
auto It = SelectInfo.find(Op);
817859
if (It != SelectInfo.end() && It->second.IsAuxiliary) {
818860
Cond = It->second.Cond;

llvm/test/CodeGen/AArch64/selectopt-cast.ll

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,3 +729,127 @@ loop:
729729
exit:
730730
ret void
731731
}
732+
733+
define void @test_add_lshr_add_regular_select(ptr %dst, ptr %src, i64 %i.start, i64 %j.start) {
734+
; CHECK-LABEL: @test_add_lshr_add_regular_select(
735+
; CHECK-NEXT: entry:
736+
; CHECK-NEXT: br label [[LOOP:%.*]]
737+
; CHECK: loop:
738+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100000, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
739+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[I_NEXT:%.*]], [[SELECT_END]] ]
740+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_START:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
741+
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
742+
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GEP_I]], align 8
743+
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 [[J]]
744+
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[GEP_J]], align 8
745+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP1]], -1
746+
; CHECK-NEXT: [[SHIFT:%.*]] = lshr i64 [[TMP1]], 63
747+
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP]]
748+
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]]
749+
; CHECK: select.true.sink:
750+
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[I]], 1
751+
; CHECK-NEXT: br label [[SELECT_END]]
752+
; CHECK: select.false.sink:
753+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[J]], 1
754+
; CHECK-NEXT: br label [[SELECT_END]]
755+
; CHECK: select.end:
756+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[J]], [[SELECT_TRUE_SINK]] ], [ [[TMP3]], [[SELECT_FALSE_SINK]] ]
757+
; CHECK-NEXT: [[I_NEXT]] = phi i64 [ [[TMP2]], [[SELECT_TRUE_SINK]] ], [ [[I]], [[SELECT_FALSE_SINK]] ]
758+
; CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[J]], [[SELECT_TRUE_SINK]] ], [ [[I]], [[SELECT_FALSE_SINK]] ]
759+
; CHECK-NEXT: [[INC:%.*]] = zext i1 [[CMP]] to i64
760+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST:%.*]], i64 [[IV]]
761+
; CHECK-NEXT: store i64 [[COND]], ptr [[GEP_DST]], align 8
762+
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
763+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
764+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
765+
; CHECK: exit:
766+
; CHECK-NEXT: ret void
767+
;
768+
entry:
769+
br label %loop
770+
771+
loop:
772+
%iv = phi i64 [ 100000, %entry ], [ %iv.next, %loop ]
773+
%i = phi i64 [ %i.start, %entry ], [ %i.next, %loop ]
774+
%j = phi i64 [ %j.start, %entry ], [ %j.next, %loop ]
775+
%gep.i = getelementptr inbounds ptr, ptr %src, i64 %i
776+
%0 = load ptr, ptr %gep.i, align 8
777+
%gep.j = getelementptr inbounds i64, ptr %0, i64 %j
778+
%1 = load i64, ptr %gep.j, align 8
779+
%cmp = icmp sgt i64 %1, -1
780+
%shift = lshr i64 %1, 63
781+
%j.next = add nsw i64 %j, %shift
782+
%inc = zext i1 %cmp to i64
783+
%i.next = add nsw i64 %i, %inc
784+
%cond = select i1 %cmp, i64 %j, i64 %i
785+
%gep.dst = getelementptr i64, ptr %dst, i64 %iv
786+
store i64 %cond, ptr %gep.dst, align 8
787+
%iv.next = add nsw i64 %iv, -1
788+
%ec = icmp eq i64 %iv.next, 0
789+
br i1 %ec, label %exit, label %loop
790+
791+
exit:
792+
ret void
793+
}
794+
795+
define void @test_add_ashr_add_regular_select(ptr %dst, ptr %src, i64 %i.start, i64 %j.start) {
796+
; CHECK-LABEL: @test_add_ashr_add_regular_select(
797+
; CHECK-NEXT: entry:
798+
; CHECK-NEXT: br label [[LOOP:%.*]]
799+
; CHECK: loop:
800+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100000, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
801+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[I_NEXT:%.*]], [[SELECT_END]] ]
802+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_START:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
803+
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
804+
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GEP_I]], align 8
805+
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 [[J]]
806+
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[GEP_J]], align 8
807+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP1]], -1
808+
; CHECK-NEXT: [[SHIFT:%.*]] = ashr i64 [[TMP1]], 63
809+
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP]]
810+
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]]
811+
; CHECK: select.true.sink:
812+
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[I]], 1
813+
; CHECK-NEXT: br label [[SELECT_END]]
814+
; CHECK: select.false.sink:
815+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[J]], -1
816+
; CHECK-NEXT: br label [[SELECT_END]]
817+
; CHECK: select.end:
818+
; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[J]], [[SELECT_TRUE_SINK]] ], [ [[TMP3]], [[SELECT_FALSE_SINK]] ]
819+
; CHECK-NEXT: [[I_NEXT]] = phi i64 [ [[TMP2]], [[SELECT_TRUE_SINK]] ], [ [[I]], [[SELECT_FALSE_SINK]] ]
820+
; CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[J]], [[SELECT_TRUE_SINK]] ], [ [[I]], [[SELECT_FALSE_SINK]] ]
821+
; CHECK-NEXT: [[INC:%.*]] = zext i1 [[CMP]] to i64
822+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST:%.*]], i64 [[IV]]
823+
; CHECK-NEXT: store i64 [[COND]], ptr [[GEP_DST]], align 8
824+
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
825+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
826+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
827+
; CHECK: exit:
828+
; CHECK-NEXT: ret void
829+
;
830+
entry:
831+
br label %loop
832+
833+
loop:
834+
%iv = phi i64 [ 100000, %entry ], [ %iv.next, %loop ]
835+
%i = phi i64 [ %i.start, %entry ], [ %i.next, %loop ]
836+
%j = phi i64 [ %j.start, %entry ], [ %j.next, %loop ]
837+
%gep.i = getelementptr inbounds ptr, ptr %src, i64 %i
838+
%0 = load ptr, ptr %gep.i, align 8
839+
%gep.j = getelementptr inbounds i64, ptr %0, i64 %j
840+
%1 = load i64, ptr %gep.j, align 8
841+
%cmp = icmp sgt i64 %1, -1
842+
%shift = ashr i64 %1, 63
843+
%j.next = add nsw i64 %j, %shift
844+
%inc = zext i1 %cmp to i64
845+
%i.next = add nsw i64 %i, %inc
846+
%cond = select i1 %cmp, i64 %j, i64 %i
847+
%gep.dst = getelementptr i64, ptr %dst, i64 %iv
848+
store i64 %cond, ptr %gep.dst, align 8
849+
%iv.next = add nsw i64 %iv, -1
850+
%ec = icmp eq i64 %iv.next, 0
851+
br i1 %ec, label %exit, label %loop
852+
853+
exit:
854+
ret void
855+
}

0 commit comments

Comments
 (0)