Skip to content

Commit b83c4a2

Browse files
goussepitstellar
authored andcommitted
[x86] Fix infinite loop inside DAG combiner with lzcnt feature.
The issue affects targets supporting fast-lzcnt such as btver2. This removes extraneous zext/trunc node insertions to fix the infinite loop. This fixes Issue #54694 Differential Revision: https://reviews.llvm.org/D122900 Reviewed By: RKSimon, spatel, lebedev.ri (cherry picked from commit a3d5f1c) Signed-off-by: Warren Ristow <[email protected]> In https://reviews.llvm.org/D122900 a new function (to exercise the infinite-loop bug) was added to llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll. In applying the fix in the main branch, two previously existing functions in that test also changed behavior slightly, and in the review it was noted: The instructions generated end up being reordered in some cases but I think it is equivalent. That reordering did not happen in those pre-existing functions when applying the fix to the slightly older code-base of the llvm14 branch, and so they are suppressed here. So the updated version of the test in this commit has the additional function added to it, but it is otherwise identical to the previous llvm14 version of the test.
1 parent 0fbe860 commit b83c4a2

File tree

2 files changed

+42
-13
lines changed

2 files changed

+42
-13
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47110,8 +47110,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
4711047110
// into:
4711147111
// srl(ctlz x), log2(bitsize(x))
4711247112
// Input pattern is checked by caller.
47113-
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
47114-
SelectionDAG &DAG) {
47113+
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) {
4711547114
SDValue Cmp = Op.getOperand(1);
4711647115
EVT VT = Cmp.getOperand(0).getValueType();
4711747116
unsigned Log2b = Log2_32(VT.getSizeInBits());
@@ -47122,7 +47121,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
4712247121
SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
4712347122
SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
4712447123
DAG.getConstant(Log2b, dl, MVT::i8));
47125-
return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
47124+
return Scc;
4712647125
}
4712747126

4712847127
// Try to transform:
@@ -47182,11 +47181,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
4718247181
// or(srl(ctlz),srl(ctlz)).
4718347182
// The dag combiner can then fold it into:
4718447183
// srl(or(ctlz, ctlz)).
47185-
EVT VT = OR->getValueType(0);
47186-
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
47184+
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG);
4718747185
SDValue Ret, NewRHS;
47188-
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
47189-
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
47186+
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG)))
47187+
Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS);
4719047188

4719147189
if (!Ret)
4719247190
return SDValue();
@@ -47199,16 +47197,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
4719947197
// Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
4720047198
if (RHS->getOpcode() == ISD::OR)
4720147199
std::swap(LHS, RHS);
47202-
NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
47200+
NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG);
4720347201
if (!NewRHS)
4720447202
return SDValue();
47205-
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
47203+
Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS);
4720647204
}
4720747205

47208-
if (Ret)
47209-
Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
47210-
47211-
return Ret;
47206+
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
4721247207
}
4721347208

4721447209
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,

llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,3 +335,37 @@ entry:
335335
%conv = zext i1 %0 to i32
336336
ret i32 %conv
337337
}
338+
339+
; PR54694 Fix an infinite loop in DAG combiner.
340+
define i32 @test_zext_cmp12(i32 %0, i32 %1) {
341+
; FASTLZCNT-LABEL: test_zext_cmp12:
342+
; FASTLZCNT: # %bb.0:
343+
; FASTLZCNT-NEXT: andl $131072, %edi # imm = 0x20000
344+
; FASTLZCNT-NEXT: andl $131072, %esi # imm = 0x20000
345+
; FASTLZCNT-NEXT: lzcntl %edi, %eax
346+
; FASTLZCNT-NEXT: lzcntl %esi, %ecx
347+
; FASTLZCNT-NEXT: orl %eax, %ecx
348+
; FASTLZCNT-NEXT: movl $2, %eax
349+
; FASTLZCNT-NEXT: shrl $5, %ecx
350+
; FASTLZCNT-NEXT: subl %ecx, %eax
351+
; FASTLZCNT-NEXT: retq
352+
;
353+
; NOFASTLZCNT-LABEL: test_zext_cmp12:
354+
; NOFASTLZCNT: # %bb.0:
355+
; NOFASTLZCNT-NEXT: testl $131072, %edi # imm = 0x20000
356+
; NOFASTLZCNT-NEXT: sete %al
357+
; NOFASTLZCNT-NEXT: testl $131072, %esi # imm = 0x20000
358+
; NOFASTLZCNT-NEXT: sete %cl
359+
; NOFASTLZCNT-NEXT: orb %al, %cl
360+
; NOFASTLZCNT-NEXT: movl $2, %eax
361+
; NOFASTLZCNT-NEXT: movzbl %cl, %ecx
362+
; NOFASTLZCNT-NEXT: subl %ecx, %eax
363+
; NOFASTLZCNT-NEXT: retq
364+
%3 = and i32 %0, 131072
365+
%4 = icmp eq i32 %3, 0
366+
%5 = and i32 %1, 131072
367+
%6 = icmp eq i32 %5, 0
368+
%7 = select i1 %4, i1 true, i1 %6
369+
%8 = select i1 %7, i32 1, i32 2
370+
ret i32 %8
371+
}

0 commit comments

Comments
 (0)