Skip to content

Commit 56b3339

Browse files
committed
[SelectionDAG] Move ISD::PARITY formation from DAGCombine to SimplifyDemandedBits.
Previously, we formed ISD::PARITY by looking for (and (ctpop X), 1), but the AND might be separated from the ctpop. For example, if the parity result is multiplied by 2, we'll pull the AND through the shift. To cover such patterns, move the transform to SimplifyDemandedBits, where we can handle any case that results in only the LSB of the CTPOP being used.
1 parent 783ba64 commit 56b3339

File tree

4 files changed

+110
-19
lines changed

4 files changed

+110
-19
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5574,25 +5574,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
55745574
if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
55755575
return V;
55765576

5577-
// fold (and (ctpop X), 1) -> parity X
5578-
// Only do this before op legalization as it might be turned back into ctpop.
5579-
// TODO: Support vectors?
5580-
if (!LegalOperations && isOneConstant(N1) && N0.hasOneUse()) {
5581-
SDValue Tmp = N0;
5582-
5583-
// It's possible the ctpop has been truncated, but since we only care about
5584-
// the LSB we can look through it.
5585-
if (Tmp.getOpcode() == ISD::TRUNCATE && Tmp.getOperand(0).hasOneUse())
5586-
Tmp = Tmp.getOperand(0);
5587-
5588-
if (Tmp.getOpcode() == ISD::CTPOP) {
5589-
SDLoc dl(N);
5590-
SDValue Parity =
5591-
DAG.getNode(ISD::PARITY, dl, Tmp.getValueType(), Tmp.getOperand(0));
5592-
return DAG.getNode(ISD::TRUNCATE, dl, VT, Parity);
5593-
}
5594-
}
5595-
55965577
return SDValue();
55975578
}
55985579

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3053,6 +3053,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
30533053
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
30543054
break;
30553055
}
3056+
case ISD::PARITY: {
3057+
// Parity returns 0 everywhere but the LSB.
3058+
Known.Zero.setBitsFrom(1);
3059+
break;
3060+
}
30563061
case ISD::LOAD: {
30573062
LoadSDNode *LD = cast<LoadSDNode>(Op);
30583063
const Constant *Cst = TLI->getTargetConstantFromLoad(LD);

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,6 +1748,17 @@ bool TargetLowering::SimplifyDemandedBits(
17481748
Known.Zero = Known2.Zero.byteSwap();
17491749
break;
17501750
}
1751+
case ISD::CTPOP: {
1752+
// If only 1 bit is demanded, replace with PARITY as long as we're before
1753+
// op legalization.
1754+
// FIXME: Limit to scalars for now.
1755+
if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
1756+
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
1757+
Op.getOperand(0)));
1758+
1759+
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1760+
break;
1761+
}
17511762
case ISD::SIGN_EXTEND_INREG: {
17521763
SDValue Op0 = Op.getOperand(0);
17531764
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

llvm/test/CodeGen/X86/parity.ll

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,100 @@ define i32 @parity_8_mask(i32 %x) {
422422
ret i32 %c
423423
}
424424

425+
define i32 @parity_32_shift(i32 %0) {
426+
; X86-NOPOPCNT-LABEL: parity_32_shift:
427+
; X86-NOPOPCNT: # %bb.0:
428+
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
429+
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
430+
; X86-NOPOPCNT-NEXT: shrl $16, %ecx
431+
; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
432+
; X86-NOPOPCNT-NEXT: xorl %eax, %eax
433+
; X86-NOPOPCNT-NEXT: xorb %ch, %cl
434+
; X86-NOPOPCNT-NEXT: setnp %al
435+
; X86-NOPOPCNT-NEXT: addl %eax, %eax
436+
; X86-NOPOPCNT-NEXT: retl
437+
;
438+
; X64-NOPOPCNT-LABEL: parity_32_shift:
439+
; X64-NOPOPCNT: # %bb.0:
440+
; X64-NOPOPCNT-NEXT: movl %edi, %ecx
441+
; X64-NOPOPCNT-NEXT: shrl $16, %ecx
442+
; X64-NOPOPCNT-NEXT: xorl %edi, %ecx
443+
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
444+
; X64-NOPOPCNT-NEXT: xorb %ch, %cl
445+
; X64-NOPOPCNT-NEXT: setnp %al
446+
; X64-NOPOPCNT-NEXT: addl %eax, %eax
447+
; X64-NOPOPCNT-NEXT: retq
448+
;
449+
; X86-POPCNT-LABEL: parity_32_shift:
450+
; X86-POPCNT: # %bb.0:
451+
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
452+
; X86-POPCNT-NEXT: andl $1, %eax
453+
; X86-POPCNT-NEXT: addl %eax, %eax
454+
; X86-POPCNT-NEXT: retl
455+
;
456+
; X64-POPCNT-LABEL: parity_32_shift:
457+
; X64-POPCNT: # %bb.0:
458+
; X64-POPCNT-NEXT: popcntl %edi, %eax
459+
; X64-POPCNT-NEXT: andl $1, %eax
460+
; X64-POPCNT-NEXT: addl %eax, %eax
461+
; X64-POPCNT-NEXT: retq
462+
%2 = tail call i32 @llvm.ctpop.i32(i32 %0)
463+
%3 = shl nuw nsw i32 %2, 1
464+
%4 = and i32 %3, 2
465+
ret i32 %4
466+
}
467+
468+
define i64 @parity_64_shift(i64 %0) {
469+
; X86-NOPOPCNT-LABEL: parity_64_shift:
470+
; X86-NOPOPCNT: # %bb.0:
471+
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
472+
; X86-NOPOPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
473+
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
474+
; X86-NOPOPCNT-NEXT: shrl $16, %ecx
475+
; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
476+
; X86-NOPOPCNT-NEXT: xorl %eax, %eax
477+
; X86-NOPOPCNT-NEXT: xorb %ch, %cl
478+
; X86-NOPOPCNT-NEXT: setnp %al
479+
; X86-NOPOPCNT-NEXT: addl %eax, %eax
480+
; X86-NOPOPCNT-NEXT: xorl %edx, %edx
481+
; X86-NOPOPCNT-NEXT: retl
482+
;
483+
; X64-NOPOPCNT-LABEL: parity_64_shift:
484+
; X64-NOPOPCNT: # %bb.0:
485+
; X64-NOPOPCNT-NEXT: movq %rdi, %rax
486+
; X64-NOPOPCNT-NEXT: shrq $32, %rax
487+
; X64-NOPOPCNT-NEXT: xorl %edi, %eax
488+
; X64-NOPOPCNT-NEXT: movl %eax, %ecx
489+
; X64-NOPOPCNT-NEXT: shrl $16, %ecx
490+
; X64-NOPOPCNT-NEXT: xorl %eax, %ecx
491+
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
492+
; X64-NOPOPCNT-NEXT: xorb %ch, %cl
493+
; X64-NOPOPCNT-NEXT: setnp %al
494+
; X64-NOPOPCNT-NEXT: addq %rax, %rax
495+
; X64-NOPOPCNT-NEXT: retq
496+
;
497+
; X86-POPCNT-LABEL: parity_64_shift:
498+
; X86-POPCNT: # %bb.0:
499+
; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
500+
; X86-POPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
501+
; X86-POPCNT-NEXT: popcntl %eax, %eax
502+
; X86-POPCNT-NEXT: andl $1, %eax
503+
; X86-POPCNT-NEXT: addl %eax, %eax
504+
; X86-POPCNT-NEXT: xorl %edx, %edx
505+
; X86-POPCNT-NEXT: retl
506+
;
507+
; X64-POPCNT-LABEL: parity_64_shift:
508+
; X64-POPCNT: # %bb.0:
509+
; X64-POPCNT-NEXT: popcntq %rdi, %rax
510+
; X64-POPCNT-NEXT: andl $1, %eax
511+
; X64-POPCNT-NEXT: addq %rax, %rax
512+
; X64-POPCNT-NEXT: retq
513+
%2 = tail call i64 @llvm.ctpop.i64(i64 %0)
514+
%3 = shl nuw nsw i64 %2, 1
515+
%4 = and i64 %3, 2
516+
ret i64 %4
517+
}
518+
425519
declare i4 @llvm.ctpop.i4(i4 %x)
426520
declare i8 @llvm.ctpop.i8(i8 %x)
427521
declare i16 @llvm.ctpop.i16(i16 %x)

0 commit comments

Comments
 (0)