Skip to content

Commit 98a07f7

Browse files
committed
[X86] LowerCTPOP - "ctpop(i2 x) --> sub(x, (x >> 1))"
If we only have 2 active bits then we can avoid the i8 CTPOP multiply expansion entirely. Another expansion pulled from llvm#79823.
1 parent 69279a8 commit 98a07f7

File tree

2 files changed

+31
-25
lines changed

2 files changed

+31
-25
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31053,6 +31053,18 @@ static SDValue LowerCTPOP(SDValue N, const X86Subtarget &Subtarget,
3105331053
unsigned ActiveBits = Known.getBitWidth() - LZ;
3105431054
unsigned ShiftedActiveBits = Known.getBitWidth() - (LZ + TZ);
3105531055

31056+
// i2 CTPOP - "ctpop(x) --> sub(x, (x >> 1))".
31057+
if (ShiftedActiveBits <= 2) {
31058+
if (ActiveBits > 2)
31059+
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
31060+
DAG.getShiftAmountConstant(TZ, VT, DL));
31061+
Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
31062+
Op = DAG.getNode(ISD::SUB, DL, MVT::i32, Op,
31063+
DAG.getNode(ISD::SRL, DL, MVT::i32, Op,
31064+
DAG.getShiftAmountConstant(1, VT, DL)));
31065+
return DAG.getZExtOrTrunc(Op, DL, VT);
31066+
}
31067+
3105631068
// i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
3105731069
if (ShiftedActiveBits <= 8) {
3105831070
SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);

llvm/test/CodeGen/X86/ctpop-mask.ll

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,22 +33,19 @@ define i64 @ctpop_mask2(i64 %x) nounwind readnone {
3333
; X86-NO-POPCOUNT: # %bb.0:
3434
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
3535
; X86-NO-POPCOUNT-NEXT: andl $3, %eax
36-
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
37-
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
38-
; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
39-
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
40-
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
36+
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
37+
; X86-NO-POPCOUNT-NEXT: shrl %ecx
38+
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
4139
; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
4240
; X86-NO-POPCOUNT-NEXT: retl
4341
;
4442
; X64-NO-POPCOUNT-LABEL: ctpop_mask2:
4543
; X64-NO-POPCOUNT: # %bb.0:
46-
; X64-NO-POPCOUNT-NEXT: andl $3, %edi
47-
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
48-
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
49-
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
50-
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
51-
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
44+
; X64-NO-POPCOUNT-NEXT: movq %rdi, %rax
45+
; X64-NO-POPCOUNT-NEXT: andl $3, %eax
46+
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
47+
; X64-NO-POPCOUNT-NEXT: shrl %ecx
48+
; X64-NO-POPCOUNT-NEXT: subl %ecx, %eax
5249
; X64-NO-POPCOUNT-NEXT: retq
5350
%mask = and i64 %x, 3
5451
%count = tail call i64 @llvm.ctpop.i64(i64 %mask)
@@ -71,25 +68,22 @@ define i32 @ctpop_shifted_mask2(i32 %x) nounwind readnone {
7168
;
7269
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask2:
7370
; X86-NO-POPCOUNT: # %bb.0:
74-
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
71+
; X86-NO-POPCOUNT-NEXT: movl $1572864, %eax # imm = 0x180000
72+
; X86-NO-POPCOUNT-NEXT: andl {{[0-9]+}}(%esp), %eax
73+
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
74+
; X86-NO-POPCOUNT-NEXT: shrl $20, %ecx
7575
; X86-NO-POPCOUNT-NEXT: shrl $19, %eax
76-
; X86-NO-POPCOUNT-NEXT: andl $3, %eax
77-
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
78-
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
79-
; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
80-
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
81-
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
76+
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
8277
; X86-NO-POPCOUNT-NEXT: retl
8378
;
8479
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask2:
8580
; X64-NO-POPCOUNT: # %bb.0:
86-
; X64-NO-POPCOUNT-NEXT: shrl $19, %edi
87-
; X64-NO-POPCOUNT-NEXT: andl $3, %edi
88-
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
89-
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
90-
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
91-
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
92-
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
81+
; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
82+
; X64-NO-POPCOUNT-NEXT: andl $1572864, %eax # imm = 0x180000
83+
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
84+
; X64-NO-POPCOUNT-NEXT: shrl $20, %ecx
85+
; X64-NO-POPCOUNT-NEXT: shrl $19, %eax
86+
; X64-NO-POPCOUNT-NEXT: subl %ecx, %eax
9387
; X64-NO-POPCOUNT-NEXT: retq
9488
%mask = and i32 %x, 1572864 ; 3 << 19
9589
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)

0 commit comments

Comments
 (0)