Skip to content

Commit b8c9b06

Browse files
committed
[X86] LowerCTPOP - add i3 and i4 LUT 'shift+mask' expansions
Use the 3 or 4 active bits as a shift amount into an i32/i64 constant representing the number of set bits. In future, it might be worthwhile to move these expansions into a generic location in case other targets want to make use of them. Another expansion pulled from llvm#79823
1 parent 98a07f7 commit b8c9b06

File tree

2 files changed

+70
-37
lines changed

2 files changed

+70
-37
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31065,6 +31065,38 @@ static SDValue LowerCTPOP(SDValue N, const X86Subtarget &Subtarget,
3106531065
return DAG.getZExtOrTrunc(Op, DL, VT);
3106631066
}
3106731067

31068+
// i3 CTPOP - perform LUT into i32 integer.
31069+
if (ShiftedActiveBits <= 3) {
31070+
if (ActiveBits > 3)
31071+
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
31072+
DAG.getShiftAmountConstant(TZ, VT, DL));
31073+
Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
31074+
Op = DAG.getNode(ISD::SHL, DL, MVT::i32, Op,
31075+
DAG.getShiftAmountConstant(1, VT, DL));
31076+
Op = DAG.getNode(ISD::SRL, DL, MVT::i32,
31077+
DAG.getConstant(0b1110100110010100U, DL, MVT::i32), Op);
31078+
Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
31079+
DAG.getConstant(0x3, DL, MVT::i32));
31080+
return DAG.getZExtOrTrunc(Op, DL, VT);
31081+
}
31082+
31083+
// i4 CTPOP - perform LUT into i64 integer.
31084+
if (ShiftedActiveBits <= 4 &&
31085+
DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64)) {
31086+
SDValue LUT = DAG.getConstant(0x4332322132212110ULL, DL, MVT::i64);
31087+
if (ActiveBits > 4)
31088+
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
31089+
DAG.getShiftAmountConstant(TZ, VT, DL));
31090+
Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
31091+
Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op,
31092+
DAG.getConstant(4, DL, MVT::i32));
31093+
Op = DAG.getNode(ISD::SRL, DL, MVT::i64, LUT,
31094+
DAG.getShiftAmountOperand(MVT::i64, Op));
31095+
Op = DAG.getNode(ISD::AND, DL, MVT::i64, Op,
31096+
DAG.getConstant(0x7, DL, MVT::i64));
31097+
return DAG.getZExtOrTrunc(Op, DL, VT);
31098+
}
31099+
3106831100
// i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
3106931101
if (ShiftedActiveBits <= 8) {
3107031102
SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);

llvm/test/CodeGen/X86/ctpop-mask.ll

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -106,23 +106,24 @@ define i32 @ctpop_mask3(i32 %x) nounwind readnone {
106106
;
107107
; X86-NO-POPCOUNT-LABEL: ctpop_mask3:
108108
; X86-NO-POPCOUNT: # %bb.0:
109-
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
110-
; X86-NO-POPCOUNT-NEXT: andl $5, %eax
111-
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
112-
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
113-
; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
114-
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
115-
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
109+
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
110+
; X86-NO-POPCOUNT-NEXT: andl $5, %ecx
111+
; X86-NO-POPCOUNT-NEXT: addl %ecx, %ecx
112+
; X86-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994
113+
; X86-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
114+
; X86-NO-POPCOUNT-NEXT: shrl %cl, %eax
115+
; X86-NO-POPCOUNT-NEXT: andl $3, %eax
116116
; X86-NO-POPCOUNT-NEXT: retl
117117
;
118118
; X64-NO-POPCOUNT-LABEL: ctpop_mask3:
119119
; X64-NO-POPCOUNT: # %bb.0:
120+
; X64-NO-POPCOUNT-NEXT: # kill: def $edi killed $edi def $rdi
120121
; X64-NO-POPCOUNT-NEXT: andl $5, %edi
121-
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
122-
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
123-
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
124-
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
125-
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
122+
; X64-NO-POPCOUNT-NEXT: leal (%rdi,%rdi), %ecx
123+
; X64-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994
124+
; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
125+
; X64-NO-POPCOUNT-NEXT: shrl %cl, %eax
126+
; X64-NO-POPCOUNT-NEXT: andl $3, %eax
126127
; X64-NO-POPCOUNT-NEXT: retq
127128
%mask = and i32 %x, 5 ; 0b101
128129
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -147,24 +148,23 @@ define i16 @ctpop_shifted_mask3(i16 %x) nounwind readnone {
147148
;
148149
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask3:
149150
; X86-NO-POPCOUNT: # %bb.0:
150-
; X86-NO-POPCOUNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
151-
; X86-NO-POPCOUNT-NEXT: andl $14, %eax
152-
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
153-
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
154-
; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
155-
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
156-
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
151+
; X86-NO-POPCOUNT-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
152+
; X86-NO-POPCOUNT-NEXT: andl $14, %ecx
153+
; X86-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994
154+
; X86-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
155+
; X86-NO-POPCOUNT-NEXT: shrl %cl, %eax
156+
; X86-NO-POPCOUNT-NEXT: andl $3, %eax
157157
; X86-NO-POPCOUNT-NEXT: # kill: def $ax killed $ax killed $eax
158158
; X86-NO-POPCOUNT-NEXT: retl
159159
;
160160
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask3:
161161
; X64-NO-POPCOUNT: # %bb.0:
162-
; X64-NO-POPCOUNT-NEXT: andl $14, %edi
163-
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
164-
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
165-
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
166-
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
167-
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
162+
; X64-NO-POPCOUNT-NEXT: movl %edi, %ecx
163+
; X64-NO-POPCOUNT-NEXT: andl $14, %ecx
164+
; X64-NO-POPCOUNT-NEXT: movl $59796, %eax # imm = 0xE994
165+
; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
166+
; X64-NO-POPCOUNT-NEXT: shrl %cl, %eax
167+
; X64-NO-POPCOUNT-NEXT: andl $3, %eax
168168
; X64-NO-POPCOUNT-NEXT: # kill: def $ax killed $ax killed $eax
169169
; X64-NO-POPCOUNT-NEXT: retq
170170
%mask = and i16 %x, 14 ; 7 << 1
@@ -202,11 +202,11 @@ define i64 @ctpop_mask4(i64 %x) nounwind readnone {
202202
; X64-NO-POPCOUNT-LABEL: ctpop_mask4:
203203
; X64-NO-POPCOUNT: # %bb.0:
204204
; X64-NO-POPCOUNT-NEXT: andl $15, %edi
205-
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
206-
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
207-
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
208-
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
209-
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
205+
; X64-NO-POPCOUNT-NEXT: leal (,%rdi,4), %ecx
206+
; X64-NO-POPCOUNT-NEXT: movabsq $4841987667533046032, %rax # imm = 0x4332322132212110
207+
; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
208+
; X64-NO-POPCOUNT-NEXT: shrq %cl, %rax
209+
; X64-NO-POPCOUNT-NEXT: andl $7, %eax
210210
; X64-NO-POPCOUNT-NEXT: retq
211211
%mask = and i64 %x, 15
212212
%count = tail call i64 @llvm.ctpop.i64(i64 %mask)
@@ -241,13 +241,14 @@ define i32 @ctpop_shifted_mask4(i32 %x) nounwind readnone {
241241
;
242242
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask4:
243243
; X64-NO-POPCOUNT: # %bb.0:
244-
; X64-NO-POPCOUNT-NEXT: shrl $9, %edi
245-
; X64-NO-POPCOUNT-NEXT: andl $15, %edi
246-
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
247-
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
248-
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
249-
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
250-
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
244+
; X64-NO-POPCOUNT-NEXT: movl %edi, %ecx
245+
; X64-NO-POPCOUNT-NEXT: shrl $7, %ecx
246+
; X64-NO-POPCOUNT-NEXT: andl $60, %ecx
247+
; X64-NO-POPCOUNT-NEXT: movabsq $4841987667533046032, %rax # imm = 0x4332322132212110
248+
; X64-NO-POPCOUNT-NEXT: # kill: def $cl killed $cl killed $ecx
249+
; X64-NO-POPCOUNT-NEXT: shrq %cl, %rax
250+
; X64-NO-POPCOUNT-NEXT: andl $7, %eax
251+
; X64-NO-POPCOUNT-NEXT: # kill: def $eax killed $eax killed $rax
251252
; X64-NO-POPCOUNT-NEXT: retq
252253
%mask = and i32 %x, 7680 ; 15 << 9
253254
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)

0 commit comments

Comments
 (0)