Skip to content

Commit 3a75807

Browse files
committed
[X86] Allow i8 CTPOP expansion to work with a 'shifted' active bits value of 8 bits or less
Shift the value down so that the active bits start at the LSB.
1 parent 7ecfb66 commit 3a75807

File tree

2 files changed

+100
-206
lines changed

2 files changed

+100
-206
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31037,12 +31037,22 @@ static SDValue LowerCTPOP(SDValue N, const X86Subtarget &Subtarget,
3103731037
SDLoc DL(N);
3103831038

3103931039
if (VT.isScalarInteger()) {
31040+
// Compute the lower/upper bounds of the active bits of the value,
31041+
// allowing us to shift the active bits down if necessary to fit into the
31042+
// special cases below.
3104031043
KnownBits Known = DAG.computeKnownBits(Op);
31041-
unsigned ActiveBits = Known.countMaxActiveBits();
31044+
unsigned LZ = Known.countMinLeadingZeros();
31045+
unsigned TZ = Known.countMinTrailingZeros();
31046+
assert((LZ + TZ) < Known.getBitWidth() && "Illegal shifted mask");
31047+
unsigned ActiveBits = Known.getBitWidth() - LZ;
31048+
unsigned ShiftedActiveBits = Known.getBitWidth() - (LZ + TZ);
3104231049

3104331050
// i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
31044-
if (ActiveBits <= 8) {
31051+
if (ShiftedActiveBits <= 8) {
3104531052
SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);
31053+
if (ActiveBits > 8)
31054+
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
31055+
DAG.getShiftAmountConstant(TZ, VT, DL));
3104631056
Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
3104731057
Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op,
3104831058
DAG.getConstant(0x08040201U, DL, MVT::i32));

llvm/test/CodeGen/X86/ctpop-mask.ll

Lines changed: 88 additions & 204 deletions
Original file line numberDiff line numberDiff line change
@@ -70,43 +70,25 @@ define i32 @ctpop_shifted_mask2(i32 %x) nounwind readnone {
7070
;
7171
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask2:
7272
; X86-NO-POPCOUNT: # %bb.0:
73-
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
74-
; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
75-
; X86-NO-POPCOUNT-NEXT: andl $1572864, %eax # imm = 0x180000
76-
; X86-NO-POPCOUNT-NEXT: shrl %ecx
77-
; X86-NO-POPCOUNT-NEXT: andl $262144, %ecx # imm = 0x40000
78-
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
79-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
80-
; X86-NO-POPCOUNT-NEXT: andl $858783744, %ecx # imm = 0x33300000
81-
; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
82-
; X86-NO-POPCOUNT-NEXT: andl $858980352, %eax # imm = 0x33330000
83-
; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
84-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
85-
; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
86-
; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
87-
; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
88-
; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
89-
; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
73+
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
74+
; X86-NO-POPCOUNT-NEXT: shrl $19, %eax
75+
; X86-NO-POPCOUNT-NEXT: andl $3, %eax
76+
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
77+
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
78+
; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
79+
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
80+
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
9081
; X86-NO-POPCOUNT-NEXT: retl
9182
;
9283
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask2:
9384
; X64-NO-POPCOUNT: # %bb.0:
94-
; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
95-
; X64-NO-POPCOUNT-NEXT: andl $1572864, %eax # imm = 0x180000
96-
; X64-NO-POPCOUNT-NEXT: shrl %edi
97-
; X64-NO-POPCOUNT-NEXT: andl $262144, %edi # imm = 0x40000
98-
; X64-NO-POPCOUNT-NEXT: subl %edi, %eax
99-
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
100-
; X64-NO-POPCOUNT-NEXT: andl $858783744, %ecx # imm = 0x33300000
101-
; X64-NO-POPCOUNT-NEXT: shrl $2, %eax
102-
; X64-NO-POPCOUNT-NEXT: andl $858980352, %eax # imm = 0x33330000
103-
; X64-NO-POPCOUNT-NEXT: addl %ecx, %eax
104-
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
105-
; X64-NO-POPCOUNT-NEXT: shrl $4, %ecx
106-
; X64-NO-POPCOUNT-NEXT: addl %eax, %ecx
107-
; X64-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
108-
; X64-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
109-
; X64-NO-POPCOUNT-NEXT: shrl $24, %eax
85+
; X64-NO-POPCOUNT-NEXT: shrl $19, %edi
86+
; X64-NO-POPCOUNT-NEXT: andl $3, %edi
87+
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
88+
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
89+
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
90+
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
91+
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
11092
; X64-NO-POPCOUNT-NEXT: retq
11193
%mask = and i32 %x, 1572864 ; 3 << 19
11294
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -252,43 +234,25 @@ define i32 @ctpop_shifted_mask4(i32 %x) nounwind readnone {
252234
;
253235
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask4:
254236
; X86-NO-POPCOUNT: # %bb.0:
255-
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
256-
; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
257-
; X86-NO-POPCOUNT-NEXT: andl $7680, %eax # imm = 0x1E00
258-
; X86-NO-POPCOUNT-NEXT: shrl %ecx
259-
; X86-NO-POPCOUNT-NEXT: andl $1280, %ecx # imm = 0x500
260-
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
261-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
262-
; X86-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
263-
; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
264-
; X86-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
265-
; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
266-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
267-
; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
268-
; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
269-
; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
270-
; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
271-
; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
237+
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
238+
; X86-NO-POPCOUNT-NEXT: shrl $9, %eax
239+
; X86-NO-POPCOUNT-NEXT: andl $15, %eax
240+
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
241+
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
242+
; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
243+
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
244+
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
272245
; X86-NO-POPCOUNT-NEXT: retl
273246
;
274247
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask4:
275248
; X64-NO-POPCOUNT: # %bb.0:
276-
; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
277-
; X64-NO-POPCOUNT-NEXT: andl $7680, %eax # imm = 0x1E00
278-
; X64-NO-POPCOUNT-NEXT: shrl %edi
279-
; X64-NO-POPCOUNT-NEXT: andl $1280, %edi # imm = 0x500
280-
; X64-NO-POPCOUNT-NEXT: subl %edi, %eax
281-
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
282-
; X64-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
283-
; X64-NO-POPCOUNT-NEXT: shrl $2, %eax
284-
; X64-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
285-
; X64-NO-POPCOUNT-NEXT: addl %ecx, %eax
286-
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
287-
; X64-NO-POPCOUNT-NEXT: shrl $4, %ecx
288-
; X64-NO-POPCOUNT-NEXT: addl %eax, %ecx
289-
; X64-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
290-
; X64-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
291-
; X64-NO-POPCOUNT-NEXT: shrl $24, %eax
249+
; X64-NO-POPCOUNT-NEXT: shrl $9, %edi
250+
; X64-NO-POPCOUNT-NEXT: andl $15, %edi
251+
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
252+
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
253+
; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
254+
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
255+
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
292256
; X64-NO-POPCOUNT-NEXT: retq
293257
%mask = and i32 %x, 7680 ; 15 << 9
294258
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -352,43 +316,25 @@ define i32 @ctpop_shifted_mask5(i32 %x) nounwind readnone {
352316
;
353317
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask5:
354318
; X86-NO-POPCOUNT: # %bb.0:
355-
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
356-
; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
357-
; X86-NO-POPCOUNT-NEXT: andl $11776, %eax # imm = 0x2E00
358-
; X86-NO-POPCOUNT-NEXT: shrl %ecx
359-
; X86-NO-POPCOUNT-NEXT: andl $5376, %ecx # imm = 0x1500
360-
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
361-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
362-
; X86-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
363-
; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
364-
; X86-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
365-
; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
366-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
367-
; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
368-
; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
369-
; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
370-
; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
371-
; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
319+
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
320+
; X86-NO-POPCOUNT-NEXT: shrl $9, %eax
321+
; X86-NO-POPCOUNT-NEXT: andl $23, %eax
322+
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
323+
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
324+
; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
325+
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
326+
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
372327
; X86-NO-POPCOUNT-NEXT: retl
373328
;
374329
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask5:
375330
; X64-NO-POPCOUNT: # %bb.0:
376-
; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
377-
; X64-NO-POPCOUNT-NEXT: andl $11776, %eax # imm = 0x2E00
378-
; X64-NO-POPCOUNT-NEXT: shrl %edi
379-
; X64-NO-POPCOUNT-NEXT: andl $5376, %edi # imm = 0x1500
380-
; X64-NO-POPCOUNT-NEXT: subl %edi, %eax
381-
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
382-
; X64-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
383-
; X64-NO-POPCOUNT-NEXT: shrl $2, %eax
384-
; X64-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
385-
; X64-NO-POPCOUNT-NEXT: addl %ecx, %eax
386-
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
387-
; X64-NO-POPCOUNT-NEXT: shrl $4, %ecx
388-
; X64-NO-POPCOUNT-NEXT: addl %eax, %ecx
389-
; X64-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
390-
; X64-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
391-
; X64-NO-POPCOUNT-NEXT: shrl $24, %eax
331+
; X64-NO-POPCOUNT-NEXT: shrl $9, %edi
332+
; X64-NO-POPCOUNT-NEXT: andl $23, %edi
333+
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
334+
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
335+
; X64-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
336+
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
337+
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
392338
; X64-NO-POPCOUNT-NEXT: retq
393339
%mask = and i32 %x, 11776 ; 23 << 9
394340
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -451,47 +397,26 @@ define i64 @ctpop_shifted_mask6(i64 %x) nounwind readnone {
451397
;
452398
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask6:
453399
; X86-NO-POPCOUNT: # %bb.0:
454-
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
455-
; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
456-
; X86-NO-POPCOUNT-NEXT: andl $26112, %eax # imm = 0x6600
457-
; X86-NO-POPCOUNT-NEXT: shrl %ecx
458-
; X86-NO-POPCOUNT-NEXT: andl $4352, %ecx # imm = 0x1100
459-
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
460-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
461-
; X86-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
462-
; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
463-
; X86-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
464-
; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
465-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
466-
; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
467-
; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
468-
; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
469-
; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
470-
; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
400+
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
401+
; X86-NO-POPCOUNT-NEXT: shrl $9, %eax
402+
; X86-NO-POPCOUNT-NEXT: andl $51, %eax
403+
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
404+
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
405+
; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
406+
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
407+
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
471408
; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
472409
; X86-NO-POPCOUNT-NEXT: retl
473410
;
474411
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask6:
475412
; X64-NO-POPCOUNT: # %bb.0:
476-
; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
477-
; X64-NO-POPCOUNT-NEXT: andl $26112, %eax # imm = 0x6600
478-
; X64-NO-POPCOUNT-NEXT: shrl %edi
479-
; X64-NO-POPCOUNT-NEXT: andl $4352, %edi # imm = 0x1100
480-
; X64-NO-POPCOUNT-NEXT: subq %rdi, %rax
481-
; X64-NO-POPCOUNT-NEXT: movabsq $3689348814741910272, %rcx # imm = 0x3333333333333300
482-
; X64-NO-POPCOUNT-NEXT: movq %rax, %rdx
483-
; X64-NO-POPCOUNT-NEXT: andq %rcx, %rdx
484-
; X64-NO-POPCOUNT-NEXT: shrq $2, %rax
485-
; X64-NO-POPCOUNT-NEXT: andq %rcx, %rax
486-
; X64-NO-POPCOUNT-NEXT: addq %rdx, %rax
487-
; X64-NO-POPCOUNT-NEXT: movq %rax, %rcx
488-
; X64-NO-POPCOUNT-NEXT: shrq $4, %rcx
489-
; X64-NO-POPCOUNT-NEXT: addq %rax, %rcx
490-
; X64-NO-POPCOUNT-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
491-
; X64-NO-POPCOUNT-NEXT: andq %rcx, %rdx
492-
; X64-NO-POPCOUNT-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
493-
; X64-NO-POPCOUNT-NEXT: imulq %rdx, %rax
494-
; X64-NO-POPCOUNT-NEXT: shrq $56, %rax
413+
; X64-NO-POPCOUNT-NEXT: shrl $9, %edi
414+
; X64-NO-POPCOUNT-NEXT: andl $51, %edi
415+
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
416+
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
417+
; X64-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
418+
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
419+
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
495420
; X64-NO-POPCOUNT-NEXT: retq
496421
%mask = and i64 %x, 26112 ; 51 << 9
497422
%count = tail call i64 @llvm.ctpop.i64(i64 %mask)
@@ -557,43 +482,25 @@ define i32 @ctpop_shift_mask7(i32 %x) nounwind readnone {
557482
;
558483
; X86-NO-POPCOUNT-LABEL: ctpop_shift_mask7:
559484
; X86-NO-POPCOUNT: # %bb.0:
560-
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
561-
; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
562-
; X86-NO-POPCOUNT-NEXT: andl $1040384, %eax # imm = 0xFE000
563-
; X86-NO-POPCOUNT-NEXT: shrl %ecx
564-
; X86-NO-POPCOUNT-NEXT: andl $348160, %ecx # imm = 0x55000
565-
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
566-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
567-
; X86-NO-POPCOUNT-NEXT: andl $858992640, %ecx # imm = 0x33333000
568-
; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
569-
; X86-NO-POPCOUNT-NEXT: andl $858992640, %eax # imm = 0x33333000
570-
; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
571-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
572-
; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
573-
; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
574-
; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
575-
; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
576-
; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
485+
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
486+
; X86-NO-POPCOUNT-NEXT: shrl $13, %eax
487+
; X86-NO-POPCOUNT-NEXT: andl $127, %eax
488+
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
489+
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
490+
; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
491+
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
492+
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
577493
; X86-NO-POPCOUNT-NEXT: retl
578494
;
579495
; X64-NO-POPCOUNT-LABEL: ctpop_shift_mask7:
580496
; X64-NO-POPCOUNT: # %bb.0:
581-
; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
582-
; X64-NO-POPCOUNT-NEXT: andl $1040384, %eax # imm = 0xFE000
583-
; X64-NO-POPCOUNT-NEXT: shrl %edi
584-
; X64-NO-POPCOUNT-NEXT: andl $348160, %edi # imm = 0x55000
585-
; X64-NO-POPCOUNT-NEXT: subl %edi, %eax
586-
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
587-
; X64-NO-POPCOUNT-NEXT: andl $858992640, %ecx # imm = 0x33333000
588-
; X64-NO-POPCOUNT-NEXT: shrl $2, %eax
589-
; X64-NO-POPCOUNT-NEXT: andl $858992640, %eax # imm = 0x33333000
590-
; X64-NO-POPCOUNT-NEXT: addl %ecx, %eax
591-
; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
592-
; X64-NO-POPCOUNT-NEXT: shrl $4, %ecx
593-
; X64-NO-POPCOUNT-NEXT: addl %eax, %ecx
594-
; X64-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
595-
; X64-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
596-
; X64-NO-POPCOUNT-NEXT: shrl $24, %eax
497+
; X64-NO-POPCOUNT-NEXT: shrl $13, %edi
498+
; X64-NO-POPCOUNT-NEXT: andl $127, %edi
499+
; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
500+
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
501+
; X64-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
502+
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
503+
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
597504
; X64-NO-POPCOUNT-NEXT: retq
598505
%mask = and i32 %x, 1040384 ; 127 << 13
599506
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -654,47 +561,24 @@ define i64 @ctpop_shifted_mask8(i64 %x) nounwind readnone {
654561
;
655562
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask8:
656563
; X86-NO-POPCOUNT: # %bb.0:
657-
; X86-NO-POPCOUNT-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
658-
; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
659-
; X86-NO-POPCOUNT-NEXT: shll $8, %eax
660-
; X86-NO-POPCOUNT-NEXT: shll $7, %ecx
661-
; X86-NO-POPCOUNT-NEXT: andl $21760, %ecx # imm = 0x5500
662-
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
663-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
664-
; X86-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
665-
; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
666-
; X86-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
667-
; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
668-
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
669-
; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
670-
; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
671-
; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
672-
; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
673-
; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
564+
; X86-NO-POPCOUNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
565+
; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
566+
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
567+
; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
568+
; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
569+
; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
674570
; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
675571
; X86-NO-POPCOUNT-NEXT: retl
676572
;
677573
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask8:
678574
; X64-NO-POPCOUNT: # %bb.0:
679-
; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
680-
; X64-NO-POPCOUNT-NEXT: andl $65280, %eax # imm = 0xFF00
681-
; X64-NO-POPCOUNT-NEXT: shrl %edi
682-
; X64-NO-POPCOUNT-NEXT: andl $21760, %edi # imm = 0x5500
683-
; X64-NO-POPCOUNT-NEXT: subq %rdi, %rax
684-
; X64-NO-POPCOUNT-NEXT: movabsq $3689348814741910272, %rcx # imm = 0x3333333333333300
685-
; X64-NO-POPCOUNT-NEXT: movq %rax, %rdx
686-
; X64-NO-POPCOUNT-NEXT: andq %rcx, %rdx
687-
; X64-NO-POPCOUNT-NEXT: shrq $2, %rax
688-
; X64-NO-POPCOUNT-NEXT: andq %rcx, %rax
689-
; X64-NO-POPCOUNT-NEXT: addq %rdx, %rax
690-
; X64-NO-POPCOUNT-NEXT: movq %rax, %rcx
691-
; X64-NO-POPCOUNT-NEXT: shrq $4, %rcx
692-
; X64-NO-POPCOUNT-NEXT: addq %rax, %rcx
693-
; X64-NO-POPCOUNT-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
694-
; X64-NO-POPCOUNT-NEXT: andq %rcx, %rdx
695-
; X64-NO-POPCOUNT-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
696-
; X64-NO-POPCOUNT-NEXT: imulq %rdx, %rax
697-
; X64-NO-POPCOUNT-NEXT: shrq $56, %rax
575+
; X64-NO-POPCOUNT-NEXT: movq %rdi, %rax
576+
; X64-NO-POPCOUNT-NEXT: movzbl %ah, %eax
577+
; X64-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
578+
; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
579+
; X64-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
580+
; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
581+
; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
698582
; X64-NO-POPCOUNT-NEXT: retq
699583
%mask = and i64 %x, 65280 ; 255 << 8
700584
%count = tail call i64 @llvm.ctpop.i64(i64 %mask)

0 commit comments

Comments
 (0)