Skip to content

Commit c4e7da3

Browse files
authored
[AArch64] shouldFoldMaskToVariableShiftPair should be true for scalars up to the biggest legal type (#158069)
For AArch64, we want to do this for scalar types up to 64 bits; otherwise the generated code is bloated.
1 parent 178651a commit c4e7da3

File tree

4 files changed

+161
-93
lines changed

4 files changed

+161
-93
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,16 @@ class AArch64TargetLowering : public TargetLowering {
303303
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
304304
CombineLevel Level) const override;
305305

306+
/// Return true if a mask built from a variable shift, e.g. `x & (-1 << y)`,
/// should instead be emitted as a pair of variable shifts, `(x >> y) << y`.
/// Returns true only for scalar types of at most 64 bits; returns false for
/// all vector types and for wider scalars.
307+
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override {
308+
EVT VT = Y.getValueType();
309+
310+
if (VT.isVector())
311+
return false;
312+
313+
return VT.getScalarSizeInBits() <= 64;
314+
}
315+
306316
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
307317
unsigned SelectOpcode, SDValue X,
308318
SDValue Y) const override;
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; i32 case of `%x & (-1 << %y)`. The CHECK-SD lines expect the lsr/lsl shift
; pair; the CHECK-GI lines expect the mov/lsl/and mask sequence.
define i32 @mask_pair(i32 %x, i32 %y) {
6+
; CHECK-SD-LABEL: mask_pair:
7+
; CHECK-SD: // %bb.0:
8+
; CHECK-SD-NEXT: lsr w8, w0, w1
9+
; CHECK-SD-NEXT: lsl w0, w8, w1
10+
; CHECK-SD-NEXT: ret
11+
;
12+
; CHECK-GI-LABEL: mask_pair:
13+
; CHECK-GI: // %bb.0:
14+
; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff
15+
; CHECK-GI-NEXT: lsl w8, w8, w1
16+
; CHECK-GI-NEXT: and w0, w8, w0
17+
; CHECK-GI-NEXT: ret
18+
%shl = shl nsw i32 -1, %y
19+
%and = and i32 %shl, %x
20+
ret i32 %and
21+
}
22+
23+
; i64 case of `%x & (-1 << %y)`. The CHECK-SD lines expect the lsr/lsl shift
; pair; the CHECK-GI lines expect the mov/lsl/and mask sequence.
define i64 @mask_pair_64(i64 %x, i64 %y) {
24+
; CHECK-SD-LABEL: mask_pair_64:
25+
; CHECK-SD: // %bb.0:
26+
; CHECK-SD-NEXT: lsr x8, x0, x1
27+
; CHECK-SD-NEXT: lsl x0, x8, x1
28+
; CHECK-SD-NEXT: ret
29+
;
30+
; CHECK-GI-LABEL: mask_pair_64:
31+
; CHECK-GI: // %bb.0:
32+
; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
33+
; CHECK-GI-NEXT: lsl x8, x8, x1
34+
; CHECK-GI-NEXT: and x0, x8, x0
35+
; CHECK-GI-NEXT: ret
36+
%shl = shl nsw i64 -1, %y
37+
%and = and i64 %shl, %x
38+
ret i64 %and
39+
}
40+
41+
; i128 case of `%x & (-1 << %y)`, wider than 64 bits. Both the CHECK-SD and
; CHECK-GI lines expect the mask to be materialised and applied with `and`
; on each 64-bit half; no lsr/lsl shift pair on %x is expected here.
define i128 @mask_pair_128(i128 %x, i128 %y) {
42+
; CHECK-SD-LABEL: mask_pair_128:
43+
; CHECK-SD: // %bb.0:
44+
; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
45+
; CHECK-SD-NEXT: mvn w9, w2
46+
; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
47+
; CHECK-SD-NEXT: lsl x8, x8, x2
48+
; CHECK-SD-NEXT: lsr x9, x10, x9
49+
; CHECK-SD-NEXT: tst x2, #0x40
50+
; CHECK-SD-NEXT: orr x9, x8, x9
51+
; CHECK-SD-NEXT: csel x9, x8, x9, ne
52+
; CHECK-SD-NEXT: csel x8, xzr, x8, ne
53+
; CHECK-SD-NEXT: and x0, x8, x0
54+
; CHECK-SD-NEXT: and x1, x9, x1
55+
; CHECK-SD-NEXT: ret
56+
;
57+
; CHECK-GI-LABEL: mask_pair_128:
58+
; CHECK-GI: // %bb.0:
59+
; CHECK-GI-NEXT: mov w8, #64 // =0x40
60+
; CHECK-GI-NEXT: mov x9, #-1 // =0xffffffffffffffff
61+
; CHECK-GI-NEXT: sub x10, x2, #64
62+
; CHECK-GI-NEXT: sub x8, x8, x2
63+
; CHECK-GI-NEXT: lsl x11, x9, x2
64+
; CHECK-GI-NEXT: cmp x2, #64
65+
; CHECK-GI-NEXT: lsr x8, x9, x8
66+
; CHECK-GI-NEXT: lsl x9, x9, x10
67+
; CHECK-GI-NEXT: csel x10, x11, xzr, lo
68+
; CHECK-GI-NEXT: orr x8, x8, x11
69+
; CHECK-GI-NEXT: and x0, x10, x0
70+
; CHECK-GI-NEXT: csel x8, x8, x9, lo
71+
; CHECK-GI-NEXT: cmp x2, #0
72+
; CHECK-GI-NEXT: csinv x8, x8, xzr, ne
73+
; CHECK-GI-NEXT: and x1, x8, x1
74+
; CHECK-GI-NEXT: ret
75+
%shl = shl nsw i128 -1, %y
76+
%and = and i128 %shl, %x
77+
ret i128 %and
78+
}
79+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
80+
; CHECK: {{.*}}

llvm/test/CodeGen/AArch64/extract-bits.ll

Lines changed: 43 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -532,11 +532,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
532532
define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
533533
; CHECK-LABEL: bextr32_c0:
534534
; CHECK: // %bb.0:
535-
; CHECK-NEXT: neg w8, w2
536-
; CHECK-NEXT: mov w9, #-1 // =0xffffffff
537-
; CHECK-NEXT: lsr w10, w0, w1
538-
; CHECK-NEXT: lsr w8, w9, w8
539-
; CHECK-NEXT: and w0, w8, w10
535+
; CHECK-NEXT: lsr w8, w0, w1
536+
; CHECK-NEXT: neg w9, w2
537+
; CHECK-NEXT: lsl w8, w8, w9
538+
; CHECK-NEXT: lsr w0, w8, w9
540539
; CHECK-NEXT: ret
541540
%shifted = lshr i32 %val, %numskipbits
542541
%numhighbits = sub i32 32, %numlowbits
@@ -548,12 +547,11 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
548547
define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
549548
; CHECK-LABEL: bextr32_c1_indexzext:
550549
; CHECK: // %bb.0:
551-
; CHECK-NEXT: mov w8, #32 // =0x20
552-
; CHECK-NEXT: mov w9, #-1 // =0xffffffff
553-
; CHECK-NEXT: lsr w10, w0, w1
554-
; CHECK-NEXT: sub w8, w8, w2
555-
; CHECK-NEXT: lsr w8, w9, w8
556-
; CHECK-NEXT: and w0, w8, w10
550+
; CHECK-NEXT: lsr w8, w0, w1
551+
; CHECK-NEXT: mov w9, #32 // =0x20
552+
; CHECK-NEXT: sub w9, w9, w2
553+
; CHECK-NEXT: lsl w8, w8, w9
554+
; CHECK-NEXT: lsr w0, w8, w9
557555
; CHECK-NEXT: ret
558556
%skip = zext i8 %numskipbits to i32
559557
%shifted = lshr i32 %val, %skip
@@ -569,10 +567,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
569567
; CHECK: // %bb.0:
570568
; CHECK-NEXT: ldr w8, [x0]
571569
; CHECK-NEXT: neg w9, w2
572-
; CHECK-NEXT: mov w10, #-1 // =0xffffffff
573-
; CHECK-NEXT: lsr w9, w10, w9
574570
; CHECK-NEXT: lsr w8, w8, w1
575-
; CHECK-NEXT: and w0, w9, w8
571+
; CHECK-NEXT: lsl w8, w8, w9
572+
; CHECK-NEXT: lsr w0, w8, w9
576573
; CHECK-NEXT: ret
577574
%val = load i32, ptr %w
578575
%shifted = lshr i32 %val, %numskipbits
@@ -587,11 +584,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
587584
; CHECK: // %bb.0:
588585
; CHECK-NEXT: ldr w8, [x0]
589586
; CHECK-NEXT: mov w9, #32 // =0x20
590-
; CHECK-NEXT: mov w10, #-1 // =0xffffffff
591587
; CHECK-NEXT: sub w9, w9, w2
592588
; CHECK-NEXT: lsr w8, w8, w1
593-
; CHECK-NEXT: lsr w9, w10, w9
594-
; CHECK-NEXT: and w0, w9, w8
589+
; CHECK-NEXT: lsl w8, w8, w9
590+
; CHECK-NEXT: lsr w0, w8, w9
595591
; CHECK-NEXT: ret
596592
%val = load i32, ptr %w
597593
%skip = zext i8 %numskipbits to i32
@@ -606,11 +602,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
606602
define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
607603
; CHECK-LABEL: bextr32_c4_commutative:
608604
; CHECK: // %bb.0:
609-
; CHECK-NEXT: neg w8, w2
610-
; CHECK-NEXT: mov w9, #-1 // =0xffffffff
611-
; CHECK-NEXT: lsr w10, w0, w1
612-
; CHECK-NEXT: lsr w8, w9, w8
613-
; CHECK-NEXT: and w0, w10, w8
605+
; CHECK-NEXT: lsr w8, w0, w1
606+
; CHECK-NEXT: neg w9, w2
607+
; CHECK-NEXT: lsl w8, w8, w9
608+
; CHECK-NEXT: lsr w0, w8, w9
614609
; CHECK-NEXT: ret
615610
%shifted = lshr i32 %val, %numskipbits
616611
%numhighbits = sub i32 32, %numlowbits
@@ -624,11 +619,10 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
624619
define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
625620
; CHECK-LABEL: bextr64_c0:
626621
; CHECK: // %bb.0:
627-
; CHECK-NEXT: neg x8, x2
628-
; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
629-
; CHECK-NEXT: lsr x10, x0, x1
630-
; CHECK-NEXT: lsr x8, x9, x8
631-
; CHECK-NEXT: and x0, x8, x10
622+
; CHECK-NEXT: lsr x8, x0, x1
623+
; CHECK-NEXT: neg x9, x2
624+
; CHECK-NEXT: lsl x8, x8, x9
625+
; CHECK-NEXT: lsr x0, x8, x9
632626
; CHECK-NEXT: ret
633627
%shifted = lshr i64 %val, %numskipbits
634628
%numhighbits = sub i64 64, %numlowbits
@@ -640,13 +634,12 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
640634
define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
641635
; CHECK-LABEL: bextr64_c1_indexzext:
642636
; CHECK: // %bb.0:
643-
; CHECK-NEXT: mov w8, #64 // =0x40
644-
; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
645637
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
646-
; CHECK-NEXT: lsr x10, x0, x1
647-
; CHECK-NEXT: sub w8, w8, w2
648-
; CHECK-NEXT: lsr x8, x9, x8
649-
; CHECK-NEXT: and x0, x8, x10
638+
; CHECK-NEXT: lsr x8, x0, x1
639+
; CHECK-NEXT: mov w9, #64 // =0x40
640+
; CHECK-NEXT: sub w9, w9, w2
641+
; CHECK-NEXT: lsl x8, x8, x9
642+
; CHECK-NEXT: lsr x0, x8, x9
650643
; CHECK-NEXT: ret
651644
%skip = zext i8 %numskipbits to i64
652645
%shifted = lshr i64 %val, %skip
@@ -662,10 +655,9 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
662655
; CHECK: // %bb.0:
663656
; CHECK-NEXT: ldr x8, [x0]
664657
; CHECK-NEXT: neg x9, x2
665-
; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
666-
; CHECK-NEXT: lsr x9, x10, x9
667658
; CHECK-NEXT: lsr x8, x8, x1
668-
; CHECK-NEXT: and x0, x9, x8
659+
; CHECK-NEXT: lsl x8, x8, x9
660+
; CHECK-NEXT: lsr x0, x8, x9
669661
; CHECK-NEXT: ret
670662
%val = load i64, ptr %w
671663
%shifted = lshr i64 %val, %numskipbits
@@ -679,13 +671,12 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
679671
; CHECK-LABEL: bextr64_c3_load_indexzext:
680672
; CHECK: // %bb.0:
681673
; CHECK-NEXT: ldr x8, [x0]
674+
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
682675
; CHECK-NEXT: mov w9, #64 // =0x40
683-
; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
684676
; CHECK-NEXT: sub w9, w9, w2
685-
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
686677
; CHECK-NEXT: lsr x8, x8, x1
687-
; CHECK-NEXT: lsr x9, x10, x9
688-
; CHECK-NEXT: and x0, x9, x8
678+
; CHECK-NEXT: lsl x8, x8, x9
679+
; CHECK-NEXT: lsr x0, x8, x9
689680
; CHECK-NEXT: ret
690681
%val = load i64, ptr %w
691682
%skip = zext i8 %numskipbits to i64
@@ -700,11 +691,10 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
700691
define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
701692
; CHECK-LABEL: bextr64_c4_commutative:
702693
; CHECK: // %bb.0:
703-
; CHECK-NEXT: neg x8, x2
704-
; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
705-
; CHECK-NEXT: lsr x10, x0, x1
706-
; CHECK-NEXT: lsr x8, x9, x8
707-
; CHECK-NEXT: and x0, x10, x8
694+
; CHECK-NEXT: lsr x8, x0, x1
695+
; CHECK-NEXT: neg x9, x2
696+
; CHECK-NEXT: lsl x8, x8, x9
697+
; CHECK-NEXT: lsr x0, x8, x9
708698
; CHECK-NEXT: ret
709699
%shifted = lshr i64 %val, %numskipbits
710700
%numhighbits = sub i64 64, %numlowbits
@@ -737,11 +727,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
737727
define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
738728
; CHECK-LABEL: bextr64_32_c1:
739729
; CHECK: // %bb.0:
740-
; CHECK-NEXT: neg w8, w2
741-
; CHECK-NEXT: mov w9, #-1 // =0xffffffff
742-
; CHECK-NEXT: lsr x10, x0, x1
743-
; CHECK-NEXT: lsr w8, w9, w8
744-
; CHECK-NEXT: and w0, w8, w10
730+
; CHECK-NEXT: lsr x8, x0, x1
731+
; CHECK-NEXT: neg w9, w2
732+
; CHECK-NEXT: lsl w8, w8, w9
733+
; CHECK-NEXT: lsr w0, w8, w9
745734
; CHECK-NEXT: ret
746735
%shifted = lshr i64 %val, %numskipbits
747736
%truncshifted = trunc i64 %shifted to i32
@@ -756,11 +745,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
756745
define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
757746
; CHECK-LABEL: bextr64_32_c2:
758747
; CHECK: // %bb.0:
759-
; CHECK-NEXT: neg w8, w2
760-
; CHECK-NEXT: mov w9, #-1 // =0xffffffff
761-
; CHECK-NEXT: lsr x10, x0, x1
762-
; CHECK-NEXT: lsr w8, w9, w8
763-
; CHECK-NEXT: and w0, w8, w10
748+
; CHECK-NEXT: lsr x8, x0, x1
749+
; CHECK-NEXT: neg w9, w2
750+
; CHECK-NEXT: lsl w8, w8, w9
751+
; CHECK-NEXT: lsr w0, w8, w9
764752
; CHECK-NEXT: ret
765753
%shifted = lshr i64 %val, %numskipbits
766754
%numhighbits = sub i32 32, %numlowbits

0 commit comments

Comments
 (0)