Skip to content

Commit 44a7d2f

Browse files
authored
[AArch64] Add patterns for add(x, trunc(shift)) (#168927)
This can be lowered to a 64-bit add where we only use the bottom 32 bits of the result. It is conceptually the same as https://alive2.llvm.org/ce/z/Xfz3Rf, but with the sext replaced by an anyext.
1 parent 675dc35 commit 44a7d2f

File tree

5 files changed

+83
-81
lines changed

5 files changed

+83
-81
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2752,6 +2752,20 @@ def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
27522752
(ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
27532753
}
27542754

2755+
2756+
def trunc_isWorthFoldingALU : PatFrag<(ops node:$src), (trunc $src)> {
2757+
let PredicateCode = [{ return isWorthFoldingALU(SDValue(N, 0)); }];
2758+
let GISelPredicateCode = [{ return isWorthFoldingIntoExtendedReg(MI, MRI, false); }];
2759+
}
2760+
2761+
// Patterns for (add/sub X, trunc(shift(Y))), for which we can generate 64-bit instructions.
2762+
def : Pat<(add GPR32:$Rn, (trunc_isWorthFoldingALU arith_shifted_reg64:$Rm)),
2763+
(EXTRACT_SUBREG (ADDXrs (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Rn, sub_32),
2764+
arith_shifted_reg64:$Rm), sub_32)>;
2765+
def : Pat<(sub GPR32:$Rn, (trunc_isWorthFoldingALU arith_shifted_reg64:$Rm)),
2766+
(EXTRACT_SUBREG (SUBXrs (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Rn, sub_32),
2767+
arith_shifted_reg64:$Rm), sub_32)>;
2768+
27552769
def : InstAlias<"neg $dst, $src",
27562770
(SUBWrs GPR32:$dst, WZR,
27572771
(arith_shifted_reg32 GPR32:$src, 0)), 3>;

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -417,10 +417,10 @@ class AArch64InstructionSelector : public InstructionSelector {
417417
}
418418

419419
std::optional<bool>
420-
isWorthFoldingIntoAddrMode(MachineInstr &MI,
420+
isWorthFoldingIntoAddrMode(const MachineInstr &MI,
421421
const MachineRegisterInfo &MRI) const;
422422

423-
bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
423+
bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
424424
const MachineRegisterInfo &MRI,
425425
bool IsAddrOperand) const;
426426
ComplexRendererFns
@@ -7068,7 +7068,7 @@ AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
70687068
/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
70697069
/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
70707070
std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7071-
MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7071+
const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
70727072
if (MI.getOpcode() == AArch64::G_SHL) {
70737073
// Address operands with shifts are free, except for running on subtargets
70747074
// with AddrLSLSlow14.
@@ -7089,7 +7089,7 @@ std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
70897089
/// \p IsAddrOperand whether the def of MI is used as an address operand
70907090
/// (e.g. feeding into an LDR/STR).
70917091
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7092-
MachineInstr &MI, const MachineRegisterInfo &MRI,
7092+
const MachineInstr &MI, const MachineRegisterInfo &MRI,
70937093
bool IsAddrOperand) const {
70947094

70957095
// Always fold if there is one use, or if we're optimizing for size.

llvm/test/CodeGen/AArch64/combine-sdiv.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,13 +1493,12 @@ define i5 @combine_i5_sdiv_const7(i5 %x) {
14931493
; CHECK-SD-LABEL: combine_i5_sdiv_const7:
14941494
; CHECK-SD: // %bb.0:
14951495
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
1496-
; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
1497-
; CHECK-SD-NEXT: sbfx x9, x0, #0, #5
1498-
; CHECK-SD-NEXT: movk x8, #37449, lsl #16
1499-
; CHECK-SD-NEXT: smull x8, w9, w8
1500-
; CHECK-SD-NEXT: lsl w9, w0, #27
1501-
; CHECK-SD-NEXT: lsr x8, x8, #32
1502-
; CHECK-SD-NEXT: add w8, w8, w9, asr #27
1496+
; CHECK-SD-NEXT: sbfx x8, x0, #0, #5
1497+
; CHECK-SD-NEXT: mov x9, #-56173 // =0xffffffffffff2493
1498+
; CHECK-SD-NEXT: movk x9, #37449, lsl #16
1499+
; CHECK-SD-NEXT: smull x8, w8, w9
1500+
; CHECK-SD-NEXT: sbfx w9, w0, #0, #5
1501+
; CHECK-SD-NEXT: add x8, x9, x8, lsr #32
15031502
; CHECK-SD-NEXT: asr w9, w8, #2
15041503
; CHECK-SD-NEXT: add w0, w9, w8, lsr #31
15051504
; CHECK-SD-NEXT: ret
@@ -1646,21 +1645,21 @@ define i32 @combine_i32_sdiv_const7(i32 %x) {
16461645
; CHECK-SD-LABEL: combine_i32_sdiv_const7:
16471646
; CHECK-SD: // %bb.0:
16481647
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
1648+
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
16491649
; CHECK-SD-NEXT: movk w8, #37449, lsl #16
16501650
; CHECK-SD-NEXT: smull x8, w0, w8
1651-
; CHECK-SD-NEXT: lsr x8, x8, #32
1652-
; CHECK-SD-NEXT: add w8, w8, w0
1651+
; CHECK-SD-NEXT: add x8, x0, x8, lsr #32
16531652
; CHECK-SD-NEXT: asr w9, w8, #2
16541653
; CHECK-SD-NEXT: add w0, w9, w8, lsr #31
16551654
; CHECK-SD-NEXT: ret
16561655
;
16571656
; CHECK-GI-LABEL: combine_i32_sdiv_const7:
16581657
; CHECK-GI: // %bb.0:
16591658
; CHECK-GI-NEXT: mov w8, #9363 // =0x2493
1659+
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
16601660
; CHECK-GI-NEXT: movk w8, #37449, lsl #16
16611661
; CHECK-GI-NEXT: smull x8, w0, w8
1662-
; CHECK-GI-NEXT: asr x8, x8, #32
1663-
; CHECK-GI-NEXT: add w8, w8, w0
1662+
; CHECK-GI-NEXT: add x8, x0, x8, asr #32
16641663
; CHECK-GI-NEXT: asr w8, w8, #2
16651664
; CHECK-GI-NEXT: add w0, w8, w8, lsr #31
16661665
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 51 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -247,10 +247,10 @@ define i32 @si32_7(i32 %a, i32 %b) {
247247
; CHECK-SD-LABEL: si32_7:
248248
; CHECK-SD: // %bb.0: // %entry
249249
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
250+
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
250251
; CHECK-SD-NEXT: movk w8, #37449, lsl #16
251252
; CHECK-SD-NEXT: smull x8, w0, w8
252-
; CHECK-SD-NEXT: lsr x8, x8, #32
253-
; CHECK-SD-NEXT: add w8, w8, w0
253+
; CHECK-SD-NEXT: add x8, x0, x8, lsr #32
254254
; CHECK-SD-NEXT: asr w9, w8, #2
255255
; CHECK-SD-NEXT: add w8, w9, w8, lsr #31
256256
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
@@ -260,10 +260,10 @@ define i32 @si32_7(i32 %a, i32 %b) {
260260
; CHECK-GI-LABEL: si32_7:
261261
; CHECK-GI: // %bb.0: // %entry
262262
; CHECK-GI-NEXT: mov w8, #9363 // =0x2493
263+
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
263264
; CHECK-GI-NEXT: movk w8, #37449, lsl #16
264265
; CHECK-GI-NEXT: smull x8, w0, w8
265-
; CHECK-GI-NEXT: asr x8, x8, #32
266-
; CHECK-GI-NEXT: add w8, w8, w0
266+
; CHECK-GI-NEXT: add x8, x0, x8, asr #32
267267
; CHECK-GI-NEXT: asr w8, w8, #2
268268
; CHECK-GI-NEXT: add w8, w8, w8, lsr #31
269269
; CHECK-GI-NEXT: lsl w9, w8, #3
@@ -801,13 +801,10 @@ define <3 x i8> @sv3i8_7(<3 x i8> %d, <3 x i8> %e) {
801801
; CHECK-SD-NEXT: smull x10, w10, w9
802802
; CHECK-SD-NEXT: smull x9, w11, w9
803803
; CHECK-SD-NEXT: sxtb w11, w2
804-
; CHECK-SD-NEXT: lsr x8, x8, #32
805-
; CHECK-SD-NEXT: lsr x10, x10, #32
806-
; CHECK-SD-NEXT: lsr x9, x9, #32
807-
; CHECK-SD-NEXT: add w8, w8, w13
808-
; CHECK-SD-NEXT: add w10, w10, w12
804+
; CHECK-SD-NEXT: add x8, x13, x8, lsr #32
805+
; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
806+
; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
809807
; CHECK-SD-NEXT: asr w14, w8, #2
810-
; CHECK-SD-NEXT: add w9, w9, w11
811808
; CHECK-SD-NEXT: asr w15, w10, #2
812809
; CHECK-SD-NEXT: asr w16, w9, #2
813810
; CHECK-SD-NEXT: add w8, w14, w8, lsr #31
@@ -899,45 +896,41 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
899896
; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
900897
; CHECK-SD-NEXT: movk x8, #37449, lsl #16
901898
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
902-
; CHECK-SD-NEXT: smov x9, v0.h[0]
903-
; CHECK-SD-NEXT: smov x10, v0.h[1]
904-
; CHECK-SD-NEXT: smov w11, v0.h[0]
905-
; CHECK-SD-NEXT: smov x12, v0.h[2]
906-
; CHECK-SD-NEXT: smov w13, v0.h[1]
907-
; CHECK-SD-NEXT: smov x14, v0.h[3]
908-
; CHECK-SD-NEXT: smov w16, v0.h[2]
909-
; CHECK-SD-NEXT: smull x9, w9, w8
899+
; CHECK-SD-NEXT: smov x10, v0.h[0]
900+
; CHECK-SD-NEXT: smov x9, v0.h[1]
901+
; CHECK-SD-NEXT: smov w12, v0.h[0]
902+
; CHECK-SD-NEXT: smov w11, v0.h[1]
903+
; CHECK-SD-NEXT: smov x13, v0.h[2]
904+
; CHECK-SD-NEXT: smov w14, v0.h[2]
905+
; CHECK-SD-NEXT: smov x17, v0.h[3]
910906
; CHECK-SD-NEXT: smull x10, w10, w8
911-
; CHECK-SD-NEXT: smull x12, w12, w8
912-
; CHECK-SD-NEXT: lsr x9, x9, #32
913-
; CHECK-SD-NEXT: smull x8, w14, w8
914-
; CHECK-SD-NEXT: smov w14, v0.h[3]
915-
; CHECK-SD-NEXT: lsr x10, x10, #32
916-
; CHECK-SD-NEXT: add w9, w9, w11
917-
; CHECK-SD-NEXT: lsr x12, x12, #32
907+
; CHECK-SD-NEXT: smull x9, w9, w8
908+
; CHECK-SD-NEXT: smull x13, w13, w8
909+
; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
910+
; CHECK-SD-NEXT: smull x8, w17, w8
911+
; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
912+
; CHECK-SD-NEXT: asr w16, w10, #2
913+
; CHECK-SD-NEXT: add x13, x14, x13, lsr #32
918914
; CHECK-SD-NEXT: asr w15, w9, #2
919-
; CHECK-SD-NEXT: add w10, w10, w13
920-
; CHECK-SD-NEXT: lsr x8, x8, #32
921-
; CHECK-SD-NEXT: asr w17, w10, #2
922-
; CHECK-SD-NEXT: add w12, w12, w16
915+
; CHECK-SD-NEXT: add w10, w16, w10, lsr #31
916+
; CHECK-SD-NEXT: asr w16, w13, #2
923917
; CHECK-SD-NEXT: add w9, w15, w9, lsr #31
924-
; CHECK-SD-NEXT: asr w15, w12, #2
925-
; CHECK-SD-NEXT: add w8, w8, w14
926-
; CHECK-SD-NEXT: add w10, w17, w10, lsr #31
927-
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
918+
; CHECK-SD-NEXT: smov w15, v0.h[3]
928919
; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3
929-
; CHECK-SD-NEXT: add w9, w11, w9
930-
; CHECK-SD-NEXT: fmov s0, w9
931-
; CHECK-SD-NEXT: add w10, w13, w10
932-
; CHECK-SD-NEXT: add w9, w15, w12, lsr #31
933920
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
934-
; CHECK-SD-NEXT: mov v0.h[1], w10
921+
; CHECK-SD-NEXT: add w10, w12, w10
922+
; CHECK-SD-NEXT: fmov s0, w10
923+
; CHECK-SD-NEXT: add w9, w11, w9
924+
; CHECK-SD-NEXT: add w10, w16, w13, lsr #31
925+
; CHECK-SD-NEXT: add x8, x15, x8, lsr #32
926+
; CHECK-SD-NEXT: mov v0.h[1], w9
927+
; CHECK-SD-NEXT: sub w9, w10, w10, lsl #3
935928
; CHECK-SD-NEXT: asr w10, w8, #2
936-
; CHECK-SD-NEXT: add w9, w16, w9
929+
; CHECK-SD-NEXT: add w9, w14, w9
937930
; CHECK-SD-NEXT: add w8, w10, w8, lsr #31
938931
; CHECK-SD-NEXT: mov v0.h[2], w9
939932
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
940-
; CHECK-SD-NEXT: add w8, w14, w8
933+
; CHECK-SD-NEXT: add w8, w15, w8
941934
; CHECK-SD-NEXT: mov v0.h[3], w8
942935
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
943936
; CHECK-SD-NEXT: ret
@@ -1770,32 +1763,29 @@ define <3 x i16> @sv3i16_7(<3 x i16> %d, <3 x i16> %e) {
17701763
; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
17711764
; CHECK-SD-NEXT: smov x10, v0.h[1]
17721765
; CHECK-SD-NEXT: movk x8, #37449, lsl #16
1773-
; CHECK-SD-NEXT: smov w12, v0.h[0]
1774-
; CHECK-SD-NEXT: smov x11, v0.h[2]
1775-
; CHECK-SD-NEXT: smov w13, v0.h[1]
1766+
; CHECK-SD-NEXT: smov w11, v0.h[0]
1767+
; CHECK-SD-NEXT: smov x13, v0.h[2]
1768+
; CHECK-SD-NEXT: smov w12, v0.h[1]
1769+
; CHECK-SD-NEXT: smov w14, v0.h[2]
17761770
; CHECK-SD-NEXT: smull x9, w9, w8
17771771
; CHECK-SD-NEXT: smull x10, w10, w8
1778-
; CHECK-SD-NEXT: smull x8, w11, w8
1779-
; CHECK-SD-NEXT: smov w11, v0.h[2]
1780-
; CHECK-SD-NEXT: lsr x9, x9, #32
1781-
; CHECK-SD-NEXT: lsr x10, x10, #32
1782-
; CHECK-SD-NEXT: add w9, w9, w12
1783-
; CHECK-SD-NEXT: lsr x8, x8, #32
1784-
; CHECK-SD-NEXT: asr w14, w9, #2
1785-
; CHECK-SD-NEXT: add w10, w10, w13
1772+
; CHECK-SD-NEXT: smull x8, w13, w8
1773+
; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
1774+
; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
1775+
; CHECK-SD-NEXT: asr w13, w9, #2
1776+
; CHECK-SD-NEXT: add x8, x14, x8, lsr #32
17861777
; CHECK-SD-NEXT: asr w15, w10, #2
1787-
; CHECK-SD-NEXT: add w8, w8, w11
1788-
; CHECK-SD-NEXT: add w9, w14, w9, lsr #31
1789-
; CHECK-SD-NEXT: asr w14, w8, #2
1778+
; CHECK-SD-NEXT: add w9, w13, w9, lsr #31
1779+
; CHECK-SD-NEXT: asr w13, w8, #2
17901780
; CHECK-SD-NEXT: add w10, w15, w10, lsr #31
17911781
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
1792-
; CHECK-SD-NEXT: add w8, w14, w8, lsr #31
1782+
; CHECK-SD-NEXT: add w8, w13, w8, lsr #31
17931783
; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3
1794-
; CHECK-SD-NEXT: add w9, w12, w9
1784+
; CHECK-SD-NEXT: add w9, w11, w9
17951785
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
17961786
; CHECK-SD-NEXT: fmov s0, w9
1797-
; CHECK-SD-NEXT: add w10, w13, w10
1798-
; CHECK-SD-NEXT: add w8, w11, w8
1787+
; CHECK-SD-NEXT: add w10, w12, w10
1788+
; CHECK-SD-NEXT: add w8, w14, w8
17991789
; CHECK-SD-NEXT: mov v0.h[1], w10
18001790
; CHECK-SD-NEXT: mov v0.h[2], w8
18011791
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -2444,15 +2434,14 @@ define <3 x i32> @sv3i32_7(<3 x i32> %d, <3 x i32> %e) {
24442434
; CHECK-SD-NEXT: dup v1.2s, w8
24452435
; CHECK-SD-NEXT: smull x8, w9, w8
24462436
; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s
2447-
; CHECK-SD-NEXT: lsr x8, x8, #32
2448-
; CHECK-SD-NEXT: add w8, w8, w9
2449-
; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32
2437+
; CHECK-SD-NEXT: add x8, x9, x8, lsr #32
24502438
; CHECK-SD-NEXT: asr w10, w8, #2
2439+
; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32
24512440
; CHECK-SD-NEXT: add w8, w10, w8, lsr #31
24522441
; CHECK-SD-NEXT: add v1.2s, v1.2s, v0.2s
24532442
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
2454-
; CHECK-SD-NEXT: sshr v2.2s, v1.2s, #2
24552443
; CHECK-SD-NEXT: add w8, w9, w8
2444+
; CHECK-SD-NEXT: sshr v2.2s, v1.2s, #2
24562445
; CHECK-SD-NEXT: usra v2.2s, v1.2s, #31
24572446
; CHECK-SD-NEXT: mls v0.2s, v2.2s, v3.2s
24582447
; CHECK-SD-NEXT: mov v0.s[2], w8

llvm/test/CodeGen/AArch64/srem-lkk.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ define i32 @fold_srem_positive_odd(i32 %x) {
55
; CHECK-LABEL: fold_srem_positive_odd:
66
; CHECK: // %bb.0:
77
; CHECK-NEXT: mov w8, #37253 // =0x9185
8+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
89
; CHECK-NEXT: movk w8, #44150, lsl #16
910
; CHECK-NEXT: smull x8, w0, w8
10-
; CHECK-NEXT: lsr x8, x8, #32
11-
; CHECK-NEXT: add w8, w8, w0
11+
; CHECK-NEXT: add x8, x0, x8, lsr #32
1212
; CHECK-NEXT: asr w9, w8, #6
1313
; CHECK-NEXT: add w8, w9, w8, lsr #31
1414
; CHECK-NEXT: mov w9, #95 // =0x5f
@@ -72,10 +72,10 @@ define i32 @combine_srem_sdiv(i32 %x) {
7272
; CHECK-LABEL: combine_srem_sdiv:
7373
; CHECK: // %bb.0:
7474
; CHECK-NEXT: mov w8, #37253 // =0x9185
75+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
7576
; CHECK-NEXT: movk w8, #44150, lsl #16
7677
; CHECK-NEXT: smull x8, w0, w8
77-
; CHECK-NEXT: lsr x8, x8, #32
78-
; CHECK-NEXT: add w8, w8, w0
78+
; CHECK-NEXT: add x8, x0, x8, lsr #32
7979
; CHECK-NEXT: asr w9, w8, #6
8080
; CHECK-NEXT: add w8, w9, w8, lsr #31
8181
; CHECK-NEXT: mov w9, #95 // =0x5f

0 commit comments

Comments
 (0)