Skip to content

Commit 619d36f

Browse files
authored
[GISel] Combine shift + trunc + shift pattern (#155583)
Folds shift(trunc(shift(...))) pattern into trunc(shift(...)) by combining the two shift instructions
1 parent 6580c91 commit 619d36f

File tree

13 files changed

+699
-523
lines changed

13 files changed

+699
-523
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,15 @@ struct ShiftOfShiftedLogic {
8181
uint64_t ValSum;
8282
};
8383

84+
/// Match data for the (lshr (trunc (lshr x, C1)), C2) fold. It is filled in by
/// CombinerHelper::matchLshrOfTruncOfLshr and read by
/// CombinerHelper::applyLshrOfTruncOfLshr.
struct LshrOfTruncOfLshr {
85+
/// True when the combined wide shift must be ANDed with a low-bits mask
/// before it is truncated.
bool Mask = false;
86+
/// Number of low bits to keep when Mask is set (narrow width minus the outer
/// shift amount). Only its numeric value is read, not its bit width.
APInt MaskVal;
87+
/// Value operand of the inner (wide) shift, i.e. `x`.
Register Src;
88+
/// Combined shift amount C1 + C2.
APInt ShiftAmt;
89+
/// Type of the inner shift's amount operand; used for the new amount constant.
LLT ShiftAmtTy;
90+
/// Result type of the inner shift; used for the new wide shift and the mask.
LLT InnerShiftTy;
91+
};
92+
8493
using BuildFnTy = std::function<void(MachineIRBuilder &)>;
8594

8695
using OperandBuildSteps =
@@ -338,6 +347,12 @@ class CombinerHelper {
338347

339348
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const;
340349

350+
/// Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (lshr x, (C1 + C2))
351+
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo,
352+
MachineInstr &ShiftMI) const;
353+
void applyLshrOfTruncOfLshr(MachineInstr &MI,
354+
LshrOfTruncOfLshr &MatchInfo) const;
355+
341356
/// Transform a multiply by a power-of-2 value to a left shift.
342357
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const;
343358
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,16 @@ def commute_shift : GICombineRule<
396396
[{ return Helper.matchCommuteShift(*${d}, ${matchinfo}); }]),
397397
(apply [{ Helper.applyBuildFn(*${d}, ${matchinfo}); }])>;
398398

399+
// Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (lshr x, (C1 + C2))
// $Shift is the inner (wide) G_LSHR and $root the outer (narrow) one. Both are
// handed to the C++ matcher, which requires $y and $z to be constants or
// constant splats.
400+
def lshr_of_trunc_of_lshr_matchdata : GIDefMatchData<"LshrOfTruncOfLshr">;
401+
def lshr_of_trunc_of_lshr : GICombineRule<
402+
(defs root:$root, lshr_of_trunc_of_lshr_matchdata:$matchinfo),
403+
(match (G_LSHR $d1, $x, $y):$Shift,
404+
(G_TRUNC $d2, $d1),
405+
(G_LSHR $dst, $d2, $z):$root,
406+
[{ return Helper.matchLshrOfTruncOfLshr(*${root}, ${matchinfo}, *${Shift}); }]),
407+
(apply [{ Helper.applyLshrOfTruncOfLshr(*${root}, ${matchinfo}); }])>;
408+
399409
def narrow_binop_feeding_and : GICombineRule<
400410
(defs root:$root, build_fn_matchinfo:$matchinfo),
401411
(match (wip_match_opcode G_AND):$root,
@@ -2133,7 +2143,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
21332143
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
21342144
simplify_neg_minmax, combine_concat_vector,
21352145
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
2136-
combine_use_vector_truncate, merge_combines, overflow_combines, truncsat_combines]>;
2146+
combine_use_vector_truncate, merge_combines, overflow_combines,
2147+
truncsat_combines, lshr_of_trunc_of_lshr]>;
21372148

21382149
// A combine group used for prelegalizer combiners at -O0. The combines in
21392150
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,6 +2094,68 @@ bool CombinerHelper::matchCommuteShift(MachineInstr &MI,
20942094
return true;
20952095
}
20962096

2097+
/// Match (G_LSHR (G_TRUNC (G_LSHR x, C1)), C2), where C1 and C2 are constants
/// or constant splats, so that it can be rebuilt as one wide shift by C1 + C2
/// followed by a truncate.
///
/// \param MI        The outer G_LSHR (the combine root).
/// \param MatchInfo Filled in on success with the source register, the
///                  combined shift amount and its type, the wide shift type
///                  and, when needed, the mask description.
/// \param ShiftMI   The inner G_LSHR whose result feeds the G_TRUNC.
/// \returns true if the pattern can be folded.
bool CombinerHelper::matchLshrOfTruncOfLshr(MachineInstr &MI,
                                            LshrOfTruncOfLshr &MatchInfo,
                                            MachineInstr &ShiftMI) const {
  assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");

  Register N0 = MI.getOperand(1).getReg();
  Register N1 = MI.getOperand(2).getReg();
  unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();

  APInt N1C, N001C;
  if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
    return false;
  Register N001 = ShiftMI.getOperand(2).getReg();
  if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
    return false;

  // An outer shift by at least the narrow width has no defined result to
  // preserve. Folding it would also make `OpSizeInBits - N1C` below wrap
  // around and describe a mask wider than the wide type.
  if (N1C.uge(OpSizeInBits))
    return false;

  // Bring both amounts to a common width before adding them.
  if (N001C.getBitWidth() > N1C.getBitWidth())
    N1C = N1C.zext(N001C.getBitWidth());
  else
    N001C = N001C.zext(N1C.getBitWidth());

  Register InnerShift = ShiftMI.getOperand(0).getReg();
  LLT InnerShiftTy = MRI.getType(InnerShift);
  uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();

  // Use an overflow-checked add: a wrapped sum could pass the range check
  // below while denoting a different shift amount.
  bool Overflow = false;
  APInt TotalShift = N1C.uadd_ov(N001C, Overflow);
  if (Overflow || TotalShift.uge(InnerShiftSize))
    return false;

  // The new amount constant is built with the inner shift's amount type, so
  // the value must fit that type and carry exactly its scalar width.
  LLT ShiftAmtTy = MRI.getType(N001);
  unsigned ShiftAmtBits = ShiftAmtTy.getScalarSizeInBits();
  if (!TotalShift.isIntN(ShiftAmtBits))
    return false;

  // If the truncate keeps exactly the bits the inner shift moved down from the
  // top, the bits above them are already zero and no mask is needed.
  bool NeedsMask = !(N001C == (InnerShiftSize - OpSizeInBits));

  // The masked form adds an AND, so only use it when the intermediate values
  // have no other users. Debug uses are ignored so that debug info does not
  // change whether the fold happens.
  if (NeedsMask &&
      !(MRI.hasOneNonDBGUse(N0) && MRI.hasOneNonDBGUse(InnerShift)))
    return false;

  MatchInfo.Src = ShiftMI.getOperand(1).getReg();
  MatchInfo.ShiftAmt = TotalShift.zextOrTrunc(ShiftAmtBits);
  MatchInfo.ShiftAmtTy = ShiftAmtTy;
  MatchInfo.InnerShiftTy = InnerShiftTy;
  MatchInfo.Mask = NeedsMask;
  if (NeedsMask) {
    // Number of low bits that survive the outer shift in the narrow type.
    MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
  }
  return true;
}
2137+
2138+
/// Replace the matched outer G_LSHR with a truncate of a single wide G_LSHR by
/// the combined amount. When MatchInfo.Mask is set, the wide shift result is
/// first ANDed with a mask of MatchInfo.MaskVal low bits.
///
/// \param MI        The outer G_LSHR; erased once the replacement is built.
/// \param MatchInfo Data recorded by matchLshrOfTruncOfLshr.
void CombinerHelper::applyLshrOfTruncOfLshr(
    MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");

  const LLT WideTy = MatchInfo.InnerShiftTy;
  Register Dst = MI.getOperand(0).getReg();

  // One wide shift by the combined amount replaces both original shifts.
  auto CombinedAmt =
      Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
  auto WideShift = Builder.buildLShr(WideTy, MatchInfo.Src, CombinedAmt);

  if (MatchInfo.Mask) {
    // Clear the bits the original narrow shift would have shifted in as zero.
    APInt LowBitsMask = APInt::getLowBitsSet(WideTy.getScalarSizeInBits(),
                                             MatchInfo.MaskVal.getZExtValue());
    auto MaskCst = Builder.buildConstant(WideTy, LowBitsMask);
    auto Masked = Builder.buildAnd(WideTy, WideShift, MaskCst);
    Builder.buildTrunc(Dst, Masked);
  } else {
    Builder.buildTrunc(Dst, WideShift);
  }

  MI.eraseFromParent();
}
2158+
20972159
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
20982160
unsigned &ShiftVal) const {
20992161
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,5 +369,5 @@ def AArch64PostLegalizerCombiner
369369
commute_constant_to_rhs, extract_vec_elt_combines,
370370
push_freeze_to_prevent_poison_from_propagating,
371371
combine_mul_cmlt, combine_use_vector_truncate,
372-
extmultomull, truncsat_combines]> {
372+
extmultomull, truncsat_combines, lshr_of_trunc_of_lshr]> {
373373
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
define i32 @s32_test1(i64 %a) {
6+
; CHECK-LABEL: s32_test1:
7+
; CHECK: // %bb.0:
8+
; CHECK-NEXT: lsr x0, x0, #48
9+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
10+
; CHECK-NEXT: ret
11+
%r = lshr i64 %a, 32
12+
%ret = trunc i64 %r to i32
13+
%x = lshr i32 %ret, 16
14+
ret i32 %x
15+
}
16+
17+
define i32 @s32_test2(i64 %a) {
18+
; CHECK-LABEL: s32_test2:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: ubfx x0, x0, #32, #16
21+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
22+
; CHECK-NEXT: ret
23+
%r = lshr i64 %a, 16
24+
%ret = trunc i64 %r to i32
25+
%x = lshr i32 %ret, 16
26+
ret i32 %x
27+
}
28+
29+
define <8 x i8> @v8s8_test1(<8 x i16> %a) {
30+
; CHECK-LABEL: v8s8_test1:
31+
; CHECK: // %bb.0:
32+
; CHECK-NEXT: ushr v0.8h, v0.8h, #12
33+
; CHECK-NEXT: xtn v0.8b, v0.8h
34+
; CHECK-NEXT: ret
35+
%r = lshr <8 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
36+
%ret = trunc <8 x i16> %r to <8 x i8>
37+
%x = lshr <8 x i8> %ret, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
38+
ret <8 x i8> %x
39+
}
40+
41+
define <8 x i8> @v8s8_test2(<8 x i16> %a) {
42+
; CHECK-SD-LABEL: v8s8_test2:
43+
; CHECK-SD: // %bb.0:
44+
; CHECK-SD-NEXT: ushr v0.8h, v0.8h, #8
45+
; CHECK-SD-NEXT: bic v0.8h, #240
46+
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
47+
; CHECK-SD-NEXT: ret
48+
;
49+
; CHECK-GI-LABEL: v8s8_test2:
50+
; CHECK-GI: // %bb.0:
51+
; CHECK-GI-NEXT: movi v1.8h, #15
52+
; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #8
53+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
54+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
55+
; CHECK-GI-NEXT: ret
56+
%r = lshr <8 x i16> %a, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
57+
%ret = trunc <8 x i16> %r to <8 x i8>
58+
%x = lshr <8 x i8> %ret, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
59+
ret <8 x i8> %x
60+
}
61+
62+
define <4 x i16> @v4s16_test1(<4 x i32> %a) {
63+
; CHECK-LABEL: v4s16_test1:
64+
; CHECK: // %bb.0:
65+
; CHECK-NEXT: ushr v0.4s, v0.4s, #24
66+
; CHECK-NEXT: xtn v0.4h, v0.4s
67+
; CHECK-NEXT: ret
68+
%r = lshr <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
69+
%ret = trunc <4 x i32> %r to <4 x i16>
70+
%x = lshr <4 x i16> %ret, <i16 8, i16 8, i16 8, i16 8>
71+
ret <4 x i16> %x
72+
}
73+
74+
define <4 x i16> @v4s16_test2(<4 x i32> %a) {
75+
; CHECK-SD-LABEL: v4s16_test2:
76+
; CHECK-SD: // %bb.0:
77+
; CHECK-SD-NEXT: shrn v0.4h, v0.4s, #16
78+
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
79+
; CHECK-SD-NEXT: ret
80+
;
81+
; CHECK-GI-LABEL: v4s16_test2:
82+
; CHECK-GI: // %bb.0:
83+
; CHECK-GI-NEXT: movi v1.2d, #0x0000ff000000ff
84+
; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #16
85+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
86+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
87+
; CHECK-GI-NEXT: ret
88+
%r = lshr <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
89+
%ret = trunc <4 x i32> %r to <4 x i16>
90+
%x = lshr <4 x i16> %ret, <i16 8, i16 8, i16 8, i16 8>
91+
ret <4 x i16> %x
92+
}
93+
94+
define <2 x i32> @v2s32_test1(<2 x i64> %a) {
95+
; CHECK-LABEL: v2s32_test1:
96+
; CHECK: // %bb.0:
97+
; CHECK-NEXT: ushr v0.2d, v0.2d, #48
98+
; CHECK-NEXT: xtn v0.2s, v0.2d
99+
; CHECK-NEXT: ret
100+
%r = lshr <2 x i64> %a, <i64 32, i64 32>
101+
%ret = trunc <2 x i64> %r to <2 x i32>
102+
%x = lshr <2 x i32> %ret, <i32 16, i32 16>
103+
ret <2 x i32> %x
104+
}
105+
106+
define <2 x i32> @v2s32_test2(<2 x i64> %a) {
107+
; CHECK-SD-LABEL: v2s32_test2:
108+
; CHECK-SD: // %bb.0:
109+
; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
110+
; CHECK-SD-NEXT: shrn v0.2s, v0.2d, #32
111+
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
112+
; CHECK-SD-NEXT: ret
113+
;
114+
; CHECK-GI-LABEL: v2s32_test2:
115+
; CHECK-GI: // %bb.0:
116+
; CHECK-GI-NEXT: movi v1.2d, #0x0000000000ffff
117+
; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #32
118+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
119+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
120+
; CHECK-GI-NEXT: ret
121+
%r = lshr <2 x i64> %a, <i64 16, i64 16>
122+
%ret = trunc <2 x i64> %r to <2 x i32>
123+
%x = lshr <2 x i32> %ret, <i32 16, i32 16>
124+
ret <2 x i32> %x
125+
}

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -336,26 +336,15 @@ entry:
336336
}
337337

338338
define i32 @ui32_100(i32 %a, i32 %b) {
339-
; CHECK-SD-LABEL: ui32_100:
340-
; CHECK-SD: // %bb.0: // %entry
341-
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
342-
; CHECK-SD-NEXT: mov w9, #100 // =0x64
343-
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
344-
; CHECK-SD-NEXT: umull x8, w0, w8
345-
; CHECK-SD-NEXT: lsr x8, x8, #37
346-
; CHECK-SD-NEXT: msub w0, w8, w9, w0
347-
; CHECK-SD-NEXT: ret
348-
;
349-
; CHECK-GI-LABEL: ui32_100:
350-
; CHECK-GI: // %bb.0: // %entry
351-
; CHECK-GI-NEXT: mov w8, #34079 // =0x851f
352-
; CHECK-GI-NEXT: mov w9, #100 // =0x64
353-
; CHECK-GI-NEXT: movk w8, #20971, lsl #16
354-
; CHECK-GI-NEXT: umull x8, w0, w8
355-
; CHECK-GI-NEXT: lsr x8, x8, #32
356-
; CHECK-GI-NEXT: lsr w8, w8, #5
357-
; CHECK-GI-NEXT: msub w0, w8, w9, w0
358-
; CHECK-GI-NEXT: ret
339+
; CHECK-LABEL: ui32_100:
340+
; CHECK: // %bb.0: // %entry
341+
; CHECK-NEXT: mov w8, #34079 // =0x851f
342+
; CHECK-NEXT: mov w9, #100 // =0x64
343+
; CHECK-NEXT: movk w8, #20971, lsl #16
344+
; CHECK-NEXT: umull x8, w0, w8
345+
; CHECK-NEXT: lsr x8, x8, #37
346+
; CHECK-NEXT: msub w0, w8, w9, w0
347+
; CHECK-NEXT: ret
359348
entry:
360349
%s = urem i32 %a, 100
361350
ret i32 %s
@@ -1619,15 +1608,25 @@ entry:
16191608
}
16201609

16211610
define <8 x i8> @uv8i8_100(<8 x i8> %d, <8 x i8> %e) {
1622-
; CHECK-LABEL: uv8i8_100:
1623-
; CHECK: // %bb.0: // %entry
1624-
; CHECK-NEXT: movi v1.8b, #41
1625-
; CHECK-NEXT: movi v2.8b, #100
1626-
; CHECK-NEXT: umull v1.8h, v0.8b, v1.8b
1627-
; CHECK-NEXT: shrn v1.8b, v1.8h, #8
1628-
; CHECK-NEXT: ushr v1.8b, v1.8b, #4
1629-
; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b
1630-
; CHECK-NEXT: ret
1611+
; CHECK-SD-LABEL: uv8i8_100:
1612+
; CHECK-SD: // %bb.0: // %entry
1613+
; CHECK-SD-NEXT: movi v1.8b, #41
1614+
; CHECK-SD-NEXT: movi v2.8b, #100
1615+
; CHECK-SD-NEXT: umull v1.8h, v0.8b, v1.8b
1616+
; CHECK-SD-NEXT: shrn v1.8b, v1.8h, #8
1617+
; CHECK-SD-NEXT: ushr v1.8b, v1.8b, #4
1618+
; CHECK-SD-NEXT: mls v0.8b, v1.8b, v2.8b
1619+
; CHECK-SD-NEXT: ret
1620+
;
1621+
; CHECK-GI-LABEL: uv8i8_100:
1622+
; CHECK-GI: // %bb.0: // %entry
1623+
; CHECK-GI-NEXT: movi v1.8b, #41
1624+
; CHECK-GI-NEXT: movi v2.8b, #100
1625+
; CHECK-GI-NEXT: umull v1.8h, v0.8b, v1.8b
1626+
; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #12
1627+
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
1628+
; CHECK-GI-NEXT: mls v0.8b, v1.8b, v2.8b
1629+
; CHECK-GI-NEXT: ret
16311630
entry:
16321631
%s = urem <8 x i8> %d, <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
16331632
ret <8 x i8> %s
@@ -2301,8 +2300,8 @@ define <4 x i16> @uv4i16_100(<4 x i16> %d, <4 x i16> %e) {
23012300
; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI53_0]
23022301
; CHECK-GI-NEXT: umull v1.4s, v1.4h, v2.4h
23032302
; CHECK-GI-NEXT: movi v2.4h, #100
2304-
; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
2305-
; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #1
2303+
; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #17
2304+
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
23062305
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
23072306
; CHECK-GI-NEXT: ret
23082307
entry:
@@ -2656,8 +2655,8 @@ define <2 x i32> @uv2i32_100(<2 x i32> %d, <2 x i32> %e) {
26562655
; CHECK-GI-NEXT: movi v2.2s, #100
26572656
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI63_0]
26582657
; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s
2659-
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
2660-
; CHECK-GI-NEXT: ushr v1.2s, v1.2s, #5
2658+
; CHECK-GI-NEXT: ushr v1.2d, v1.2d, #37
2659+
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
26612660
; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s
26622661
; CHECK-GI-NEXT: ret
26632662
entry:

llvm/test/CodeGen/AArch64/urem-lkk.ll

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,15 @@ define i32 @fold_urem_positive_odd(i32 %x) {
2020
}
2121

2222
define i32 @fold_urem_positive_even(i32 %x) {
23-
; CHECK-SD-LABEL: fold_urem_positive_even:
24-
; CHECK-SD: // %bb.0:
25-
; CHECK-SD-NEXT: mov w8, #16323 // =0x3fc3
26-
; CHECK-SD-NEXT: mov w9, #1060 // =0x424
27-
; CHECK-SD-NEXT: movk w8, #63310, lsl #16
28-
; CHECK-SD-NEXT: umull x8, w0, w8
29-
; CHECK-SD-NEXT: lsr x8, x8, #42
30-
; CHECK-SD-NEXT: msub w0, w8, w9, w0
31-
; CHECK-SD-NEXT: ret
32-
;
33-
; CHECK-GI-LABEL: fold_urem_positive_even:
34-
; CHECK-GI: // %bb.0:
35-
; CHECK-GI-NEXT: mov w8, #16323 // =0x3fc3
36-
; CHECK-GI-NEXT: mov w9, #1060 // =0x424
37-
; CHECK-GI-NEXT: movk w8, #63310, lsl #16
38-
; CHECK-GI-NEXT: umull x8, w0, w8
39-
; CHECK-GI-NEXT: lsr x8, x8, #32
40-
; CHECK-GI-NEXT: lsr w8, w8, #10
41-
; CHECK-GI-NEXT: msub w0, w8, w9, w0
42-
; CHECK-GI-NEXT: ret
23+
; CHECK-LABEL: fold_urem_positive_even:
24+
; CHECK: // %bb.0:
25+
; CHECK-NEXT: mov w8, #16323 // =0x3fc3
26+
; CHECK-NEXT: mov w9, #1060 // =0x424
27+
; CHECK-NEXT: movk w8, #63310, lsl #16
28+
; CHECK-NEXT: umull x8, w0, w8
29+
; CHECK-NEXT: lsr x8, x8, #42
30+
; CHECK-NEXT: msub w0, w8, w9, w0
31+
; CHECK-NEXT: ret
4332
%1 = urem i32 %x, 1060
4433
ret i32 %1
4534
}

0 commit comments

Comments
 (0)