Skip to content

Commit bc8272a

Browse files
committed
[GISel] Combine shift + trunc + shift pattern
1 parent e741b71 commit bc8272a

File tree

13 files changed

+369
-344
lines changed

13 files changed

+369
-344
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ struct ShiftOfShiftedLogic {
8181
uint64_t ValSum;
8282
};
8383

84+
struct ShiftOfTruncOfShift {
85+
Register Src;
86+
uint64_t ShiftAmt;
87+
LLT ShiftAmtTy;
88+
LLT InnerShiftTy;
89+
};
90+
8491
using BuildFnTy = std::function<void(MachineIRBuilder &)>;
8592

8693
using OperandBuildSteps =
@@ -338,6 +345,12 @@ class CombinerHelper {
338345

339346
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const;
340347

348+
/// Fold (shift (trunc (shift x, C1)), C2) -> trunc (shift x, (C1 + C2))
349+
bool matchShiftOfTruncOfShift(MachineInstr &MI,
350+
ShiftOfTruncOfShift &MatchInfo) const;
351+
void applyShiftOfTruncOfShift(MachineInstr &MI,
352+
ShiftOfTruncOfShift &MatchInfo) const;
353+
341354
/// Transform a multiply by a power-of-2 value to a left shift.
342355
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const;
343356
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,14 @@ def commute_shift : GICombineRule<
396396
[{ return Helper.matchCommuteShift(*${d}, ${matchinfo}); }]),
397397
(apply [{ Helper.applyBuildFn(*${d}, ${matchinfo}); }])>;
398398

399+
// Fold a right shift of a truncated right shift into a single wide shift:
//   (shift (trunc (shift x, C1)), C2) -> (trunc (shift x, (C1 + C2)))
// The rule roots on G_LSHR / G_ASHR; Helper.matchShiftOfTruncOfShift decides
// whether the fold applies and records the operands in the match data.
def shift_of_trunc_of_shift_matchdata : GIDefMatchData<"ShiftOfTruncOfShift">;
def shift_of_trunc_of_shift : GICombineRule<
  (defs root:$root, shift_of_trunc_of_shift_matchdata:$matchinfo),
  (match (wip_match_opcode G_LSHR, G_ASHR):$root,
         [{ return Helper.matchShiftOfTruncOfShift(*${root}, ${matchinfo}); }]),
  (apply [{ Helper.applyShiftOfTruncOfShift(*${root}, ${matchinfo}); }])>;
406+
399407
def narrow_binop_feeding_and : GICombineRule<
400408
(defs root:$root, build_fn_matchinfo:$matchinfo),
401409
(match (wip_match_opcode G_AND):$root,
@@ -2133,7 +2141,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
21332141
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
21342142
simplify_neg_minmax, combine_concat_vector,
21352143
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
2136-
combine_use_vector_truncate, merge_combines, overflow_combines, truncsat_combines]>;
2144+
combine_use_vector_truncate, merge_combines, overflow_combines,
2145+
truncsat_combines, shift_of_trunc_of_shift]>;
21372146

21382147
// A combine group used for prelegalizer combiners at -O0. The combines in
21392148
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,6 +2094,60 @@ bool CombinerHelper::matchCommuteShift(MachineInstr &MI,
20942094
return true;
20952095
}
20962096

2097+
/// Match (shift (trunc (shift x, C1)), C2) where \p MI is the outer shift,
/// both shifts have the same opcode (G_LSHR or G_ASHR) and C1 / C2 are
/// constants or constant splats.
///
/// The match succeeds only when C1 equals the number of bits removed by the
/// truncation (C1 + narrow scalar width == wide scalar width) and C2 is a
/// valid amount for the narrow type, which keeps C1 + C2 below the wide scalar
/// width.
///
/// On success \p MatchInfo receives the inner shift's value operand, the
/// combined amount C1 + C2, the type of the inner shift-amount operand and the
/// inner shift's result type.
///
/// \returns true if the pattern matched and \p MatchInfo was populated.
bool CombinerHelper::matchShiftOfTruncOfShift(
    MachineInstr &MI, ShiftOfTruncOfShift &MatchInfo) const {
  unsigned ShiftOpcode = MI.getOpcode();
  assert(ShiftOpcode == TargetOpcode::G_LSHR ||
         ShiftOpcode == TargetOpcode::G_ASHR);

  Register N0 = MI.getOperand(1).getReg();
  Register N1 = MI.getOperand(2).getReg();
  unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();

  // The outer amount must be constant and the shifted value must be a G_TRUNC.
  APInt N1C;
  Register InnerShift;
  if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)) ||
      !mi_match(N0, MRI, m_GTrunc(m_Reg(InnerShift))))
    return false;

  // The truncated value must come from a shift of the same kind.
  auto *InnerMI = MRI.getVRegDef(InnerShift);
  if (InnerMI->getOpcode() != ShiftOpcode)
    return false;

  APInt N001C;
  Register N001 = InnerMI->getOperand(2).getReg();
  if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
    return false;

  LLT InnerShiftTy = MRI.getType(InnerShift);
  uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();

  // Range-check the amounts as APInts before converting them. getZExtValue()
  // asserts on values that need more than 64 bits (possible with wide
  // shift-amount types), and a huge outer amount could otherwise make the
  // 64-bit sum below wrap around and look in range.
  if (N001C.uge(InnerShiftSize) || N1C.uge(OpSizeInBits))
    return false;

  uint64_t C1 = N001C.getZExtValue();
  uint64_t C2 = N1C.getZExtValue();

  // Only valid when the inner shift moves exactly the bits that survive the
  // truncation into the low part of the wide value.
  if (C1 + OpSizeInBits != InnerShiftSize)
    return false;
  // Implied by C2 < OpSizeInBits together with the equality above.
  assert(C1 + C2 < InnerShiftSize && "combined shift amount out of range");

  MatchInfo.Src = InnerMI->getOperand(1).getReg();
  MatchInfo.ShiftAmt = C1 + C2;
  MatchInfo.ShiftAmtTy = MRI.getType(N001);
  MatchInfo.InnerShiftTy = InnerShiftTy;
  return true;
}
2135+
2136+
/// Rewrite the outer shift \p MI, previously matched by
/// matchShiftOfTruncOfShift, as a single shift of the wide source by the
/// combined amount followed by a truncation into MI's destination register.
/// \p MI is erased afterwards.
void CombinerHelper::applyShiftOfTruncOfShift(
    MachineInstr &MI, ShiftOfTruncOfShift &MatchInfo) const {
  const unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_LSHR || Opc == TargetOpcode::G_ASHR);

  const Register Result = MI.getOperand(0).getReg();

  // Materialise C1 + C2 using the inner shift's amount type.
  auto CombinedAmt =
      Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);

  // Perform one shift on the wide source with the same opcode as MI.
  auto WideShift = Builder.buildInstr(Opc, {MatchInfo.InnerShiftTy},
                                      {MatchInfo.Src, CombinedAmt});

  // Narrow the wide result into the original destination and drop MI.
  Builder.buildTrunc(Result, WideShift);
  MI.eraseFromParent();
}
2150+
20972151
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
20982152
unsigned &ShiftVal) const {
20992153
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,8 @@ def AArch64PostLegalizerLowering
349349
}
350350

351351
// Post-legalization combines which are primarily optimizations.
352+
353+
352354
def AArch64PostLegalizerCombiner
353355
: GICombiner<"AArch64PostLegalizerCombinerImpl",
354356
[copy_prop, cast_of_cast_combines,
@@ -369,5 +371,5 @@ def AArch64PostLegalizerCombiner
369371
commute_constant_to_rhs, extract_vec_elt_combines,
370372
push_freeze_to_prevent_poison_from_propagating,
371373
combine_mul_cmlt, combine_use_vector_truncate,
372-
extmultomull, truncsat_combines]> {
374+
extmultomull, truncsat_combines, shift_of_trunc_of_shift]> {
373375
}

llvm/test/CodeGen/AArch64/combine-sdiv.ll

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,24 +1684,14 @@ define i32 @combine_i32_sdiv_const7(i32 %x) {
16841684
}
16851685

16861686
define i32 @combine_i32_sdiv_const100(i32 %x) {
1687-
; CHECK-SD-LABEL: combine_i32_sdiv_const100:
1688-
; CHECK-SD: // %bb.0:
1689-
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
1690-
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
1691-
; CHECK-SD-NEXT: smull x8, w0, w8
1692-
; CHECK-SD-NEXT: asr x8, x8, #37
1693-
; CHECK-SD-NEXT: add w0, w8, w8, lsr #31
1694-
; CHECK-SD-NEXT: ret
1695-
;
1696-
; CHECK-GI-LABEL: combine_i32_sdiv_const100:
1697-
; CHECK-GI: // %bb.0:
1698-
; CHECK-GI-NEXT: mov w8, #34079 // =0x851f
1699-
; CHECK-GI-NEXT: movk w8, #20971, lsl #16
1700-
; CHECK-GI-NEXT: smull x8, w0, w8
1701-
; CHECK-GI-NEXT: asr x8, x8, #32
1702-
; CHECK-GI-NEXT: asr w8, w8, #5
1703-
; CHECK-GI-NEXT: add w0, w8, w8, lsr #31
1704-
; CHECK-GI-NEXT: ret
1687+
; CHECK-LABEL: combine_i32_sdiv_const100:
1688+
; CHECK: // %bb.0:
1689+
; CHECK-NEXT: mov w8, #34079 // =0x851f
1690+
; CHECK-NEXT: movk w8, #20971, lsl #16
1691+
; CHECK-NEXT: smull x8, w0, w8
1692+
; CHECK-NEXT: asr x8, x8, #37
1693+
; CHECK-NEXT: add w0, w8, w8, lsr #31
1694+
; CHECK-NEXT: ret
17051695
%1 = sdiv i32 %x, 100
17061696
ret i32 %1
17071697
}

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 62 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -276,28 +276,16 @@ entry:
276276
}
277277

278278
define i32 @si32_100(i32 %a, i32 %b) {
279-
; CHECK-SD-LABEL: si32_100:
280-
; CHECK-SD: // %bb.0: // %entry
281-
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
282-
; CHECK-SD-NEXT: mov w9, #100 // =0x64
283-
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
284-
; CHECK-SD-NEXT: smull x8, w0, w8
285-
; CHECK-SD-NEXT: asr x8, x8, #37
286-
; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
287-
; CHECK-SD-NEXT: msub w0, w8, w9, w0
288-
; CHECK-SD-NEXT: ret
289-
;
290-
; CHECK-GI-LABEL: si32_100:
291-
; CHECK-GI: // %bb.0: // %entry
292-
; CHECK-GI-NEXT: mov w8, #34079 // =0x851f
293-
; CHECK-GI-NEXT: mov w9, #100 // =0x64
294-
; CHECK-GI-NEXT: movk w8, #20971, lsl #16
295-
; CHECK-GI-NEXT: smull x8, w0, w8
296-
; CHECK-GI-NEXT: asr x8, x8, #32
297-
; CHECK-GI-NEXT: asr w8, w8, #5
298-
; CHECK-GI-NEXT: add w8, w8, w8, lsr #31
299-
; CHECK-GI-NEXT: msub w0, w8, w9, w0
300-
; CHECK-GI-NEXT: ret
279+
; CHECK-LABEL: si32_100:
280+
; CHECK: // %bb.0: // %entry
281+
; CHECK-NEXT: mov w8, #34079 // =0x851f
282+
; CHECK-NEXT: mov w9, #100 // =0x64
283+
; CHECK-NEXT: movk w8, #20971, lsl #16
284+
; CHECK-NEXT: smull x8, w0, w8
285+
; CHECK-NEXT: asr x8, x8, #37
286+
; CHECK-NEXT: add w8, w8, w8, lsr #31
287+
; CHECK-NEXT: msub w0, w8, w9, w0
288+
; CHECK-NEXT: ret
301289
entry:
302290
%s = srem i32 %a, 100
303291
ret i32 %s
@@ -336,26 +324,15 @@ entry:
336324
}
337325

338326
define i32 @ui32_100(i32 %a, i32 %b) {
339-
; CHECK-SD-LABEL: ui32_100:
340-
; CHECK-SD: // %bb.0: // %entry
341-
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
342-
; CHECK-SD-NEXT: mov w9, #100 // =0x64
343-
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
344-
; CHECK-SD-NEXT: umull x8, w0, w8
345-
; CHECK-SD-NEXT: lsr x8, x8, #37
346-
; CHECK-SD-NEXT: msub w0, w8, w9, w0
347-
; CHECK-SD-NEXT: ret
348-
;
349-
; CHECK-GI-LABEL: ui32_100:
350-
; CHECK-GI: // %bb.0: // %entry
351-
; CHECK-GI-NEXT: mov w8, #34079 // =0x851f
352-
; CHECK-GI-NEXT: mov w9, #100 // =0x64
353-
; CHECK-GI-NEXT: movk w8, #20971, lsl #16
354-
; CHECK-GI-NEXT: umull x8, w0, w8
355-
; CHECK-GI-NEXT: lsr x8, x8, #32
356-
; CHECK-GI-NEXT: lsr w8, w8, #5
357-
; CHECK-GI-NEXT: msub w0, w8, w9, w0
358-
; CHECK-GI-NEXT: ret
327+
; CHECK-LABEL: ui32_100:
328+
; CHECK: // %bb.0: // %entry
329+
; CHECK-NEXT: mov w8, #34079 // =0x851f
330+
; CHECK-NEXT: mov w9, #100 // =0x64
331+
; CHECK-NEXT: movk w8, #20971, lsl #16
332+
; CHECK-NEXT: umull x8, w0, w8
333+
; CHECK-NEXT: lsr x8, x8, #37
334+
; CHECK-NEXT: msub w0, w8, w9, w0
335+
; CHECK-NEXT: ret
359336
entry:
360337
%s = urem i32 %a, 100
361338
ret i32 %s
@@ -1118,13 +1095,12 @@ define <8 x i8> @sv8i8_100(<8 x i8> %d, <8 x i8> %e) {
11181095
; CHECK-GI-LABEL: sv8i8_100:
11191096
; CHECK-GI: // %bb.0: // %entry
11201097
; CHECK-GI-NEXT: movi v1.8b, #41
1121-
; CHECK-GI-NEXT: movi v3.8b, #100
1098+
; CHECK-GI-NEXT: movi v2.8b, #100
11221099
; CHECK-GI-NEXT: smull v1.8h, v0.8b, v1.8b
1123-
; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
1124-
; CHECK-GI-NEXT: sshr v2.8b, v1.8b, #4
1125-
; CHECK-GI-NEXT: ushr v2.8b, v2.8b, #7
1126-
; CHECK-GI-NEXT: ssra v2.8b, v1.8b, #4
1127-
; CHECK-GI-NEXT: mls v0.8b, v2.8b, v3.8b
1100+
; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #12
1101+
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
1102+
; CHECK-GI-NEXT: usra v1.8b, v1.8b, #7
1103+
; CHECK-GI-NEXT: mls v0.8b, v1.8b, v2.8b
11281104
; CHECK-GI-NEXT: ret
11291105
entry:
11301106
%s = srem <8 x i8> %d, <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
@@ -1619,15 +1595,25 @@ entry:
16191595
}
16201596

16211597
define <8 x i8> @uv8i8_100(<8 x i8> %d, <8 x i8> %e) {
1622-
; CHECK-LABEL: uv8i8_100:
1623-
; CHECK: // %bb.0: // %entry
1624-
; CHECK-NEXT: movi v1.8b, #41
1625-
; CHECK-NEXT: movi v2.8b, #100
1626-
; CHECK-NEXT: umull v1.8h, v0.8b, v1.8b
1627-
; CHECK-NEXT: shrn v1.8b, v1.8h, #8
1628-
; CHECK-NEXT: ushr v1.8b, v1.8b, #4
1629-
; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b
1630-
; CHECK-NEXT: ret
1598+
; CHECK-SD-LABEL: uv8i8_100:
1599+
; CHECK-SD: // %bb.0: // %entry
1600+
; CHECK-SD-NEXT: movi v1.8b, #41
1601+
; CHECK-SD-NEXT: movi v2.8b, #100
1602+
; CHECK-SD-NEXT: umull v1.8h, v0.8b, v1.8b
1603+
; CHECK-SD-NEXT: shrn v1.8b, v1.8h, #8
1604+
; CHECK-SD-NEXT: ushr v1.8b, v1.8b, #4
1605+
; CHECK-SD-NEXT: mls v0.8b, v1.8b, v2.8b
1606+
; CHECK-SD-NEXT: ret
1607+
;
1608+
; CHECK-GI-LABEL: uv8i8_100:
1609+
; CHECK-GI: // %bb.0: // %entry
1610+
; CHECK-GI-NEXT: movi v1.8b, #41
1611+
; CHECK-GI-NEXT: movi v2.8b, #100
1612+
; CHECK-GI-NEXT: umull v1.8h, v0.8b, v1.8b
1613+
; CHECK-GI-NEXT: ushr v1.8h, v1.8h, #12
1614+
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
1615+
; CHECK-GI-NEXT: mls v0.8b, v1.8b, v2.8b
1616+
; CHECK-GI-NEXT: ret
16311617
entry:
16321618
%s = urem <8 x i8> %d, <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
16331619
ret <8 x i8> %s
@@ -1904,14 +1890,13 @@ define <4 x i16> @sv4i16_7(<4 x i16> %d, <4 x i16> %e) {
19041890
; CHECK-GI-LABEL: sv4i16_7:
19051891
; CHECK-GI: // %bb.0: // %entry
19061892
; CHECK-GI-NEXT: adrp x8, .LCPI44_0
1907-
; CHECK-GI-NEXT: movi v3.4h, #7
1893+
; CHECK-GI-NEXT: movi v2.4h, #7
19081894
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI44_0]
19091895
; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h
1910-
; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
1911-
; CHECK-GI-NEXT: sshr v2.4h, v1.4h, #1
1912-
; CHECK-GI-NEXT: ushr v2.4h, v2.4h, #15
1913-
; CHECK-GI-NEXT: ssra v2.4h, v1.4h, #1
1914-
; CHECK-GI-NEXT: mls v0.4h, v2.4h, v3.4h
1896+
; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #17
1897+
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
1898+
; CHECK-GI-NEXT: usra v1.4h, v1.4h, #15
1899+
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
19151900
; CHECK-GI-NEXT: ret
19161901
entry:
19171902
%s = srem <4 x i16> %d, <i16 7, i16 7, i16 7, i16 7>
@@ -1934,14 +1919,13 @@ define <4 x i16> @sv4i16_100(<4 x i16> %d, <4 x i16> %e) {
19341919
; CHECK-GI-LABEL: sv4i16_100:
19351920
; CHECK-GI: // %bb.0: // %entry
19361921
; CHECK-GI-NEXT: adrp x8, .LCPI45_0
1937-
; CHECK-GI-NEXT: movi v3.4h, #100
1922+
; CHECK-GI-NEXT: movi v2.4h, #100
19381923
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI45_0]
19391924
; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h
1940-
; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
1941-
; CHECK-GI-NEXT: sshr v2.4h, v1.4h, #3
1942-
; CHECK-GI-NEXT: ushr v2.4h, v2.4h, #15
1943-
; CHECK-GI-NEXT: ssra v2.4h, v1.4h, #3
1944-
; CHECK-GI-NEXT: mls v0.4h, v2.4h, v3.4h
1925+
; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #19
1926+
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
1927+
; CHECK-GI-NEXT: usra v1.4h, v1.4h, #15
1928+
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
19451929
; CHECK-GI-NEXT: ret
19461930
entry:
19471931
%s = srem <4 x i16> %d, <i16 100, i16 100, i16 100, i16 100>
@@ -2301,8 +2285,8 @@ define <4 x i16> @uv4i16_100(<4 x i16> %d, <4 x i16> %e) {
23012285
; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI53_0]
23022286
; CHECK-GI-NEXT: umull v1.4s, v1.4h, v2.4h
23032287
; CHECK-GI-NEXT: movi v2.4h, #100
2304-
; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
2305-
; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #1
2288+
; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #17
2289+
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
23062290
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
23072291
; CHECK-GI-NEXT: ret
23082292
entry:
@@ -2424,14 +2408,13 @@ define <2 x i32> @sv2i32_100(<2 x i32> %d, <2 x i32> %e) {
24242408
; CHECK-GI-LABEL: sv2i32_100:
24252409
; CHECK-GI: // %bb.0: // %entry
24262410
; CHECK-GI-NEXT: adrp x8, .LCPI57_0
2427-
; CHECK-GI-NEXT: movi v3.2s, #100
2411+
; CHECK-GI-NEXT: movi v2.2s, #100
24282412
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI57_0]
24292413
; CHECK-GI-NEXT: smull v1.2d, v0.2s, v1.2s
2430-
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
2431-
; CHECK-GI-NEXT: sshr v2.2s, v1.2s, #5
2432-
; CHECK-GI-NEXT: ushr v2.2s, v2.2s, #31
2433-
; CHECK-GI-NEXT: ssra v2.2s, v1.2s, #5
2434-
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v3.2s
2414+
; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #37
2415+
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
2416+
; CHECK-GI-NEXT: usra v1.2s, v1.2s, #31
2417+
; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s
24352418
; CHECK-GI-NEXT: ret
24362419
entry:
24372420
%s = srem <2 x i32> %d, <i32 100, i32 100>
@@ -2656,8 +2639,8 @@ define <2 x i32> @uv2i32_100(<2 x i32> %d, <2 x i32> %e) {
26562639
; CHECK-GI-NEXT: movi v2.2s, #100
26572640
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI63_0]
26582641
; CHECK-GI-NEXT: umull v1.2d, v0.2s, v1.2s
2659-
; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
2660-
; CHECK-GI-NEXT: ushr v1.2s, v1.2s, #5
2642+
; CHECK-GI-NEXT: ushr v1.2d, v1.2d, #37
2643+
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
26612644
; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s
26622645
; CHECK-GI-NEXT: ret
26632646
entry:

0 commit comments

Comments
 (0)