Skip to content

Commit 4db428c

Browse files
[DAGCombiner] Allow promoted constants when lowering vector UDIVs
1 parent ed3f775 commit 4db428c

File tree

4 files changed

+56
-230
lines changed

4 files changed

+56
-230
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
10651065

10661066
// Determines if it is a constant integer or a splat/build vector of constant
10671067
// integers (and undefs).
1068-
// Do not permit build vector implicit truncation.
1069-
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1068+
// Do not permit build vector implicit truncation unless AllowTruncation is set.
1069+
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
1070+
bool AllowTruncation = false) {
10701071
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
10711072
return !(Const->isOpaque() && NoOpaques);
10721073
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,8 +1077,13 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
10761077
if (Op.isUndef())
10771078
continue;
10781079
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1079-
if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1080-
(Const->isOpaque() && NoOpaques))
1080+
if (!Const || (Const->isOpaque() && NoOpaques))
1081+
return false;
1082+
// When AllowTruncation is true, allow constants that have been promoted
1083+
// during type legalization as long as the value fits in the target type.
1084+
if ((AllowTruncation &&
1085+
Const->getAPIntValue().getActiveBits() > BitWidth) ||
1086+
(!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
10811087
return false;
10821088
}
10831089
return true;
@@ -5322,7 +5328,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53225328
EVT VT = N->getValueType(0);
53235329

53245330
// fold (udiv x, (1 << c)) -> x >>u c
5325-
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5331+
if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5332+
/*AllowTruncation=*/true)) {
53265333
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
53275334
AddToWorklist(LogBase2.getNode());
53285335

@@ -5336,7 +5343,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53365343
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
53375344
if (N1.getOpcode() == ISD::SHL) {
53385345
SDValue N10 = N1.getOperand(0);
5339-
if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5346+
if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
5347+
/*AllowTruncation=*/true)) {
53405348
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
53415349
AddToWorklist(LogBase2.getNode());
53425350

@@ -5352,7 +5360,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53525360

53535361
// fold (udiv x, c) -> alternate
53545362
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5355-
if (isConstantOrConstantVector(N1) &&
5363+
if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5364+
/*AllowTruncation=*/true) &&
53565365
!TLI.isIntDivCheap(N->getValueType(0), Attr))
53575366
if (SDValue Op = BuildUDIV(N))
53585367
return Op;

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6738,7 +6738,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67386738
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
67396739
if (C->isZero())
67406740
return false;
6741-
const APInt& Divisor = C->getAPIntValue();
6741+
// Truncate the divisor to the target scalar type in case it was promoted
6742+
// during type legalization.
6743+
APInt Divisor = C->getAPIntValue().trunc(EltBits);
67426744

67436745
SDValue PreShift, MagicFactor, NPQFactor, PostShift;
67446746

@@ -6779,7 +6781,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67796781
};
67806782

67816783
// Collect the shifts/magic values from each element.
6782-
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6784+
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6785+
/*AllowTruncation=*/true))
67836786
return SDValue();
67846787

67856788
SDValue PreShift, PostShift, MagicFactor, NPQFactor;

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 12 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,35 +1433,13 @@ entry:
14331433
define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
14341434
; CHECK-SD-LABEL: uv4i8_7:
14351435
; CHECK-SD: // %bb.0: // %entry
1436-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1437-
; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
1436+
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
14381437
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1439-
; CHECK-SD-NEXT: movk w8, #9362, lsl #16
1440-
; CHECK-SD-NEXT: umov w9, v0.h[0]
1441-
; CHECK-SD-NEXT: umov w10, v0.h[1]
1442-
; CHECK-SD-NEXT: umov w13, v0.h[2]
1443-
; CHECK-SD-NEXT: umov w15, v0.h[3]
1444-
; CHECK-SD-NEXT: umull x11, w9, w8
1445-
; CHECK-SD-NEXT: umull x12, w10, w8
1446-
; CHECK-SD-NEXT: umull x14, w13, w8
1447-
; CHECK-SD-NEXT: lsr x11, x11, #32
1448-
; CHECK-SD-NEXT: umull x8, w15, w8
1449-
; CHECK-SD-NEXT: lsr x12, x12, #32
1450-
; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
1451-
; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3
1452-
; CHECK-SD-NEXT: lsr x8, x8, #32
1453-
; CHECK-SD-NEXT: add w9, w9, w11
1454-
; CHECK-SD-NEXT: fmov s0, w9
1455-
; CHECK-SD-NEXT: add w10, w10, w12
1456-
; CHECK-SD-NEXT: lsr x9, x14, #32
1457-
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
1458-
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
1459-
; CHECK-SD-NEXT: mov v0.h[1], w10
1460-
; CHECK-SD-NEXT: add w8, w15, w8
1461-
; CHECK-SD-NEXT: add w9, w13, w9
1462-
; CHECK-SD-NEXT: mov v0.h[2], w9
1463-
; CHECK-SD-NEXT: mov v0.h[3], w8
1464-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1438+
; CHECK-SD-NEXT: movi v2.4h, #7
1439+
; CHECK-SD-NEXT: dup v1.4h, w8
1440+
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
1441+
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
1442+
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
14651443
; CHECK-SD-NEXT: ret
14661444
;
14671445
; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1486,13 @@ entry:
15081486
define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
15091487
; CHECK-SD-LABEL: uv4i8_100:
15101488
; CHECK-SD: // %bb.0: // %entry
1511-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1512-
; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
1513-
; CHECK-SD-NEXT: mov w14, #100 // =0x64
1489+
; CHECK-SD-NEXT: mov w8, #656 // =0x290
15141490
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1515-
; CHECK-SD-NEXT: movk w8, #655, lsl #16
1516-
; CHECK-SD-NEXT: umov w9, v0.h[0]
1517-
; CHECK-SD-NEXT: umov w10, v0.h[1]
1518-
; CHECK-SD-NEXT: umov w12, v0.h[2]
1519-
; CHECK-SD-NEXT: umov w15, v0.h[3]
1520-
; CHECK-SD-NEXT: umull x11, w9, w8
1521-
; CHECK-SD-NEXT: umull x13, w10, w8
1522-
; CHECK-SD-NEXT: lsr x11, x11, #32
1523-
; CHECK-SD-NEXT: lsr x13, x13, #32
1524-
; CHECK-SD-NEXT: msub w9, w11, w14, w9
1525-
; CHECK-SD-NEXT: umull x11, w12, w8
1526-
; CHECK-SD-NEXT: msub w10, w13, w14, w10
1527-
; CHECK-SD-NEXT: fmov s0, w9
1528-
; CHECK-SD-NEXT: umull x8, w15, w8
1529-
; CHECK-SD-NEXT: lsr x9, x11, #32
1530-
; CHECK-SD-NEXT: mov v0.h[1], w10
1531-
; CHECK-SD-NEXT: msub w9, w9, w14, w12
1532-
; CHECK-SD-NEXT: lsr x8, x8, #32
1533-
; CHECK-SD-NEXT: msub w8, w8, w14, w15
1534-
; CHECK-SD-NEXT: mov v0.h[2], w9
1535-
; CHECK-SD-NEXT: mov v0.h[3], w8
1536-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1491+
; CHECK-SD-NEXT: movi v2.4h, #100
1492+
; CHECK-SD-NEXT: dup v1.4h, w8
1493+
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
1494+
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
1495+
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
15371496
; CHECK-SD-NEXT: ret
15381497
;
15391498
; CHECK-GI-LABEL: uv4i8_100:

llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll

Lines changed: 23 additions & 168 deletions
Original file line numberDiff line numberDiff line change
@@ -25,74 +25,16 @@ define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
2525
define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
2626
; CHECK-LABEL: udiv_v16i16_by_255:
2727
; CHECK: // %bb.0:
28-
; CHECK-NEXT: umov w9, v0.h[0]
29-
; CHECK-NEXT: umov w11, v1.h[0]
30-
; CHECK-NEXT: mov w8, #258 // =0x102
31-
; CHECK-NEXT: movk w8, #257, lsl #16
32-
; CHECK-NEXT: umov w10, v0.h[1]
33-
; CHECK-NEXT: umov w12, v1.h[1]
34-
; CHECK-NEXT: umov w13, v0.h[2]
35-
; CHECK-NEXT: umov w14, v1.h[2]
36-
; CHECK-NEXT: umull x9, w9, w8
37-
; CHECK-NEXT: umull x11, w11, w8
38-
; CHECK-NEXT: umull x10, w10, w8
39-
; CHECK-NEXT: umull x12, w12, w8
40-
; CHECK-NEXT: lsr x9, x9, #32
41-
; CHECK-NEXT: lsr x11, x11, #32
42-
; CHECK-NEXT: umull x13, w13, w8
43-
; CHECK-NEXT: fmov s2, w9
44-
; CHECK-NEXT: lsr x10, x10, #32
45-
; CHECK-NEXT: umov w9, v0.h[3]
46-
; CHECK-NEXT: fmov s3, w11
47-
; CHECK-NEXT: lsr x12, x12, #32
48-
; CHECK-NEXT: umull x11, w14, w8
49-
; CHECK-NEXT: umov w14, v1.h[3]
50-
; CHECK-NEXT: mov v2.h[1], w10
51-
; CHECK-NEXT: lsr x10, x13, #32
52-
; CHECK-NEXT: mov v3.h[1], w12
53-
; CHECK-NEXT: umov w12, v0.h[4]
54-
; CHECK-NEXT: lsr x11, x11, #32
55-
; CHECK-NEXT: umull x9, w9, w8
56-
; CHECK-NEXT: umull x13, w14, w8
57-
; CHECK-NEXT: umov w14, v1.h[4]
58-
; CHECK-NEXT: mov v2.h[2], w10
59-
; CHECK-NEXT: mov v3.h[2], w11
60-
; CHECK-NEXT: lsr x9, x9, #32
61-
; CHECK-NEXT: umull x10, w12, w8
62-
; CHECK-NEXT: lsr x12, x13, #32
63-
; CHECK-NEXT: umov w11, v0.h[5]
64-
; CHECK-NEXT: umull x13, w14, w8
65-
; CHECK-NEXT: umov w14, v1.h[5]
66-
; CHECK-NEXT: mov v2.h[3], w9
67-
; CHECK-NEXT: lsr x9, x10, #32
68-
; CHECK-NEXT: mov v3.h[3], w12
69-
; CHECK-NEXT: lsr x12, x13, #32
70-
; CHECK-NEXT: umull x10, w11, w8
71-
; CHECK-NEXT: umov w11, v0.h[6]
72-
; CHECK-NEXT: umull x13, w14, w8
73-
; CHECK-NEXT: umov w14, v1.h[6]
74-
; CHECK-NEXT: mov v2.h[4], w9
75-
; CHECK-NEXT: umov w9, v0.h[7]
76-
; CHECK-NEXT: mov v3.h[4], w12
77-
; CHECK-NEXT: lsr x10, x10, #32
78-
; CHECK-NEXT: lsr x12, x13, #32
79-
; CHECK-NEXT: umull x11, w11, w8
80-
; CHECK-NEXT: umull x13, w14, w8
81-
; CHECK-NEXT: umov w14, v1.h[7]
82-
; CHECK-NEXT: mov v2.h[5], w10
83-
; CHECK-NEXT: umull x9, w9, w8
84-
; CHECK-NEXT: mov v3.h[5], w12
85-
; CHECK-NEXT: lsr x10, x11, #32
86-
; CHECK-NEXT: lsr x11, x13, #32
87-
; CHECK-NEXT: umull x8, w14, w8
88-
; CHECK-NEXT: lsr x9, x9, #32
89-
; CHECK-NEXT: mov v2.h[6], w10
90-
; CHECK-NEXT: mov v3.h[6], w11
91-
; CHECK-NEXT: lsr x8, x8, #32
92-
; CHECK-NEXT: mov v2.h[7], w9
93-
; CHECK-NEXT: mov v3.h[7], w8
94-
; CHECK-NEXT: mov v0.16b, v2.16b
95-
; CHECK-NEXT: mov v1.16b, v3.16b
28+
; CHECK-NEXT: mov w8, #32897 // =0x8081
29+
; CHECK-NEXT: dup v2.8h, w8
30+
; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
31+
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
32+
; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
33+
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
34+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
35+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
36+
; CHECK-NEXT: ushr v0.8h, v0.8h, #7
37+
; CHECK-NEXT: ushr v1.8h, v1.8h, #7
9638
; CHECK-NEXT: ret
9739
%div = udiv <16 x i16> %x, splat (i16 255)
9840
ret <16 x i16> %div
@@ -117,106 +59,19 @@ define <8 x i16> @urem_v8i16_by_255(<8 x i16> %x) {
11759
define <16 x i16> @urem_v16i16_by_255(<16 x i16> %x) {
11860
; CHECK-LABEL: urem_v16i16_by_255:
11961
; CHECK: // %bb.0:
120-
; CHECK-NEXT: umov w9, v0.h[1]
121-
; CHECK-NEXT: umov w10, v0.h[0]
122-
; CHECK-NEXT: mov w8, #258 // =0x102
123-
; CHECK-NEXT: umov w12, v1.h[0]
124-
; CHECK-NEXT: movk w8, #257, lsl #16
125-
; CHECK-NEXT: umov w11, v1.h[1]
126-
; CHECK-NEXT: umov w17, v0.h[2]
127-
; CHECK-NEXT: umov w18, v1.h[2]
128-
; CHECK-NEXT: umov w0, v0.h[3]
129-
; CHECK-NEXT: umov w1, v1.h[3]
130-
; CHECK-NEXT: umull x13, w9, w8
131-
; CHECK-NEXT: umull x14, w10, w8
132-
; CHECK-NEXT: umull x16, w12, w8
133-
; CHECK-NEXT: umull x15, w11, w8
134-
; CHECK-NEXT: lsr x13, x13, #32
135-
; CHECK-NEXT: lsr x14, x14, #32
136-
; CHECK-NEXT: lsr x16, x16, #32
137-
; CHECK-NEXT: sub w13, w13, w13, lsl #8
138-
; CHECK-NEXT: sub w14, w14, w14, lsl #8
139-
; CHECK-NEXT: lsr x15, x15, #32
140-
; CHECK-NEXT: sub w16, w16, w16, lsl #8
141-
; CHECK-NEXT: add w9, w9, w13
142-
; CHECK-NEXT: umull x13, w17, w8
143-
; CHECK-NEXT: add w10, w10, w14
144-
; CHECK-NEXT: umull x14, w18, w8
145-
; CHECK-NEXT: sub w15, w15, w15, lsl #8
146-
; CHECK-NEXT: add w12, w12, w16
147-
; CHECK-NEXT: fmov s2, w10
148-
; CHECK-NEXT: umov w16, v1.h[4]
149-
; CHECK-NEXT: fmov s3, w12
150-
; CHECK-NEXT: add w11, w11, w15
151-
; CHECK-NEXT: lsr x13, x13, #32
152-
; CHECK-NEXT: lsr x14, x14, #32
153-
; CHECK-NEXT: umov w15, v0.h[4]
154-
; CHECK-NEXT: umull x10, w0, w8
155-
; CHECK-NEXT: umull x12, w1, w8
156-
; CHECK-NEXT: mov v2.h[1], w9
157-
; CHECK-NEXT: sub w13, w13, w13, lsl #8
158-
; CHECK-NEXT: mov v3.h[1], w11
159-
; CHECK-NEXT: sub w14, w14, w14, lsl #8
160-
; CHECK-NEXT: umov w9, v0.h[5]
161-
; CHECK-NEXT: add w13, w17, w13
162-
; CHECK-NEXT: lsr x10, x10, #32
163-
; CHECK-NEXT: umov w11, v1.h[5]
164-
; CHECK-NEXT: add w14, w18, w14
165-
; CHECK-NEXT: lsr x12, x12, #32
166-
; CHECK-NEXT: umull x17, w15, w8
167-
; CHECK-NEXT: umull x18, w16, w8
168-
; CHECK-NEXT: mov v2.h[2], w13
169-
; CHECK-NEXT: sub w10, w10, w10, lsl #8
170-
; CHECK-NEXT: mov v3.h[2], w14
171-
; CHECK-NEXT: sub w12, w12, w12, lsl #8
172-
; CHECK-NEXT: umov w13, v0.h[6]
173-
; CHECK-NEXT: lsr x14, x17, #32
174-
; CHECK-NEXT: add w10, w0, w10
175-
; CHECK-NEXT: umull x17, w9, w8
176-
; CHECK-NEXT: lsr x18, x18, #32
177-
; CHECK-NEXT: add w12, w1, w12
178-
; CHECK-NEXT: umull x0, w11, w8
179-
; CHECK-NEXT: mov v2.h[3], w10
180-
; CHECK-NEXT: umov w10, v1.h[6]
181-
; CHECK-NEXT: sub w14, w14, w14, lsl #8
182-
; CHECK-NEXT: mov v3.h[3], w12
183-
; CHECK-NEXT: sub w18, w18, w18, lsl #8
184-
; CHECK-NEXT: lsr x17, x17, #32
185-
; CHECK-NEXT: add w14, w15, w14
186-
; CHECK-NEXT: umov w12, v0.h[7]
187-
; CHECK-NEXT: add w15, w16, w18
188-
; CHECK-NEXT: lsr x18, x0, #32
189-
; CHECK-NEXT: umov w16, v1.h[7]
190-
; CHECK-NEXT: mov v2.h[4], w14
191-
; CHECK-NEXT: umull x14, w13, w8
192-
; CHECK-NEXT: sub w17, w17, w17, lsl #8
193-
; CHECK-NEXT: mov v3.h[4], w15
194-
; CHECK-NEXT: umull x15, w10, w8
195-
; CHECK-NEXT: sub w18, w18, w18, lsl #8
196-
; CHECK-NEXT: add w9, w9, w17
197-
; CHECK-NEXT: add w11, w11, w18
198-
; CHECK-NEXT: lsr x14, x14, #32
199-
; CHECK-NEXT: lsr x15, x15, #32
200-
; CHECK-NEXT: mov v2.h[5], w9
201-
; CHECK-NEXT: umull x9, w12, w8
202-
; CHECK-NEXT: mov v3.h[5], w11
203-
; CHECK-NEXT: umull x8, w16, w8
204-
; CHECK-NEXT: sub w11, w14, w14, lsl #8
205-
; CHECK-NEXT: sub w14, w15, w15, lsl #8
206-
; CHECK-NEXT: add w11, w13, w11
207-
; CHECK-NEXT: lsr x9, x9, #32
208-
; CHECK-NEXT: add w10, w10, w14
209-
; CHECK-NEXT: lsr x8, x8, #32
210-
; CHECK-NEXT: mov v2.h[6], w11
211-
; CHECK-NEXT: mov v3.h[6], w10
212-
; CHECK-NEXT: sub w9, w9, w9, lsl #8
213-
; CHECK-NEXT: sub w8, w8, w8, lsl #8
214-
; CHECK-NEXT: add w9, w12, w9
215-
; CHECK-NEXT: add w8, w16, w8
216-
; CHECK-NEXT: mov v2.h[7], w9
217-
; CHECK-NEXT: mov v3.h[7], w8
218-
; CHECK-NEXT: mov v0.16b, v2.16b
219-
; CHECK-NEXT: mov v1.16b, v3.16b
62+
; CHECK-NEXT: mov w8, #32897 // =0x8081
63+
; CHECK-NEXT: dup v2.8h, w8
64+
; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
65+
; CHECK-NEXT: umull v4.4s, v0.4h, v2.4h
66+
; CHECK-NEXT: umull2 v5.4s, v1.8h, v2.8h
67+
; CHECK-NEXT: umull v2.4s, v1.4h, v2.4h
68+
; CHECK-NEXT: uzp2 v3.8h, v4.8h, v3.8h
69+
; CHECK-NEXT: movi v4.2d, #0xff00ff00ff00ff
70+
; CHECK-NEXT: uzp2 v2.8h, v2.8h, v5.8h
71+
; CHECK-NEXT: ushr v3.8h, v3.8h, #7
72+
; CHECK-NEXT: ushr v2.8h, v2.8h, #7
73+
; CHECK-NEXT: mls v0.8h, v3.8h, v4.8h
74+
; CHECK-NEXT: mls v1.8h, v2.8h, v4.8h
22075
; CHECK-NEXT: ret
22176
%rem = urem <16 x i16> %x, splat (i16 255)
22277
ret <16 x i16> %rem

0 commit comments

Comments
 (0)