
Commit 5628964

[DAGCombiner] Preserve nuw when converting mul to shl. Use nuw in srl+shl combine. (llvm#155043)
If the srl+shl have the same shift amount and the shl has the nuw flag, we can remove both. In the affected test, the InterleavedAccess pass will emit a udiv after the `mul nuw`. We expect them to combine away. The remaining shifts on the RV64 tests are because we didn't add the zeroext attribute to the incoming evl operand.
1 parent 81740e0 commit 5628964
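As an illustration of the interaction this commit relies on, here is a reduced sketch (the function below is invented for this description; it is not one of the test cases): the InterleavedAccess pass scales the EVL with `mul nuw` and later divides it back down, and the two new DAG combines let that round-trip disappear.

```llvm
; Hypothetical reduced example of the EVL arithmetic for a factor-2 access.
define i32 @evl_roundtrip(i32 %evl) {
  %wide = mul nuw i32 %evl, 2   ; scaled element count; nuw: cannot wrap
  %back = udiv i32 %wide, 2     ; per-field EVL recovered by division
  ret i32 %back
}
```

During DAG combining, the `mul nuw` by 2 becomes `shl nuw` by 1 (the flag is now preserved), the `udiv` by 2 becomes `srl` by 1, and the new `(srl (shl nuw x, c), c) -> x` fold removes both shifts, leaving `%evl` used directly.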

2 files changed: +85, -98 lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 41 additions & 33 deletions
@@ -4710,7 +4710,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
-      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
+      SDNodeFlags Flags;
+      Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
+      // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
+      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
     }
   }

@@ -11089,38 +11092,43 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
     }
   }

-  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
-  // (and (srl x, (sub c2, c1), MASK)
-  if (N0.getOpcode() == ISD::SHL &&
-      (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
-      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
-    auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
-                                           ConstantSDNode *RHS) {
-      const APInt &LHSC = LHS->getAPIntValue();
-      const APInt &RHSC = RHS->getAPIntValue();
-      return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
-             LHSC.getZExtValue() <= RHSC.getZExtValue();
-    };
-    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
-                                  /*AllowUndefs*/ false,
-                                  /*AllowTypeMismatch*/ true)) {
-      SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
-      SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
-      SDValue Mask = DAG.getAllOnesConstant(DL, VT);
-      Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
-      Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
-      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
-      return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
-    }
-    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
-                                  /*AllowUndefs*/ false,
-                                  /*AllowTypeMismatch*/ true)) {
-      SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
-      SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
-      SDValue Mask = DAG.getAllOnesConstant(DL, VT);
-      Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
-      SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
-      return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+  if (N0.getOpcode() == ISD::SHL) {
+    // fold (srl (shl nuw x, c), c) -> x
+    if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
+      return N0.getOperand(0);
+
+    // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
+    // (and (srl x, (sub c2, c1), MASK)
+    if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
+        TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+      auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+                                             ConstantSDNode *RHS) {
+        const APInt &LHSC = LHS->getAPIntValue();
+        const APInt &RHSC = RHS->getAPIntValue();
+        return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+               LHSC.getZExtValue() <= RHSC.getZExtValue();
+      };
+      if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+                                    /*AllowUndefs*/ false,
+                                    /*AllowTypeMismatch*/ true)) {
+        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+        SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+        Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
+        Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
+        SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+        return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+      }
+      if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+                                    /*AllowUndefs*/ false,
+                                    /*AllowTypeMismatch*/ true)) {
+        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+        SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+        Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
+        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+        return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+      }
     }
   }
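To see why the nuw flag is what makes the new visitSRL fold safe, consider a small worked case (values chosen here for illustration): for i8, take x = 0x81. `shl x, 1` gives 0x02 because the set high bit is shifted out, and `srl 0x02, 1` gives 0x01, which is not x. With nuw, the shift is only well defined when no set bits are shifted out, so `srl (shl nuw x, c), c` is guaranteed to reproduce x and both operations can be dropped.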

llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll

Lines changed: 44 additions & 65 deletions
@@ -5,16 +5,14 @@
 define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor2_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: srli a1, a1, 1
 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT: vlseg2e32.v v8, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: load_factor2_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 33
-; RV64-NEXT: srli a1, a1, 33
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT: vlseg2e32.v v8, (a0)
 ; RV64-NEXT: ret
@@ -142,16 +140,14 @@ merge:
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor4_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: srli a1, a1, 2
 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT: vlseg4e32.v v8, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: load_factor4_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 34
-; RV64-NEXT: srli a1, a1, 34
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT: vlseg4e32.v v8, (a0)
 ; RV64-NEXT: ret
@@ -237,16 +233,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor8_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor8_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: srli a1, a1, 3
 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT: vlseg8e32.v v8, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: load_factor8_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 35
-; RV64-NEXT: srli a1, a1, 35
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT: vlseg8e32.v v8, (a0)
 ; RV64-NEXT: ret
@@ -276,16 +270,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor2_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: srli a1, a1, 1
 ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT: vsseg2e32.v v8, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: store_factor2_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 33
-; RV64-NEXT: srli a1, a1, 33
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT: vsseg2e32.v v8, (a0)
 ; RV64-NEXT: ret
@@ -384,8 +376,6 @@ define void @store_factor7_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <v
 define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor8_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: srli a1, a1, 3
 ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT: vmv1r.v v10, v8
 ; RV32-NEXT: vmv1r.v v11, v9
@@ -398,8 +388,8 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 ;
 ; RV64-LABEL: store_factor8_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 35
-; RV64-NEXT: srli a1, a1, 35
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT: vmv1r.v v10, v8
 ; RV64-NEXT: vmv1r.v v11, v9
@@ -418,16 +408,14 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_factor2_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: srli a1, a1, 1
 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT: vlseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: masked_load_factor2_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 33
-; RV64-NEXT: srli a1, a1, 33
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT: vlseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT: ret
@@ -445,16 +433,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor4_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_factor4_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: srli a1, a1, 2
 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT: vlseg4e32.v v8, (a0), v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: masked_load_factor4_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 34
-; RV64-NEXT: srli a1, a1, 34
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT: vlseg4e32.v v8, (a0), v0.t
 ; RV64-NEXT: ret
@@ -477,20 +463,17 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_store_factor2_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv1r.v v9, v8
-; RV32-NEXT: srli a1, a1, 1
 ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT: vmv1r.v v9, v8
 ; RV32-NEXT: vsseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: masked_store_factor2_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 33
+; RV64-NEXT: slli a1, a1, 32
 ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; RV64-NEXT: vmv1r.v v9, v8
-; RV64-NEXT: srli a1, a1, 33
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT: vsseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT: ret
@@ -504,17 +487,15 @@ define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
 define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_load_store_factor2_v2_shared_mask:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: srli a1, a1, 1
 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT: vlseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT: vsseg2e32.v v8, (a0), v0.t
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: masked_load_store_factor2_v2_shared_mask:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 33
-; RV64-NEXT: srli a1, a1, 33
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT: vlseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT: vsseg2e32.v v8, (a0), v0.t
@@ -537,37 +518,36 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 ; RV32-NEXT: vmv1r.v v8, v0
 ; RV32-NEXT: slli a2, a1, 1
 ; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: li a1, -1
+; RV32-NEXT: li a3, -1
 ; RV32-NEXT: vmerge.vim v10, v9, 1, v0
 ; RV32-NEXT: vwaddu.vv v11, v10, v10
-; RV32-NEXT: vwmaccu.vx v11, a1, v10
-; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: vwmaccu.vx v11, a3, v10
+; RV32-NEXT: csrr a3, vlenb
 ; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: srli a1, a1, 2
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: srli a3, a3, 2
+; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vmsne.vi v0, v11, 0
-; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v11, v11, a1
+; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v11, v11, a3
 ; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vmsne.vi v0, v11, 0
 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0
 ; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vx v10, v9, a1
+; RV32-NEXT: vslideup.vx v10, v9, a3
 ; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vle32.v v10, (a0), v0.t
-; RV32-NEXT: li a1, 32
+; RV32-NEXT: li a2, 32
 ; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
-; RV32-NEXT: vnsrl.wx v13, v10, a1
-; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: vnsrl.wx v13, v10, a2
 ; RV32-NEXT: vnsrl.wi v12, v10, 0
-; RV32-NEXT: srli a2, a2, 1
+; RV32-NEXT: vmv.x.s a2, v10
 ; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t
-; RV32-NEXT: mv a0, a1
+; RV32-NEXT: mv a0, a2
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
@@ -590,20 +570,21 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0
 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
 ; RV64-NEXT: vmsne.vi v0, v11, 0
-; RV64-NEXT: slli a3, a1, 33
 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0
-; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
 ; RV64-NEXT: vslideup.vx v10, v9, a2
+; RV64-NEXT: slli a2, a1, 33
 ; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: srli a1, a3, 32
-; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT: srli a2, a2, 32
+; RV64-NEXT: vsetvli zero, a2, e32, m2, ta, ma
 ; RV64-NEXT: vle32.v v10, (a0), v0.t
-; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV64-NEXT: vnsrl.wx v13, v10, a1
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: slli a3, a1, 32
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT: vnsrl.wx v13, v10, a2
 ; RV64-NEXT: vmv.x.s a1, v10
 ; RV64-NEXT: vnsrl.wi v12, v10, 0
-; RV64-NEXT: srli a3, a3, 33
+; RV64-NEXT: srli a3, a3, 32
 ; RV64-NEXT: vmv1r.v v0, v8
 ; RV64-NEXT: vsetvli zero, a3, e32, m1, ta, ma
 ; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t
@@ -624,8 +605,6 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: masked_store_factor4_v2:
 ; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: srli a1, a1, 2
 ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT: vmv1r.v v10, v8
 ; RV32-NEXT: vmv1r.v v11, v9
@@ -634,8 +613,8 @@ define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
 ;
 ; RV64-LABEL: masked_store_factor4_v2:
 ; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 34
-; RV64-NEXT: srli a1, a1, 34
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT: vmv1r.v v10, v8
 ; RV64-NEXT: vmv1r.v v11, v9
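The slli/srli pairs by 32 that remain in the RV64 checks above are the zero-extension of the i32 %evl argument itself, which the commit message attributes to the missing zeroext attribute. A minimal sketch of the difference (function names invented for illustration; not part of this change):

```llvm
define i64 @evl_to_avl(i32 %evl) {
  %avl = zext i32 %evl to i64   ; RV64 LP64 passes i32 sign-extended, so this
  ret i64 %avl                  ; typically lowers to slli 32 + srli 32
}

define i64 @evl_to_avl_zeroext(i32 zeroext %evl) {
  %avl = zext i32 %evl to i64   ; caller already zero-extended: no shifts needed
  ret i64 %avl
}
```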
