10 changes: 9 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4710,7 +4710,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
// TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
}
}
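The change above folds a multiply by a power of two into a left shift and now forwards the no-unsigned-wrap flag onto the new SHL. A minimal standalone C++ sketch of the arithmetic fact behind that transfer follows; the helper names are illustrative, not LLVM APIs. For a constant C == 1 << K, "mul X, C wraps unsigned" and "shl X, K wraps unsigned" are the same condition, so hasNoUnsignedWrap() can be copied verbatim, while the signed case needs extra care, hence the TODO.

#include <cassert>
#include <cstdint>

// Illustrative helpers (not LLVM APIs): detect unsigned wrap by redoing the
// i32 operation in 64 bits, which is exactly what the nuw flag rules out.
static bool mulWrapsUnsigned(uint32_t X, uint32_t C) {
  return (uint64_t)X * C > UINT32_MAX;
}
static bool shlWrapsUnsigned(uint32_t X, unsigned K) {
  return ((uint64_t)X << K) > UINT32_MAX;
}

int main() {
  // For a power-of-two constant C == 1 << K the two wrap conditions coincide,
  // which is why visitMUL may forward hasNoUnsignedWrap() onto the new SHL.
  for (uint64_t X = 0; X <= UINT32_MAX; X += 12345)
    for (unsigned K = 0; K < 32; ++K)
      assert(mulWrapsUnsigned((uint32_t)X, 1u << K) ==
             shlWrapsUnsigned((uint32_t)X, K));
  return 0;
}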

@@ -11094,6 +11097,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N0.getOpcode() == ISD::SHL &&
(N0.getOperand(1) == N1 || N0->hasOneUse()) &&
TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
// If the shift amounts are the same and the shl doesn't shift out any
// non-zero bits, we can return the shl input.
if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
return N0.getOperand(0);
Contributor (inline review comment): If you're dropping the whole operation, should this be hoisted above the preceding profitability checks?

auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
const APInt &LHSC = LHS->getAPIntValue();
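The early-out added above drops the shl/srl pair entirely when both shift amounts match and the shl carries nuw: nothing was shifted out, so srl (shl X, C), C is just X. Below is a hedged standalone C++ sketch of that identity; shlIsNUW is a made-up helper for the illustration, not an LLVM API.

#include <cassert>
#include <cstdint>

// Made-up helper: "shl X, C is nuw" means no set bit is shifted past bit 31.
static bool shlIsNUW(uint32_t X, unsigned C) {
  return ((uint64_t)X << C) <= UINT32_MAX;
}

int main() {
  // Whenever the shl would carry nuw, the logical shift right by the same
  // amount undoes it exactly, so the combined operation can return X itself.
  for (uint64_t X = 0; X <= UINT32_MAX; X += 9973)
    for (unsigned C = 0; C < 32; ++C)
      if (shlIsNUW((uint32_t)X, C))
        assert(((uint32_t)X << C) >> C == (uint32_t)X);
  return 0;
}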
109 changes: 44 additions & 65 deletions llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -5,16 +5,14 @@
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor2_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg2e32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor2_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg2e32.v v8, (a0)
; RV64-NEXT: ret
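The first test shows the codegen effect, assuming the interleaved-access lowering scales the i32 %evl by the factor with nuw and later divides it back down. With the flag preserved, the doubling shl and the halving srl cancel: RV32 loses the slli/srli pair completely, and RV64 keeps only a plain i32-to-i64 zero-extend (shift amounts 32 instead of 33). A small C++ sketch of why the old and new RV64 sequences agree once the multiply is known not to wrap:

#include <cassert>
#include <cstdint>

// Old RV64 sequence: keeps bits 30..0 of a1, i.e. a zero-extend that also
// clears bit 31, the leftover of doubling and halving the EVL in i32.
static uint64_t oldSeq(uint64_t a1) { return (a1 << 33) >> 33; }
// New RV64 sequence: a plain zero-extend of the 32-bit EVL.
static uint64_t newSeq(uint64_t a1) { return (a1 << 32) >> 32; }

int main() {
  // mul nuw i32 %evl, 2 implies bit 31 of %evl is already zero, so clearing it
  // again is a no-op and the cheaper sequence (or none at all on RV32) is legal.
  for (uint32_t evl : {0u, 1u, 5u, 0x7fffffffu})
    assert(oldSeq(evl) == newSeq(evl));
  return 0;
}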
@@ -142,16 +140,14 @@ merge:
define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor4_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg4e32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor4_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 34
; RV64-NEXT: srli a1, a1, 34
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg4e32.v v8, (a0)
; RV64-NEXT: ret
@@ -237,16 +233,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor8_v2(ptr %ptr, i32 %evl) {
; RV32-LABEL: load_factor8_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg8e32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor8_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 35
; RV64-NEXT: srli a1, a1, 35
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg8e32.v v8, (a0)
; RV64-NEXT: ret
@@ -276,16 +270,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
; RV32-LABEL: store_factor2_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT: vsseg2e32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: store_factor2_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT: vsseg2e32.v v8, (a0)
; RV64-NEXT: ret
@@ -384,8 +376,6 @@ define void @store_factor7_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <v
define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
; RV32-LABEL: store_factor8_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT: vmv1r.v v10, v8
; RV32-NEXT: vmv1r.v v11, v9
@@ -398,8 +388,8 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
;
; RV64-LABEL: store_factor8_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 35
; RV64-NEXT: srli a1, a1, 35
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT: vmv1r.v v10, v8
; RV64-NEXT: vmv1r.v v11, v9
@@ -418,16 +408,14 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_load_factor2_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg2e32.v v8, (a0), v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_factor2_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg2e32.v v8, (a0), v0.t
; RV64-NEXT: ret
@@ -445,16 +433,14 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale
define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor4_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_load_factor4_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg4e32.v v8, (a0), v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_factor4_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 34
; RV64-NEXT: srli a1, a1, 34
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg4e32.v v8, (a0), v0.t
; RV64-NEXT: ret
@@ -477,20 +463,17 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_store_factor2_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv1r.v v9, v8
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT: vmv1r.v v9, v8
; RV32-NEXT: vsseg2e32.v v8, (a0), v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: masked_store_factor2_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT: vmv1r.v v9, v8
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT: vsseg2e32.v v8, (a0), v0.t
; RV64-NEXT: ret
@@ -504,17 +487,15 @@ define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_load_store_factor2_v2_shared_mask:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: srli a1, a1, 1
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vlseg2e32.v v8, (a0), v0.t
; RV32-NEXT: vsseg2e32.v v8, (a0), v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_store_factor2_v2_shared_mask:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 33
; RV64-NEXT: srli a1, a1, 33
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV64-NEXT: vlseg2e32.v v8, (a0), v0.t
; RV64-NEXT: vsseg2e32.v v8, (a0), v0.t
@@ -537,37 +518,36 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV32-NEXT: vmv1r.v v8, v0
; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: li a1, -1
; RV32-NEXT: li a3, -1
; RV32-NEXT: vmerge.vim v10, v9, 1, v0
; RV32-NEXT: vwaddu.vv v11, v10, v10
; RV32-NEXT: vwmaccu.vx v11, a1, v10
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vwmaccu.vx v11, a3, v10
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v11, 0
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT: vslidedown.vx v11, v11, a1
; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; RV32-NEXT: vslidedown.vx v11, v11, a3
; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT: vmerge.vim v10, v10, 1, v0
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v11, 0
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vx v10, v9, a1
; RV32-NEXT: vslideup.vx v10, v9, a3
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vle32.v v10, (a0), v0.t
; RV32-NEXT: li a1, 32
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wx v13, v10, a1
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vnsrl.wx v13, v10, a2
; RV32-NEXT: vnsrl.wi v12, v10, 0
; RV32-NEXT: srli a2, a2, 1
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t
; RV32-NEXT: mv a0, a1
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
@@ -590,20 +570,21 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vmsne.vi v0, v11, 0
; RV64-NEXT: slli a3, a1, 33
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vx v10, v9, a2
; RV64-NEXT: slli a2, a1, 33
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: srli a1, a3, 32
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: srli a2, a2, 32
; RV64-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0), v0.t
; RV64-NEXT: li a1, 32
; RV64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wx v13, v10, a1
; RV64-NEXT: li a2, 32
; RV64-NEXT: slli a3, a1, 32
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wx v13, v10, a2
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vnsrl.wi v12, v10, 0
; RV64-NEXT: srli a3, a3, 33
; RV64-NEXT: srli a3, a3, 32
; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vsetvli zero, a3, e32, m1, ta, ma
; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t
@@ -624,8 +605,6 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
; RV32-LABEL: masked_store_factor4_v2:
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV32-NEXT: vmv1r.v v10, v8
; RV32-NEXT: vmv1r.v v11, v9
@@ -634,8 +613,8 @@ define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
;
; RV64-LABEL: masked_store_factor4_v2:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a1, 34
; RV64-NEXT: srli a1, a1, 34
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: srli a1, a1, 32
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; RV64-NEXT: vmv1r.v v10, v8
; RV64-NEXT: vmv1r.v v11, v9