37 changes: 37 additions & 0 deletions llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -257,16 +257,27 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
   // Vector Unit-Stride Instructions
   // Vector Strided Instructions
   /// Dest EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL
+  case RISCV::VLM_V:
+  case RISCV::VSM_V:
+    return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(0, MI), 0);
+  case RISCV::VLE8_V:
Collaborator: Eventually, we want to refactor so that we can share this code with getEEWForLoadStore in RISCVInsertVSETVLI.cpp.

   case RISCV::VSE8_V:
+  case RISCV::VLSE8_V:
   case RISCV::VSSE8_V:
     return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+  case RISCV::VLE16_V:
   case RISCV::VSE16_V:
+  case RISCV::VLSE16_V:
   case RISCV::VSSE16_V:
     return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+  case RISCV::VLE32_V:
   case RISCV::VSE32_V:
+  case RISCV::VLSE32_V:
   case RISCV::VSSE32_V:
     return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+  case RISCV::VLE64_V:
   case RISCV::VSE64_V:
+  case RISCV::VLSE64_V:
   case RISCV::VSSE64_V:
     return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
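
A rough sketch of the refactor the review comment above suggests, assuming a single opcode-to-log2(EEW) mapping can serve both passes. The helper name and its placement are hypothetical and not part of this patch; getEEWForLoadStore in RISCVInsertVSETVLI.cpp is the existing code the reviewer references.

// Hypothetical shared helper (names illustrative, not part of this patch):
// getEEWForLoadStore in RISCVInsertVSETVLI.cpp and getOperandInfo here
// switch over the same unit-stride/strided opcodes, so one opcode ->
// log2(EEW) table could back both. Requires <optional> and the generated
// RISC-V opcode definitions.
static std::optional<unsigned> getLog2EEWForLoadStore(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::VLE8_V:
  case RISCV::VSE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSSE8_V:
    return 3; // EEW = 8
  case RISCV::VLE16_V:
  case RISCV::VSE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSSE16_V:
    return 4; // EEW = 16
  case RISCV::VLE32_V:
  case RISCV::VSE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSSE32_V:
    return 5; // EEW = 32
  case RISCV::VLE64_V:
  case RISCV::VSE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSSE64_V:
    return 6; // EEW = 64
  default:
    return std::nullopt;
  }
}

A caller in this file could then build the result as OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI), *Log2EEW), collapsing the per-width cases above into one.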

@@ -732,6 +743,32 @@ static bool isSupportedInstr(const MachineInstr &MI) {
     return false;
 
   switch (RVV->BaseInstr) {
+  // Vector Unit-Stride Instructions
+  // Vector Strided Instructions
+  case RISCV::VLE8_V:
+  case RISCV::VLM_V:
+  case RISCV::VLSE8_V:
+  case RISCV::VLE16_V:
+  case RISCV::VLSE16_V:
+  case RISCV::VLE32_V:
+  case RISCV::VLSE32_V:
+  case RISCV::VLE64_V:
+  case RISCV::VLSE64_V:
+  // Vector Indexed Instructions
+  case RISCV::VLUXEI8_V:
Collaborator: This part - the volatile check for existing LD/ST opcodes - is a functional fix that really should be extracted and submitted as its own review, with a clear description stating that it is a bug fix.

Collaborator: There wasn't a bug prior to this patch, was there?

Collaborator: Oh, I'm sorry, I misread. We'd previously only supported stores, and since we weren't reducing their widths anyway, no, there isn't.

+  case RISCV::VLOXEI8_V:
+  case RISCV::VLUXEI16_V:
+  case RISCV::VLOXEI16_V:
+  case RISCV::VLUXEI32_V:
+  case RISCV::VLOXEI32_V:
+  case RISCV::VLUXEI64_V:
+  case RISCV::VLOXEI64_V: {
+    for (const MachineMemOperand *MMO : MI.memoperands())
+      if (MMO->isVolatile())
+        return false;
+    return true;
+  }
+
   // Vector Single-Width Integer Add and Subtract
   case RISCV::VADD_VI:
   case RISCV::VADD_VV:
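The volatile check in the load cases above is what the review comments discuss: shrinking the VL of a volatile load would change which bytes the hardware accesses, and for volatile memory each access is observable, so such loads must be left alone. A minimal sketch of how that guard could be factored out, using only existing MachineInstr/MachineMemOperand APIs; the helper name is hypothetical and not part of the patch.

// Hypothetical extraction of the volatile guard (not in this patch).
// memoperands() and MachineMemOperand::isVolatile() are existing LLVM
// APIs; returning true means a VL-reducing transform must skip MI.
static bool hasVolatileMemOperand(const MachineInstr &MI) {
  for (const MachineMemOperand *MMO : MI.memoperands())
    if (MMO->isVolatile())
      return true;
  return false;
}

isSupportedInstr could then bail out early with if (hasVolatileMemOperand(MI)) return false; for every load opcode it handles, rather than repeating the loop per case block.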
23 changes: 3 additions & 20 deletions llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -1445,10 +1445,9 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale
 ; RV32-NEXT: addi a6, sp, 8
 ; RV32-NEXT: sw a4, 8(sp)
 ; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a6), zero
 ; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsll.vx v10, v8, a3, v0.t
 ; RV32-NEXT: addi a5, a5, -256
 ; RV32-NEXT: vand.vx v11, v8, a5, v0.t
@@ -1595,9 +1594,7 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va
 ; RV32-NEXT: vand.vx v13, v8, a1
 ; RV32-NEXT: vand.vx v12, v12, a1
 ; RV32-NEXT: vor.vv v11, v12, v11
-; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v12, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; RV32-NEXT: vsll.vx v13, v13, a4
 ; RV32-NEXT: vor.vv v10, v10, v13
 ; RV32-NEXT: vsrl.vi v13, v8, 8
@@ -1730,10 +1727,9 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale
 ; RV32-NEXT: addi a6, sp, 8
 ; RV32-NEXT: sw a4, 8(sp)
 ; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m2, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a6), zero
 ; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsll.vx v12, v8, a3, v0.t
 ; RV32-NEXT: addi a5, a5, -256
 ; RV32-NEXT: vand.vx v14, v8, a5, v0.t
@@ -1880,9 +1876,7 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va
 ; RV32-NEXT: vand.vx v18, v8, a1
 ; RV32-NEXT: vand.vx v16, v16, a1
 ; RV32-NEXT: vor.vv v10, v16, v10
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV32-NEXT: vsll.vx v18, v18, a4
 ; RV32-NEXT: vor.vv v12, v12, v18
 ; RV32-NEXT: vsrl.vi v18, v8, 8
@@ -2015,10 +2009,9 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale
 ; RV32-NEXT: addi a6, sp, 8
 ; RV32-NEXT: sw a4, 8(sp)
 ; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m4, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vlse64.v v12, (a6), zero
 ; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsll.vx v16, v8, a3, v0.t
 ; RV32-NEXT: addi a5, a5, -256
 ; RV32-NEXT: vand.vx v20, v8, a5, v0.t
@@ -2165,9 +2158,7 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va
 ; RV32-NEXT: vand.vx v28, v8, a1
 ; RV32-NEXT: vand.vx v24, v24, a1
 ; RV32-NEXT: vor.vv v12, v24, v12
-; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
 ; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsll.vx v28, v28, a4
 ; RV32-NEXT: vor.vv v16, v16, v28
 ; RV32-NEXT: vsrl.vi v28, v8, 8
@@ -2315,15 +2306,13 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
 ; RV32-NEXT: add a3, sp, a3
 ; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a5), zero
 ; RV32-NEXT: csrr a3, vlenb
 ; RV32-NEXT: slli a3, a3, 3
 ; RV32-NEXT: add a3, sp, a3
 ; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vx v24, v8, a3, v0.t
 ; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
 ; RV32-NEXT: addi a5, sp, 16
@@ -2528,9 +2517,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v16, v8, 24
 ; RV32-NEXT: vand.vx v16, v16, a5
 ; RV32-NEXT: vsrl.vi v0, v8, 8
@@ -2704,15 +2691,13 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
 ; RV32-NEXT: add a3, sp, a3
 ; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a5), zero
 ; RV32-NEXT: csrr a3, vlenb
 ; RV32-NEXT: slli a3, a3, 3
 ; RV32-NEXT: add a3, sp, a3
 ; RV32-NEXT: addi a3, a3, 16
 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vx v24, v8, a3, v0.t
 ; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
 ; RV32-NEXT: addi a5, sp, 16
@@ -2917,9 +2902,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va
 ; RV32-NEXT: add a1, sp, a1
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v16, v8, 24
 ; RV32-NEXT: vand.vx v16, v16, a5
 ; RV32-NEXT: vsrl.vi v0, v8, 8