Skip to content

Commit bdddff2

Browse files
authored
[RISCV][RVV] Prohibit conversion of scalar store to single-element vse if vmv.x.s has multiple uses (llvm#152112)
Godbolt example: https://godbolt.org/z/ThdfP475a In the example, a single-element vse is used to store the reduction result instead of a scalar store ([this optimization was introduced by this patch](https://reviews.llvm.org/D109482)). However, vmv.x.s can't be eliminated here because it has other uses (e.g. CopyToReg), so it seems more profitable to use a scalar store (we already have the stored value in a scalar register, and can save one vsetvli, which is likely to be required for the single-element vse). The proposed solution is to perform this transform only if vmv.x.s has exactly one use (the store instruction).
1 parent d13341d commit bdddff2

File tree

3 files changed

+14
-16
lines changed

3 files changed

+14
-16
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20640,10 +20640,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2064020640
// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
2064120641
// vfmv.f.s is represented as extract element from 0. Match it late to avoid
2064220642
// any illegal types.
20643-
if (Val.getOpcode() == RISCVISD::VMV_X_S ||
20644-
(DCI.isAfterLegalizeDAG() &&
20645-
Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20646-
isNullConstant(Val.getOperand(1)))) {
20643+
if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20644+
(DCI.isAfterLegalizeDAG() &&
20645+
Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20646+
isNullConstant(Val.getOperand(1)))) &&
20647+
Val.hasOneUse()) {
2064720648
SDValue Src = Val.getOperand(0);
2064820649
MVT VecVT = Src.getSimpleValueType();
2064920650
// VecVT should be scalable and memory VT should match the element type.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2047,19 +2047,17 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2
20472047
;
20482048
; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
20492049
; RV32ZVE32F: # %bb.0:
2050-
; RV32ZVE32F-NEXT: addi a1, a0, 8
20512050
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
20522051
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
20532052
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2054-
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
2055-
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2056-
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
2057-
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
2058-
; RV32ZVE32F-NEXT: srai a2, a2, 31
2059-
; RV32ZVE32F-NEXT: vse32.v v8, (a1)
2060-
; RV32ZVE32F-NEXT: srai a3, a3, 31
2061-
; RV32ZVE32F-NEXT: sw a2, 4(a0)
2062-
; RV32ZVE32F-NEXT: sw a3, 12(a0)
2053+
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
2054+
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
2055+
; RV32ZVE32F-NEXT: srai a3, a1, 31
2056+
; RV32ZVE32F-NEXT: srai a4, a2, 31
2057+
; RV32ZVE32F-NEXT: sw a1, 0(a0)
2058+
; RV32ZVE32F-NEXT: sw a3, 4(a0)
2059+
; RV32ZVE32F-NEXT: sw a2, 8(a0)
2060+
; RV32ZVE32F-NEXT: sw a4, 12(a0)
20632061
; RV32ZVE32F-NEXT: ret
20642062
;
20652063
; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:

llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,7 @@ define float @reduce_fadd3(float %x, <4 x float> %v, ptr %rdxptr) {
266266
; CHECK-NEXT: vfredusum.vs v8, v8, v9
267267
; CHECK-NEXT: vfmv.f.s fa5, v8
268268
; CHECK-NEXT: fadd.s fa0, fa5, fa0
269-
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
270-
; CHECK-NEXT: vse32.v v8, (a0)
269+
; CHECK-NEXT: fsw fa5, 0(a0)
271270
; CHECK-NEXT: ret
272271
entry:
273272
%rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)

0 commit comments

Comments
 (0)