Skip to content

Commit 3b4d653

Browse files
committed
[RISCV] Implement isHighLatencyDef()
It returns true for div/rem/sqrt/... operations. This is an alternative if we don't support a generic scheduling model.
1 parent 7eadc19 commit 3b4d653

File tree

5 files changed

+135
-99
lines changed

5 files changed

+135
-99
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3679,6 +3679,52 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
36793679
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
36803680
}
36813681

3682+
// Reports whether the opcode Opc is treated as a high-latency definition.
// Returns true only for the opcodes listed in the switch below: scalar integer
// div/rem, scalar floating-point div/sqrt, and the vector VV div/rem/fdiv
// opcode groups named via CASE_VFMA_OPCODE_VV. Every other opcode falls into
// the default case and returns false.
// NOTE(review): CASE_VFMA_OPCODE_VV is defined outside this view; presumably
// it expands to the full set of per-LMUL pseudo opcodes - confirm that it is
// still defined at this point in the file.
bool RISCVInstrInfo::isHighLatencyDef(int Opc) const {
3683+
switch (Opc) {
3684+
default:
3685+
return false;
3686+
// Scalar integer div/rem.
3687+
case RISCV::DIV:
3688+
case RISCV::DIVW:
3689+
case RISCV::DIVU:
3690+
case RISCV::DIVUW:
3691+
case RISCV::REM:
3692+
case RISCV::REMW:
3693+
case RISCV::REMU:
3694+
case RISCV::REMUW:
3695+
// Scalar floating-point div/sqrt (no rem opcodes are listed here).
3696+
case RISCV::FDIV_H:
3697+
case RISCV::FDIV_S:
3698+
case RISCV::FDIV_D:
3699+
case RISCV::FDIV_H_INX:
3700+
case RISCV::FDIV_S_INX:
3701+
case RISCV::FDIV_D_INX:
3702+
case RISCV::FDIV_D_IN32X:
3703+
case RISCV::FSQRT_H:
3704+
case RISCV::FSQRT_S:
3705+
case RISCV::FSQRT_D:
3706+
case RISCV::FSQRT_H_INX:
3707+
case RISCV::FSQRT_S_INX:
3708+
case RISCV::FSQRT_D_INX:
3709+
case RISCV::FSQRT_D_IN32X:
3710+
// Vector integer div/rem. Only the VV forms are active; the VX forms below
// are commented out.
3711+
case CASE_VFMA_OPCODE_VV(DIV):
3712+
case CASE_VFMA_OPCODE_VV(DIVU):
3713+
case CASE_VFMA_OPCODE_VV(REM):
3714+
case CASE_VFMA_OPCODE_VV(REMU):
3715+
// case CASE_VFMA_OPCODE_VX(DIV):
3716+
// case CASE_VFMA_OPCODE_VX(DIVU):
3717+
// case CASE_VFMA_OPCODE_VX(REM):
3718+
// case CASE_VFMA_OPCODE_VX(REMU):
3719+
// Vector floating-point div/sqrt. Only FDIV (VV form) is active; the FRDIV,
// FSQRT and FRSQRT7 cases below are commented out.
3720+
case CASE_VFMA_OPCODE_VV(FDIV):
3721+
// case CASE_VFMA_OPCODE_VF(FRDIV):
3722+
// case CASE_VFMA_OPCODE_VV(FSQRT):
3723+
// case CASE_VFMA_OPCODE_VV(FRSQRT7):
3724+
return true;
3725+
}
3726+
}
3727+
36823728
#undef CASE_RVV_OPCODE_UNMASK_LMUL
36833729
#undef CASE_RVV_OPCODE_MASK_LMUL
36843730
#undef CASE_RVV_OPCODE_LMUL

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
300300
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
301301
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
302302

303+
bool isHighLatencyDef(int Opc) const override;
304+
303305
protected:
304306
const RISCVSubtarget &STI;
305307

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -894,18 +894,18 @@ define <2 x i16> @vwmul_v2i16_multiuse(ptr %x, ptr %y, ptr %z, ptr %w) {
894894
; CHECK-LABEL: vwmul_v2i16_multiuse:
895895
; CHECK: # %bb.0:
896896
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
897-
; CHECK-NEXT: vle8.v v8, (a0)
898-
; CHECK-NEXT: vle8.v v9, (a1)
899-
; CHECK-NEXT: vle8.v v10, (a2)
900-
; CHECK-NEXT: vle8.v v11, (a3)
901-
; CHECK-NEXT: vsext.vf2 v12, v8
897+
; CHECK-NEXT: vle8.v v8, (a1)
898+
; CHECK-NEXT: vle8.v v9, (a2)
899+
; CHECK-NEXT: vsext.vf2 v10, v8
902900
; CHECK-NEXT: vsext.vf2 v8, v9
903-
; CHECK-NEXT: vsext.vf2 v9, v10
904-
; CHECK-NEXT: vsext.vf2 v10, v11
905-
; CHECK-NEXT: vmul.vv v11, v12, v10
906-
; CHECK-NEXT: vmul.vv v10, v8, v10
907-
; CHECK-NEXT: vdivu.vv v8, v8, v9
908-
; CHECK-NEXT: vor.vv v9, v11, v10
901+
; CHECK-NEXT: vdivu.vv v8, v10, v8
902+
; CHECK-NEXT: vle8.v v9, (a0)
903+
; CHECK-NEXT: vle8.v v11, (a3)
904+
; CHECK-NEXT: vsext.vf2 v12, v9
905+
; CHECK-NEXT: vsext.vf2 v9, v11
906+
; CHECK-NEXT: vmul.vv v11, v12, v9
907+
; CHECK-NEXT: vmul.vv v9, v10, v9
908+
; CHECK-NEXT: vor.vv v9, v11, v9
909909
; CHECK-NEXT: vor.vv v8, v9, v8
910910
; CHECK-NEXT: ret
911911
%a = load <2 x i8>, ptr %x

llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -221,16 +221,16 @@ define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
221221
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
222222
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
223223
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
224-
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
225-
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
224+
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
226225
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
227-
; CHECK-NEXT: vfdiv.vv v0, v0, v8
226+
; CHECK-NEXT: vfdiv.vv v16, v0, v16
228227
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
229-
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
228+
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
230229
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
231-
; CHECK-NEXT: vfdiv.vv v16, v16, v24
230+
; CHECK-NEXT: vfdiv.vv v24, v0, v24
232231
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
233-
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
232+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
233+
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
234234
; CHECK-NEXT: csrr a0, vlenb
235235
; CHECK-NEXT: slli a0, a0, 3
236236
; CHECK-NEXT: add sp, sp, a0
@@ -249,32 +249,42 @@ define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
249249
; CHECK-NEXT: addi sp, sp, -16
250250
; CHECK-NEXT: .cfi_def_cfa_offset 16
251251
; CHECK-NEXT: csrr a0, vlenb
252-
; CHECK-NEXT: slli a0, a0, 3
252+
; CHECK-NEXT: slli a0, a0, 4
253253
; CHECK-NEXT: sub sp, sp, a0
254-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
254+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
255255
; CHECK-NEXT: fmv.x.h a0, fa0
256256
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
257257
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
258258
; CHECK-NEXT: addi a1, sp, 16
259259
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
260-
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
261260
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
262-
; CHECK-NEXT: vmv.v.x v8, a0
261+
; CHECK-NEXT: vmv.v.x v16, a0
263262
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
264-
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
265-
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
263+
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
264+
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
265+
; CHECK-NEXT: csrr a0, vlenb
266+
; CHECK-NEXT: slli a0, a0, 3
267+
; CHECK-NEXT: add a0, sp, a0
268+
; CHECK-NEXT: addi a0, a0, 16
269+
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
266270
; CHECK-NEXT: addi a0, sp, 16
267-
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
271+
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
268272
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
269-
; CHECK-NEXT: vfdiv.vv v0, v8, v0
273+
; CHECK-NEXT: vfdiv.vv v24, v16, v0
270274
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
271-
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
275+
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
276+
; CHECK-NEXT: csrr a0, vlenb
277+
; CHECK-NEXT: slli a0, a0, 3
278+
; CHECK-NEXT: add a0, sp, a0
279+
; CHECK-NEXT: addi a0, a0, 16
280+
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
272281
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
273-
; CHECK-NEXT: vfdiv.vv v16, v24, v16
282+
; CHECK-NEXT: vfdiv.vv v16, v0, v8
274283
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
284+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
275285
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
276286
; CHECK-NEXT: csrr a0, vlenb
277-
; CHECK-NEXT: slli a0, a0, 3
287+
; CHECK-NEXT: slli a0, a0, 4
278288
; CHECK-NEXT: add sp, sp, a0
279289
; CHECK-NEXT: .cfi_def_cfa sp, 16
280290
; CHECK-NEXT: addi sp, sp, 16
@@ -573,16 +583,16 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
573583
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
574584
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
575585
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
576-
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
577-
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
586+
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
578587
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
579-
; ZVFHMIN-NEXT: vfdiv.vv v0, v0, v8
588+
; ZVFHMIN-NEXT: vfdiv.vv v16, v0, v16
580589
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
581-
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
590+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
582591
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
583-
; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24
592+
; ZVFHMIN-NEXT: vfdiv.vv v24, v0, v24
584593
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
585-
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
594+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
595+
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
586596
; ZVFHMIN-NEXT: csrr a0, vlenb
587597
; ZVFHMIN-NEXT: slli a0, a0, 3
588598
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -607,32 +617,42 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
607617
; ZVFHMIN-NEXT: addi sp, sp, -16
608618
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
609619
; ZVFHMIN-NEXT: csrr a0, vlenb
610-
; ZVFHMIN-NEXT: slli a0, a0, 3
620+
; ZVFHMIN-NEXT: slli a0, a0, 4
611621
; ZVFHMIN-NEXT: sub sp, sp, a0
612-
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
622+
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
613623
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
614624
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
615625
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
616626
; ZVFHMIN-NEXT: addi a1, sp, 16
617627
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
618-
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
619628
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
620-
; ZVFHMIN-NEXT: vmv.v.x v8, a0
629+
; ZVFHMIN-NEXT: vmv.v.x v16, a0
621630
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
622-
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
623-
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
631+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
632+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
633+
; ZVFHMIN-NEXT: csrr a0, vlenb
634+
; ZVFHMIN-NEXT: slli a0, a0, 3
635+
; ZVFHMIN-NEXT: add a0, sp, a0
636+
; ZVFHMIN-NEXT: addi a0, a0, 16
637+
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
624638
; ZVFHMIN-NEXT: addi a0, sp, 16
625-
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
639+
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
626640
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
627-
; ZVFHMIN-NEXT: vfdiv.vv v0, v8, v0
641+
; ZVFHMIN-NEXT: vfdiv.vv v24, v16, v0
628642
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
629-
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
643+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
644+
; ZVFHMIN-NEXT: csrr a0, vlenb
645+
; ZVFHMIN-NEXT: slli a0, a0, 3
646+
; ZVFHMIN-NEXT: add a0, sp, a0
647+
; ZVFHMIN-NEXT: addi a0, a0, 16
648+
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
630649
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
631-
; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16
650+
; ZVFHMIN-NEXT: vfdiv.vv v16, v0, v8
632651
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
652+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
633653
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
634654
; ZVFHMIN-NEXT: csrr a0, vlenb
635-
; ZVFHMIN-NEXT: slli a0, a0, 3
655+
; ZVFHMIN-NEXT: slli a0, a0, 4
636656
; ZVFHMIN-NEXT: add sp, sp, a0
637657
; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
638658
; ZVFHMIN-NEXT: addi sp, sp, 16

llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll

Lines changed: 22 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -200,16 +200,16 @@ define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
200200
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
201201
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
202202
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
203-
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
204-
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
203+
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
205204
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
206-
; CHECK-NEXT: vfdiv.vv v0, v0, v8
205+
; CHECK-NEXT: vfdiv.vv v16, v0, v16
207206
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
208-
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
207+
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
209208
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
210-
; CHECK-NEXT: vfdiv.vv v16, v16, v24
209+
; CHECK-NEXT: vfdiv.vv v24, v0, v24
211210
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
212-
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
211+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
212+
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
213213
; CHECK-NEXT: csrr a0, vlenb
214214
; CHECK-NEXT: slli a0, a0, 3
215215
; CHECK-NEXT: add sp, sp, a0
@@ -224,39 +224,23 @@ define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
224224
define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
225225
; CHECK-LABEL: vfdiv_vf_nxv32bf16:
226226
; CHECK: # %bb.0:
227-
; CHECK-NEXT: addi sp, sp, -16
228-
; CHECK-NEXT: .cfi_def_cfa_offset 16
229-
; CHECK-NEXT: csrr a0, vlenb
230-
; CHECK-NEXT: slli a0, a0, 3
231-
; CHECK-NEXT: sub sp, sp, a0
232-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
233227
; CHECK-NEXT: fmv.x.h a0, fa0
234228
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
235229
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
236-
; CHECK-NEXT: addi a1, sp, 16
237-
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
238230
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
239231
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
240232
; CHECK-NEXT: vmv.v.x v8, a0
241233
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
242234
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
243-
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
244-
; CHECK-NEXT: addi a0, sp, 16
245-
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
246235
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
247-
; CHECK-NEXT: vfdiv.vv v0, v8, v0
236+
; CHECK-NEXT: vfdiv.vv v16, v16, v0
248237
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
249-
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
238+
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
250239
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
251-
; CHECK-NEXT: vfdiv.vv v16, v24, v16
240+
; CHECK-NEXT: vfdiv.vv v24, v24, v0
252241
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
253-
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
254-
; CHECK-NEXT: csrr a0, vlenb
255-
; CHECK-NEXT: slli a0, a0, 3
256-
; CHECK-NEXT: add sp, sp, a0
257-
; CHECK-NEXT: .cfi_def_cfa sp, 16
258-
; CHECK-NEXT: addi sp, sp, 16
259-
; CHECK-NEXT: .cfi_def_cfa_offset 0
242+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
243+
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
260244
; CHECK-NEXT: ret
261245
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
262246
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
@@ -528,16 +512,16 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
528512
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
529513
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
530514
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
531-
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
532-
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
515+
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
533516
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
534-
; ZVFHMIN-NEXT: vfdiv.vv v0, v0, v8
517+
; ZVFHMIN-NEXT: vfdiv.vv v16, v0, v16
535518
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
536-
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
519+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
537520
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
538-
; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24
521+
; ZVFHMIN-NEXT: vfdiv.vv v24, v0, v24
539522
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
540-
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
523+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
524+
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
541525
; ZVFHMIN-NEXT: csrr a0, vlenb
542526
; ZVFHMIN-NEXT: slli a0, a0, 3
543527
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -558,39 +542,23 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
558542
;
559543
; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16:
560544
; ZVFHMIN: # %bb.0:
561-
; ZVFHMIN-NEXT: addi sp, sp, -16
562-
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
563-
; ZVFHMIN-NEXT: csrr a0, vlenb
564-
; ZVFHMIN-NEXT: slli a0, a0, 3
565-
; ZVFHMIN-NEXT: sub sp, sp, a0
566-
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
567545
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
568546
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
569547
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
570-
; ZVFHMIN-NEXT: addi a1, sp, 16
571-
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
572548
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
573549
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
574550
; ZVFHMIN-NEXT: vmv.v.x v8, a0
575551
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
576552
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
577-
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
578-
; ZVFHMIN-NEXT: addi a0, sp, 16
579-
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
580553
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
581-
; ZVFHMIN-NEXT: vfdiv.vv v0, v8, v0
554+
; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v0
582555
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
583-
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
556+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
584557
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
585-
; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16
558+
; ZVFHMIN-NEXT: vfdiv.vv v24, v24, v0
586559
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
587-
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
588-
; ZVFHMIN-NEXT: csrr a0, vlenb
589-
; ZVFHMIN-NEXT: slli a0, a0, 3
590-
; ZVFHMIN-NEXT: add sp, sp, a0
591-
; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
592-
; ZVFHMIN-NEXT: addi sp, sp, 16
593-
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
560+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
561+
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
594562
; ZVFHMIN-NEXT: ret
595563
%head = insertelement <vscale x 32 x half> poison, half %b, i32 0
596564
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer

0 commit comments

Comments
 (0)