Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion llvm/lib/CodeGen/RegAllocGreedy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2387,19 +2387,42 @@ void RAGreedy::initializeCSRCost() {
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);

for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
if (!TII->isFullCopyInstr(Instr))
if (!Instr.isCopy())
continue;

// Look for the other end of the copy.
Register OtherReg = Instr.getOperand(0).getReg();
unsigned OtherSubReg = Instr.getOperand(0).getSubReg();
unsigned SubReg = Instr.getOperand(1).getSubReg();

if (OtherReg == Reg) {
OtherReg = Instr.getOperand(1).getReg();
OtherSubReg = Instr.getOperand(1).getSubReg();
SubReg = Instr.getOperand(0).getSubReg();
if (OtherReg == Reg)
continue;
}
Comment on lines 2401 to 2407
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is starting to look ugly, so I created #159724 to try to simplify it.


// Get the current assignment.
MCRegister OtherPhysReg =
OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
if (OtherSubReg) {
if (OtherReg.isPhysical()) {
MCRegister Tuple =
TRI->getMatchingSuperReg(OtherPhysReg, OtherSubReg, RC);
if (!Tuple)
continue;
OtherPhysReg = Tuple;
} else {
// TODO: There should be a hinting mechanism for subregisters
if (SubReg != OtherSubReg)
continue;
}
}

// Push the collected information.
Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
OtherPhysReg));
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -159246,7 +159246,7 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; GFX9-NEXT: v_lshrrev_b32_e32 v37, 24, v61
; GFX9-NEXT: v_lshrrev_b32_e32 v50, 8, v61
; GFX9-NEXT: v_lshrrev_b32_e32 v56, 16, v60
; GFX9-NEXT: v_mov_b32_e32 v33, v60
; GFX9-NEXT: v_lshrrev_b32_e32 v55, 8, v60
; GFX9-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
Expand All @@ -159259,7 +159259,6 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) {
; GFX9-NEXT: v_lshrrev_b32_e32 v48, 8, v48
; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v58
; GFX9-NEXT: v_lshrrev_b32_e32 v58, 8, v58
; GFX9-NEXT: v_lshrrev_b32_e32 v55, 8, v33
; GFX9-NEXT: s_waitcnt vmcnt(3)
; GFX9-NEXT: v_lshrrev_b32_e32 v34, 16, v61
; GFX9-NEXT: s_waitcnt vmcnt(2)
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/load-global-i16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7398,7 +7398,7 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v15
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v17
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2)
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v20
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v20
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v16
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v14
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v14
Expand All @@ -7413,7 +7413,6 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v18
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v18
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v20
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v5
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v19
; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v19
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v21
Expand Down
24 changes: 10 additions & 14 deletions llvm/test/CodeGen/AMDGPU/load-local-i16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3851,9 +3851,9 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
; VI-DS128-NEXT: v_and_b32_e32 v32, 0xffff, v24
; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:80
; VI-DS128-NEXT: ds_read_b128 v[55:58], v0 offset:96
; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v11
; VI-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v11
; VI-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v10
; VI-DS128-NEXT: v_mov_b32_e32 v31, v15
; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
; VI-DS128-NEXT: s_waitcnt lgkmcnt(1)
; VI-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v27
; VI-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v26
Expand All @@ -3864,17 +3864,16 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
; VI-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v25
; VI-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v24
; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:112
; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
; VI-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
; VI-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v11
; VI-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v10
; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v25
; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v24
; VI-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v25
; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v24
; VI-DS128-NEXT: v_mov_b32_e32 v24, s0
; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
; VI-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
; VI-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v39
; VI-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v38
Expand Down Expand Up @@ -3944,7 +3943,7 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
; GFX9-DS128-NEXT: s_addc_u32 s13, s13, 0
; GFX9-DS128-NEXT: ds_read_b128 v[20:23], v0 offset:32
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2)
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v11
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v11
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v19
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v18
Expand Down Expand Up @@ -3992,8 +3991,8 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:80
; GFX9-DS128-NEXT: ds_read_b128 v[55:58], v0 offset:96
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v10
; GFX9-DS128-NEXT: v_mov_b32_e32 v31, v15
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v27
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v26
Expand All @@ -4004,17 +4003,16 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v25
; GFX9-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v24
; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:112
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
; GFX9-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v11
; GFX9-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v10
; GFX9-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v25
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v24
; GFX9-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v25
; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v24
; GFX9-DS128-NEXT: v_mov_b32_e32 v24, s0
; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v39
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v38
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v46, 16, v37
Expand Down Expand Up @@ -4890,7 +4888,7 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
; VI-DS128-NEXT: s_waitcnt lgkmcnt(2)
; VI-DS128-NEXT: v_ashrrev_i32_e32 v53, 16, v40
; VI-DS128-NEXT: v_bfe_i32 v52, v40, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v11
; VI-DS128-NEXT: v_ashrrev_i32_e32 v23, 16, v11
; VI-DS128-NEXT: s_waitcnt lgkmcnt(1)
; VI-DS128-NEXT: v_ashrrev_i32_e32 v47, 16, v39
; VI-DS128-NEXT: v_ashrrev_i32_e32 v45, 16, v38
Expand All @@ -4901,14 +4899,13 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
; VI-DS128-NEXT: ds_read_b128 v[37:40], v32 offset:112
; VI-DS128-NEXT: v_mov_b32_e32 v32, s0
; VI-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v10
; VI-DS128-NEXT: v_mov_b32_e32 v23, v15
; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v9
; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v38
; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v37
; VI-DS128-NEXT: v_bfe_i32 v2, v38, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v0, v37, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
; VI-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v20, v10, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
Expand Down Expand Up @@ -4986,7 +4983,7 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
; GFX9-DS128-NEXT: s_addc_u32 s13, s13, 0
; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v32 offset:32
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2)
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v11
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v23, 16, v11
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v19
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v18
Expand Down Expand Up @@ -5031,15 +5028,14 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_bfe_i32 v50, v37, 0, 16
; GFX9-DS128-NEXT: ds_read_b128 v[37:40], v32 offset:112
; GFX9-DS128-NEXT: v_mov_b32_e32 v32, s0
; GFX9-DS128-NEXT: v_mov_b32_e32 v23, v15
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v9
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
; GFX9-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v38
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v37
; GFX9-DS128-NEXT: v_bfe_i32 v2, v38, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v0, v37, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v20, v10, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v12, v8, 0, 16
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ define void @eliminate_spill_after_mfma_rewrite(i32 %x, i32 %y, <4 x i32> %arg,
; CHECK-NEXT: v_accvgpr_read_b32 v2, a2
; CHECK-NEXT: v_accvgpr_read_b32 v3, a3
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[10:13]
; CHECK-NEXT: ; def v[6:9]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: ;;#ASMSTART
Expand Down Expand Up @@ -142,7 +142,7 @@ define void @eliminate_spill_after_mfma_rewrite(i32 %x, i32 %y, <4 x i32> %arg,
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v0, a[36:39], s[16:17] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v0, v[10:13], s[16:17]
; CHECK-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_load_dword a63, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
Expand Down Expand Up @@ -306,10 +306,10 @@ define void @eliminate_spill_after_mfma_rewrite_x2(i32 %x, i32 %y, <4 x i32> %ar
; CHECK-NEXT: v_accvgpr_read_b32 v5, a1
; CHECK-NEXT: v_accvgpr_read_b32 v4, a0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[10:13]
; CHECK-NEXT: ; def v[8:11]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def v[14:17]
; CHECK-NEXT: ; def v[12:15]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def a[0:31]
Expand Down Expand Up @@ -349,9 +349,9 @@ define void @eliminate_spill_after_mfma_rewrite_x2(i32 %x, i32 %y, <4 x i32> %ar
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v0, a[36:39], s[16:17] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v0, v[10:13], s[16:17]
; CHECK-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; CHECK-NEXT: global_store_dwordx4 v0, v[12:15], s[16:17]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_load_dword a63, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword a62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -415,8 +415,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmv4r.v v8, v24
; RV32-NEXT: vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
Expand Down Expand Up @@ -726,8 +725,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; RV64-NEXT: mul a4, a4, a5
; RV64-NEXT: add a4, sp, a4
; RV64-NEXT: addi a4, a4, 32
; RV64-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
; RV64-NEXT: vmv4r.v v8, v24
; RV64-NEXT: vl8r.v v8, (a4) # vscale x 64-byte Folded Reload
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: slli a4, a4, 4
; RV64-NEXT: add a4, sp, a4
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8831,8 +8831,7 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT: vmv.v.v v4, v12
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24, v0.t
; ZVFHMIN-NEXT: bltu a1, a0, .LBB286_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
Expand Down Expand Up @@ -9460,8 +9459,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
; ZVFHMIN-NEXT: vmv.v.v v4, v12
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16, v0.t
; ZVFHMIN-NEXT: bltu a0, a1, .LBB291_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
Expand Down Expand Up @@ -9832,8 +9830,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
; ZVFHMIN-NEXT: vmv.v.v v4, v12
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16, v0.t
; ZVFHMIN-NEXT: bltu a0, a1, .LBB294_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
Expand Down Expand Up @@ -10347,8 +10344,7 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT: vmv.v.v v4, v12
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24, v0.t
; ZVFHMIN-NEXT: bltu a1, a0, .LBB298_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
Expand Down Expand Up @@ -10975,8 +10971,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT: vmv.v.v v4, v12
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24, v0.t
; ZVFHMIN-NEXT: bltu a0, a1, .LBB303_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
Expand Down Expand Up @@ -11343,8 +11338,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT: vmv.v.v v4, v12
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24, v0.t
; ZVFHMIN-NEXT: bltu a0, a1, .LBB306_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
Expand Down Expand Up @@ -11453,12 +11447,11 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
; ZVFHMIN-NEXT: vmv4r.v v8, v24
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
Expand Down Expand Up @@ -11580,12 +11573,11 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: sltu a3, a0, a2
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a2, a3, a2
; ZVFHMIN-NEXT: vmv4r.v v8, v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill
; ZVFHMIN-NEXT: vs8r.v v16, (a3) # vscale x 64-byte Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
Expand Down
Loading