diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 1ba7f0b522a2b..1537cb1e9125e 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -1092,6 +1092,10 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VFWNMSAC_VF: case RISCV::VFWMACCBF16_VV: case RISCV::VFWMACCBF16_VF: + // Vector Floating-Point Square-Root Instruction + case RISCV::VFSQRT_V: + // Vector Floating-Point Reciprocal Square-Root Estimate Instruction + case RISCV::VFRSQRT7_V: // Vector Floating-Point MIN/MAX Instructions case RISCV::VFMIN_VF: case RISCV::VFMIN_VV: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 585a331e55094..bef29dfecef4c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1318,11 +1318,10 @@ define void @sqrt_v6bf16(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v10 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 ; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret @@ -1371,11 +1370,10 @@ define void @sqrt_v6f16(ptr %x) { ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v8, (a0) -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsqrt.v v8, v10 -; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 ; ZVFHMIN-NEXT: vse16.v v10, (a0) ; ZVFHMIN-NEXT: ret diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index c6ee9e34dc207..5cd9b77af82cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -5069,3 +5069,51 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b, %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl) ret <vscale x 4 x float> %2 } + +define <vscale x 4 x double> @vfsqrt(<vscale x 4 x float> %a) { +; NOVLOPT-LABEL: vfsqrt: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: fsrmi a0, 0 +; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma +; NOVLOPT-NEXT: vfsqrt.v v10, v8 +; NOVLOPT-NEXT: fsrm a0 +; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10 +; NOVLOPT-NEXT: vmv4r.v v8, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfsqrt: +; VLOPT: # %bb.0: +; VLOPT-NEXT: fsrmi a0, 0 +; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; VLOPT-NEXT: vfsqrt.v v10, v8 +; VLOPT-NEXT: fsrm a0 +; VLOPT-NEXT: vfwmacc.vv v12, v8, v10 +; VLOPT-NEXT: vmv4r.v v8, v12 +; VLOPT-NEXT: ret + %1 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7) + %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0) + ret <vscale x 4 x double> %2 +} + +define <vscale x 4 x double> @vfrsqrt7(<vscale x 4 x float> %a) { +; NOVLOPT-LABEL: vfrsqrt7: +; NOVLOPT: # %bb.0: +; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma +; NOVLOPT-NEXT: vfrsqrt7.v v10, v8 +; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10 +; NOVLOPT-NEXT: vmv4r.v v8, v12 +; NOVLOPT-NEXT: ret +; +; VLOPT-LABEL: vfrsqrt7: +; VLOPT: # %bb.0: +; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; VLOPT-NEXT: vfrsqrt7.v v10, v8 +; VLOPT-NEXT: vfwmacc.vv v12, v8, v10 +; VLOPT-NEXT: vmv4r.v v8, v12 +; VLOPT-NEXT: ret + %1 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 7) + %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0) + ret <vscale x 4 x double> %2 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir index 0475a988e9851..cb43a89ea3bc6 100644 ---
a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir @@ -141,6 +141,46 @@ body: | %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0 ... --- +name: vfsqrt_nofpexcept +body: | + bb.0: + ; CHECK-LABEL: name: vfsqrt_nofpexcept + ; CHECK: %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 6, 5 /* e32 */, 3 /* ta, ma */, implicit $frm + ; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm + %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm + early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm +... +--- +name: vfsqrt_fpexcept +body: | + bb.0: + ; CHECK-LABEL: name: vfsqrt_fpexcept + ; CHECK: %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5 /* e32 */, 3 /* ta, ma */, implicit $frm + ; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm + %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm + early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm +... +--- +name: vfrsqrt7_nofpexcept +body: | + bb.0: + ; CHECK-LABEL: name: vfrsqrt7_nofpexcept + ; CHECK: %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ + %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0 + %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 +... 
+--- +name: vfrsqrt7_fpexcept +body: | + bb.0: + ; CHECK-LABEL: name: vfrsqrt7_fpexcept + ; CHECK: %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */ + %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0 + %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 +... +--- name: vwadd_tied_vs1 body: | bb.0: