Address review comments

LiqinWeng · LiqinWeng · commit 5a5811fd3a4e · 2025-02-19T10:55:13.000+08:00
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -290,6 +290,7 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
       return MILog2SEW;
     return 6;
   }
+
   // Vector Integer Arithmetic Instructions
   // Vector Single-Width Integer Add and Subtract
   case RISCV::VADD_VI:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
 
 ; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
 
@@ -5070,62 +5070,50 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b,
   ret <vscale x 4 x float> %2
 }
 
-define <vscale x 4 x half> @vfsqrt(<vscale x 4 x half> %a) {
+define <vscale x 4 x double> @vfsqrt(<vscale x 4 x float> %a) {
 ; NOVLOPT-LABEL: vfsqrt:
 ; NOVLOPT:       # %bb.0:
-; NOVLOPT-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vfwcvt.f.f.v v10, v8
 ; NOVLOPT-NEXT:    fsrmi a0, 0
 ; NOVLOPT-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vfsqrt.v v10, v10
-; NOVLOPT-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vfncvt.f.f.w v8, v10
+; NOVLOPT-NEXT:    vfsqrt.v v10, v8
 ; NOVLOPT-NEXT:    fsrm a0
+; NOVLOPT-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwmacc.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmv4r.v v8, v12
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vfsqrt:
 ; VLOPT:       # %bb.0:
-; VLOPT-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; VLOPT-NEXT:    vfwcvt.f.f.v v10, v8
 ; VLOPT-NEXT:    fsrmi a0, 0
-; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT:    vfsqrt.v v10, v10
-; VLOPT-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VLOPT-NEXT:    vfncvt.f.f.w v8, v10
+; VLOPT-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; VLOPT-NEXT:    vfsqrt.v v10, v8
 ; VLOPT-NEXT:    fsrm a0
+; VLOPT-NEXT:    vfwmacc.vv v12, v8, v10
+; VLOPT-NEXT:    vmv4r.v v8, v12
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6)
-  %2 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 0, iXLen 7)
-  %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison, <vscale x 4 x float> %2, iXLen 0, iXLen 6)
-  ret <vscale x 4 x half> %3
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
+  ret <vscale x 4 x double> %2
 }
 
-define <vscale x 4 x half> @vfrsqrt7(<vscale x 4 x half> %a) {
+define <vscale x 4 x double> @vfrsqrt7(<vscale x 4 x float> %a) {
 ; NOVLOPT-LABEL: vfrsqrt7:
 ; NOVLOPT:       # %bb.0:
-; NOVLOPT-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vfwcvt.f.f.v v10, v8
 ; NOVLOPT-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
-; NOVLOPT-NEXT:    vfrsqrt7.v v10, v10
-; NOVLOPT-NEXT:    fsrmi a0, 0
-; NOVLOPT-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; NOVLOPT-NEXT:    vfncvt.f.f.w v8, v10
-; NOVLOPT-NEXT:    fsrm a0
+; NOVLOPT-NEXT:    vfrsqrt7.v v10, v8
+; NOVLOPT-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfwmacc.vv v12, v8, v10
+; NOVLOPT-NEXT:    vmv4r.v v8, v12
 ; NOVLOPT-NEXT:    ret
 ;
 ; VLOPT-LABEL: vfrsqrt7:
 ; VLOPT:       # %bb.0:
-; VLOPT-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; VLOPT-NEXT:    vfwcvt.f.f.v v10, v8
-; VLOPT-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT:    vfrsqrt7.v v10, v10
-; VLOPT-NEXT:    fsrmi a0, 0
-; VLOPT-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VLOPT-NEXT:    vfncvt.f.f.w v8, v10
-; VLOPT-NEXT:    fsrm a0
+; VLOPT-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; VLOPT-NEXT:    vfrsqrt7.v v10, v8
+; VLOPT-NEXT:    vfwmacc.vv v12, v8, v10
+; VLOPT-NEXT:    vmv4r.v v8, v12
 ; VLOPT-NEXT:    ret
-  %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6)
-  %2 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 7)
-  %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison,<vscale x 4 x float> %2, iXLen 0, iXLen 6)
-  ret <vscale x 4 x half> %3
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 7)
+  %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
+  ret <vscale x 4 x double> %2
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -141,43 +141,43 @@ body: |
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
 ...
 ---
-name: vfsqrt
+name: vfsqrt_nofpexcept
 body: |
   bb.0:
-    ; CHECK-LABEL: name: vfsqrt
+    ; CHECK-LABEL: name: vfsqrt_nofpexcept
     ; CHECK: %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 6, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
     ; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
     %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
     early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
 ...
 ---
-name: vfsqrt_nofpexcept
+name: vfsqrt_fpexcept
 body: |
   bb.0:
-    ; CHECK-LABEL: name: vfsqrt_nofpexcept
+    ; CHECK-LABEL: name: vfsqrt_fpexcept
     ; CHECK: %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
     ; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
     %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
     early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
 ...
 ---
-name: vfrsqrt7
+name: vfrsqrt7_nofpexcept
 body: |
   bb.0:
-    ; CHECK-LABEL: name: vfrsqrt7
+    ; CHECK-LABEL: name: vfrsqrt7_nofpexcept
     ; CHECK: %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
     %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
     %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
 ...
 ---
-name: vfrsqrt7_nofpexcept
+name: vfrsqrt7_fpexcept
 body: |
   bb.0:
-    ; CHECK-LABEL: name: vfrsqrt7_nofpexcept
+    ; CHECK-LABEL: name: vfrsqrt7_fpexcept
     ; CHECK: %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5 /* e32 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
-        %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
+    %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
     %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
 ...
 ---

Original file line number	Diff line number	Diff line change
`@@ -290,6 +290,7 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {`
`290`	`290`	`return MILog2SEW;`
`291`	`291`	`return 6;`
`292`	`292`	`}`
	`293`	`+`
`293`	`294`	`// Vector Integer Arithmetic Instructions`
`294`	`295`	`// Vector Single-Width Integer Add and Subtract`
`295`	`296`	`case RISCV::VADD_VI:`