|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | | -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT |
3 | | -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT |
4 | | -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT |
5 | | -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT |
| 2 | +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT |
| 3 | +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT |
| 4 | +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT |
| 5 | +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT |
6 | 6 |
|
7 | 7 | ; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer |
8 | 8 |
|
@@ -5070,62 +5070,50 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b, |
5070 | 5070 | ret <vscale x 4 x float> %2 |
5071 | 5071 | } |
5072 | 5072 |
|
5073 | | -define <vscale x 4 x half> @vfsqrt(<vscale x 4 x half> %a) { |
| 5073 | +define <vscale x 4 x double> @vfsqrt(<vscale x 4 x float> %a) { |
5074 | 5074 | ; NOVLOPT-LABEL: vfsqrt: |
5075 | 5075 | ; NOVLOPT: # %bb.0: |
5076 | | -; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
5077 | | -; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8 |
5078 | 5076 | ; NOVLOPT-NEXT: fsrmi a0, 0 |
5079 | 5077 | ; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma |
5080 | | -; NOVLOPT-NEXT: vfsqrt.v v10, v10 |
5081 | | -; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
5082 | | -; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10 |
| 5078 | +; NOVLOPT-NEXT: vfsqrt.v v10, v8 |
5083 | 5079 | ; NOVLOPT-NEXT: fsrm a0 |
| 5080 | +; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma |
| 5081 | +; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10 |
| 5082 | +; NOVLOPT-NEXT: vmv4r.v v8, v12 |
5084 | 5083 | ; NOVLOPT-NEXT: ret |
5085 | 5084 | ; |
5086 | 5085 | ; VLOPT-LABEL: vfsqrt: |
5087 | 5086 | ; VLOPT: # %bb.0: |
5088 | | -; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
5089 | | -; VLOPT-NEXT: vfwcvt.f.f.v v10, v8 |
5090 | 5087 | ; VLOPT-NEXT: fsrmi a0, 0 |
5091 | | -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
5092 | | -; VLOPT-NEXT: vfsqrt.v v10, v10 |
5093 | | -; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
5094 | | -; VLOPT-NEXT: vfncvt.f.f.w v8, v10 |
| 5088 | +; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma |
| 5089 | +; VLOPT-NEXT: vfsqrt.v v10, v8 |
5095 | 5090 | ; VLOPT-NEXT: fsrm a0 |
| 5091 | +; VLOPT-NEXT: vfwmacc.vv v12, v8, v10 |
| 5092 | +; VLOPT-NEXT: vmv4r.v v8, v12 |
5096 | 5093 | ; VLOPT-NEXT: ret |
5097 | | - %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6) |
5098 | | - %2 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 0, iXLen 7) |
5099 | | - %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison, <vscale x 4 x float> %2, iXLen 0, iXLen 6) |
5100 | | - ret <vscale x 4 x half> %3 |
| 5094 | + %1 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7) |
| 5095 | + %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0) |
| 5096 | + ret <vscale x 4 x double> %2 |
5101 | 5097 | } |
5102 | 5098 |
|
5103 | | -define <vscale x 4 x half> @vfrsqrt7(<vscale x 4 x half> %a) { |
| 5099 | +define <vscale x 4 x double> @vfrsqrt7(<vscale x 4 x float> %a) { |
5104 | 5100 | ; NOVLOPT-LABEL: vfrsqrt7: |
5105 | 5101 | ; NOVLOPT: # %bb.0: |
5106 | | -; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
5107 | | -; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8 |
5108 | 5102 | ; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma |
5109 | | -; NOVLOPT-NEXT: vfrsqrt7.v v10, v10 |
5110 | | -; NOVLOPT-NEXT: fsrmi a0, 0 |
5111 | | -; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
5112 | | -; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10 |
5113 | | -; NOVLOPT-NEXT: fsrm a0 |
| 5103 | +; NOVLOPT-NEXT: vfrsqrt7.v v10, v8 |
| 5104 | +; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma |
| 5105 | +; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10 |
| 5106 | +; NOVLOPT-NEXT: vmv4r.v v8, v12 |
5114 | 5107 | ; NOVLOPT-NEXT: ret |
5115 | 5108 | ; |
5116 | 5109 | ; VLOPT-LABEL: vfrsqrt7: |
5117 | 5110 | ; VLOPT: # %bb.0: |
5118 | | -; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma |
5119 | | -; VLOPT-NEXT: vfwcvt.f.f.v v10, v8 |
5120 | | -; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
5121 | | -; VLOPT-NEXT: vfrsqrt7.v v10, v10 |
5122 | | -; VLOPT-NEXT: fsrmi a0, 0 |
5123 | | -; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
5124 | | -; VLOPT-NEXT: vfncvt.f.f.w v8, v10 |
5125 | | -; VLOPT-NEXT: fsrm a0 |
| 5111 | +; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma |
| 5112 | +; VLOPT-NEXT: vfrsqrt7.v v10, v8 |
| 5113 | +; VLOPT-NEXT: vfwmacc.vv v12, v8, v10 |
| 5114 | +; VLOPT-NEXT: vmv4r.v v8, v12 |
5126 | 5115 | ; VLOPT-NEXT: ret |
5127 | | - %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6) |
5128 | | - %2 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 7) |
5129 | | - %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison,<vscale x 4 x float> %2, iXLen 0, iXLen 6) |
5130 | | - ret <vscale x 4 x half> %3 |
| 5116 | + %1 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 7) |
| 5117 | + %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0) |
| 5118 | + ret <vscale x 4 x double> %2 |
5131 | 5119 | } |
0 commit comments