@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
@@ -1048,3 +1048,101 @@ define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
   %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
   ret float %red
 }
+
+; Test Widen VECREDUCE_SEQ_FADD
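+; An ordered FADD reduction over an illegal element count is widened to the
+; next legal type (e.g. <vscale x 3 x half> to <vscale x 4 x half>), and the
+; extra tail lanes are filled with -0.0 (materialized below via fmv.h.x +
+; fneg.h), the identity element of fadd, so the padding cannot change the
+; sequentially reduced result.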
+declare half @llvm.vector.reduce.fadd.nxv3f16(half, <vscale x 3 x half>)
+
+define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_ord_fadd_nxv3f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 1
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfredosum.vs v8, v8, v9
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v)
+  ret half %red
+}
+
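+; <vscale x 6 x half> is widened to <vscale x 8 x half> (an m2 group, v8-v9);
+; only the upper half of v9 holds padding, written by a single vslideup.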
+declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)
+
+define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_ord_fadd_nxv6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v10, ft0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v9, v10, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vfredosum.vs v8, v8, v10
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
+  ret half %red
+}
+
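+; <vscale x 10 x half> is widened to <vscale x 16 x half> (an m4 group,
+; v8-v11); the upper half of v10 and both halves of v11 are padded with the
+; -0.0 splat, one vslideup per half.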
+declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)
+
+define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_ord_fadd_nxv10f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v12, ft0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v10, v12, a0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v11, v12, 0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v11, v12, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vfredosum.vs v8, v8, v12
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fadd.nxv10f16(half %s, <vscale x 10 x half> %v)
+  ret half %red
+}
+
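+; <vscale x 12 x half> is widened to <vscale x 16 x half>; v8-v10 are fully
+; live, so v11 is simply overwritten with the -0.0 splat (vfmv.v.f) and no
+; vslideup is needed.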
+declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)
+
+define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v11, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vfredosum.vs v8, v8, v12
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
+  ret half %red
+}