Skip to content

Commit e4ee9bf

Browse files
authored
[RISCV] Custom legalize vXf16 BUILD_VECTOR without Zfhmin. (llvm#97874)
Without Zfhmin there is no scalar f16 support, so the type legalizer calls `SoftPromoteHalfOperand` on the BUILD_VECTOR. The generic code does not support that operation for BUILD_VECTOR, so we instead custom-lower the vXf16 BUILD_VECTOR to a vXi16 BUILD_VECTOR using bitcasts. Fixes llvm#97849.
1 parent 431b996 commit e4ee9bf

File tree

2 files changed

+189
-37
lines changed

2 files changed

+189
-37
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
13291329
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
13301330
// FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
13311331
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1332+
// We need to custom legalize f16 build vectors if Zfhmin isn't
1333+
// available.
1334+
if (!Subtarget.hasStdExtZfhminOrZhinxmin())
1335+
setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
13321336
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
13331337
// Don't promote f16 vector operations to f32 if f32 vector type is
13341338
// not legal.
@@ -3901,11 +3905,27 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
39013905
return SDValue();
39023906
}
39033907

3908+
// Convert to an vXf16 build_vector to vXi16 with bitcasts.
3909+
static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {
3910+
MVT VT = Op.getSimpleValueType();
3911+
MVT IVT = VT.changeVectorElementType(MVT::i16);
3912+
SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
3913+
for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
3914+
NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
3915+
SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);
3916+
return DAG.getBitcast(VT, Res);
3917+
}
3918+
39043919
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
39053920
const RISCVSubtarget &Subtarget) {
39063921
MVT VT = Op.getSimpleValueType();
39073922
assert(VT.isFixedLengthVector() && "Unexpected vector!");
39083923

3924+
// If we don't have scalar f16, we need to bitcast to an i16 vector.
3925+
if (VT.getVectorElementType() == MVT::f16 &&
3926+
!Subtarget.hasStdExtZfhminOrZhinxmin())
3927+
return lowerBUILD_VECTORvXf16(Op, DAG);
3928+
39093929
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
39103930
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
39113931
return lowerBuildVectorOfConstants(Op, DAG, Subtarget);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 169 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22

3-
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4-
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
5-
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
6-
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RVA22U64
3+
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH
4+
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH
5+
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH,RV64V
6+
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH,RVA22U64
7+
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN
8+
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN
79

810
; Tests that a floating-point build_vector doesn't try to generate a VID
911
; instruction
@@ -249,6 +251,20 @@ define dso_local void @splat_load_licm(ptr %0) {
249251
; RVA22U64-NEXT: bne a0, a1, .LBB12_1
250252
; RVA22U64-NEXT: # %bb.2:
251253
; RVA22U64-NEXT: ret
254+
;
255+
; RV64ZVFHMIN-LABEL: splat_load_licm:
256+
; RV64ZVFHMIN: # %bb.0:
257+
; RV64ZVFHMIN-NEXT: lui a1, 1
258+
; RV64ZVFHMIN-NEXT: add a1, a0, a1
259+
; RV64ZVFHMIN-NEXT: lui a2, 263168
260+
; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma
261+
; RV64ZVFHMIN-NEXT: vmv.v.x v8, a2
262+
; RV64ZVFHMIN-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
263+
; RV64ZVFHMIN-NEXT: vse32.v v8, (a0)
264+
; RV64ZVFHMIN-NEXT: addi a0, a0, 16
265+
; RV64ZVFHMIN-NEXT: bne a0, a1, .LBB12_1
266+
; RV64ZVFHMIN-NEXT: # %bb.2:
267+
; RV64ZVFHMIN-NEXT: ret
252268
br label %2
253269

254270
2: ; preds = %2, %1
@@ -265,12 +281,37 @@ define dso_local void @splat_load_licm(ptr %0) {
265281
}
266282

267283
define <2 x half> @buildvec_v2f16(half %a, half %b) {
268-
; CHECK-LABEL: buildvec_v2f16:
269-
; CHECK: # %bb.0:
270-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
271-
; CHECK-NEXT: vfmv.v.f v8, fa0
272-
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
273-
; CHECK-NEXT: ret
284+
; RV32ZVFH-LABEL: buildvec_v2f16:
285+
; RV32ZVFH: # %bb.0:
286+
; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
287+
; RV32ZVFH-NEXT: vfmv.v.f v8, fa0
288+
; RV32ZVFH-NEXT: vfslide1down.vf v8, v8, fa1
289+
; RV32ZVFH-NEXT: ret
290+
;
291+
; RV64ZVFH-LABEL: buildvec_v2f16:
292+
; RV64ZVFH: # %bb.0:
293+
; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
294+
; RV64ZVFH-NEXT: vfmv.v.f v8, fa0
295+
; RV64ZVFH-NEXT: vfslide1down.vf v8, v8, fa1
296+
; RV64ZVFH-NEXT: ret
297+
;
298+
; RV32ZVFHMIN-LABEL: buildvec_v2f16:
299+
; RV32ZVFHMIN: # %bb.0:
300+
; RV32ZVFHMIN-NEXT: fmv.x.w a0, fa1
301+
; RV32ZVFHMIN-NEXT: fmv.x.w a1, fa0
302+
; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
303+
; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1
304+
; RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
305+
; RV32ZVFHMIN-NEXT: ret
306+
;
307+
; RV64ZVFHMIN-LABEL: buildvec_v2f16:
308+
; RV64ZVFHMIN: # %bb.0:
309+
; RV64ZVFHMIN-NEXT: fmv.x.w a0, fa1
310+
; RV64ZVFHMIN-NEXT: fmv.x.w a1, fa0
311+
; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
312+
; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1
313+
; RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
314+
; RV64ZVFHMIN-NEXT: ret
274315
%v1 = insertelement <2 x half> poison, half %a, i64 0
275316
%v2 = insertelement <2 x half> %v1, half %b, i64 1
276317
ret <2 x half> %v2
@@ -1297,45 +1338,136 @@ entry:
12971338
}
12981339

12991340
define <2 x half> @vid_v2f16() {
1300-
; CHECK-LABEL: vid_v2f16:
1301-
; CHECK: # %bb.0:
1302-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1303-
; CHECK-NEXT: vid.v v8
1304-
; CHECK-NEXT: vfcvt.f.x.v v8, v8
1305-
; CHECK-NEXT: ret
1341+
; RV32ZVFH-LABEL: vid_v2f16:
1342+
; RV32ZVFH: # %bb.0:
1343+
; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1344+
; RV32ZVFH-NEXT: vid.v v8
1345+
; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8
1346+
; RV32ZVFH-NEXT: ret
1347+
;
1348+
; RV64ZVFH-LABEL: vid_v2f16:
1349+
; RV64ZVFH: # %bb.0:
1350+
; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1351+
; RV64ZVFH-NEXT: vid.v v8
1352+
; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8
1353+
; RV64ZVFH-NEXT: ret
1354+
;
1355+
; RV32ZVFHMIN-LABEL: vid_v2f16:
1356+
; RV32ZVFHMIN: # %bb.0:
1357+
; RV32ZVFHMIN-NEXT: lui a0, 245760
1358+
; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1359+
; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0
1360+
; RV32ZVFHMIN-NEXT: ret
1361+
;
1362+
; RV64ZVFHMIN-LABEL: vid_v2f16:
1363+
; RV64ZVFHMIN: # %bb.0:
1364+
; RV64ZVFHMIN-NEXT: lui a0, 245760
1365+
; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1366+
; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0
1367+
; RV64ZVFHMIN-NEXT: ret
13061368
ret <2 x half> <half 0.0, half 1.0>
13071369
}
13081370

13091371
define <2 x half> @vid_addend1_v2f16() {
1310-
; CHECK-LABEL: vid_addend1_v2f16:
1311-
; CHECK: # %bb.0:
1312-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1313-
; CHECK-NEXT: vid.v v8
1314-
; CHECK-NEXT: vadd.vi v8, v8, 1
1315-
; CHECK-NEXT: vfcvt.f.x.v v8, v8
1316-
; CHECK-NEXT: ret
1372+
; RV32ZVFH-LABEL: vid_addend1_v2f16:
1373+
; RV32ZVFH: # %bb.0:
1374+
; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1375+
; RV32ZVFH-NEXT: vid.v v8
1376+
; RV32ZVFH-NEXT: vadd.vi v8, v8, 1
1377+
; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8
1378+
; RV32ZVFH-NEXT: ret
1379+
;
1380+
; RV64ZVFH-LABEL: vid_addend1_v2f16:
1381+
; RV64ZVFH: # %bb.0:
1382+
; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1383+
; RV64ZVFH-NEXT: vid.v v8
1384+
; RV64ZVFH-NEXT: vadd.vi v8, v8, 1
1385+
; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8
1386+
; RV64ZVFH-NEXT: ret
1387+
;
1388+
; RV32ZVFHMIN-LABEL: vid_addend1_v2f16:
1389+
; RV32ZVFHMIN: # %bb.0:
1390+
; RV32ZVFHMIN-NEXT: lui a0, 262148
1391+
; RV32ZVFHMIN-NEXT: addi a0, a0, -1024
1392+
; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1393+
; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0
1394+
; RV32ZVFHMIN-NEXT: ret
1395+
;
1396+
; RV64ZVFHMIN-LABEL: vid_addend1_v2f16:
1397+
; RV64ZVFHMIN: # %bb.0:
1398+
; RV64ZVFHMIN-NEXT: lui a0, 262148
1399+
; RV64ZVFHMIN-NEXT: addi a0, a0, -1024
1400+
; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1401+
; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0
1402+
; RV64ZVFHMIN-NEXT: ret
13171403
ret <2 x half> <half 1.0, half 2.0>
13181404
}
13191405

13201406
define <2 x half> @vid_denominator2_v2f16() {
1321-
; CHECK-LABEL: vid_denominator2_v2f16:
1322-
; CHECK: # %bb.0:
1323-
; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
1324-
; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0)
1325-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1326-
; CHECK-NEXT: vle16.v v8, (a0)
1327-
; CHECK-NEXT: ret
1407+
; RV32ZVFH-LABEL: vid_denominator2_v2f16:
1408+
; RV32ZVFH: # %bb.0:
1409+
; RV32ZVFH-NEXT: lui a0, %hi(.LCPI28_0)
1410+
; RV32ZVFH-NEXT: addi a0, a0, %lo(.LCPI28_0)
1411+
; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1412+
; RV32ZVFH-NEXT: vle16.v v8, (a0)
1413+
; RV32ZVFH-NEXT: ret
1414+
;
1415+
; RV64ZVFH-LABEL: vid_denominator2_v2f16:
1416+
; RV64ZVFH: # %bb.0:
1417+
; RV64ZVFH-NEXT: lui a0, %hi(.LCPI28_0)
1418+
; RV64ZVFH-NEXT: addi a0, a0, %lo(.LCPI28_0)
1419+
; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1420+
; RV64ZVFH-NEXT: vle16.v v8, (a0)
1421+
; RV64ZVFH-NEXT: ret
1422+
;
1423+
; RV32ZVFHMIN-LABEL: vid_denominator2_v2f16:
1424+
; RV32ZVFHMIN: # %bb.0:
1425+
; RV32ZVFHMIN-NEXT: lui a0, 245764
1426+
; RV32ZVFHMIN-NEXT: addi a0, a0, -2048
1427+
; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1428+
; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0
1429+
; RV32ZVFHMIN-NEXT: ret
1430+
;
1431+
; RV64ZVFHMIN-LABEL: vid_denominator2_v2f16:
1432+
; RV64ZVFHMIN: # %bb.0:
1433+
; RV64ZVFHMIN-NEXT: lui a0, 245764
1434+
; RV64ZVFHMIN-NEXT: addi a0, a0, -2048
1435+
; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1436+
; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0
1437+
; RV64ZVFHMIN-NEXT: ret
13281438
ret <2 x half> <half 0.5, half 1.0>
13291439
}
13301440

13311441
define <2 x half> @vid_step2_v2f16() {
1332-
; CHECK-LABEL: vid_step2_v2f16:
1333-
; CHECK: # %bb.0:
1334-
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1335-
; CHECK-NEXT: vid.v v8
1336-
; CHECK-NEXT: vadd.vv v8, v8, v8
1337-
; CHECK-NEXT: vfcvt.f.x.v v8, v8
1338-
; CHECK-NEXT: ret
1442+
; RV32ZVFH-LABEL: vid_step2_v2f16:
1443+
; RV32ZVFH: # %bb.0:
1444+
; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1445+
; RV32ZVFH-NEXT: vid.v v8
1446+
; RV32ZVFH-NEXT: vadd.vv v8, v8, v8
1447+
; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8
1448+
; RV32ZVFH-NEXT: ret
1449+
;
1450+
; RV64ZVFH-LABEL: vid_step2_v2f16:
1451+
; RV64ZVFH: # %bb.0:
1452+
; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1453+
; RV64ZVFH-NEXT: vid.v v8
1454+
; RV64ZVFH-NEXT: vadd.vv v8, v8, v8
1455+
; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8
1456+
; RV64ZVFH-NEXT: ret
1457+
;
1458+
; RV32ZVFHMIN-LABEL: vid_step2_v2f16:
1459+
; RV32ZVFHMIN: # %bb.0:
1460+
; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1461+
; RV32ZVFHMIN-NEXT: vid.v v8
1462+
; RV32ZVFHMIN-NEXT: vsll.vi v8, v8, 14
1463+
; RV32ZVFHMIN-NEXT: ret
1464+
;
1465+
; RV64ZVFHMIN-LABEL: vid_step2_v2f16:
1466+
; RV64ZVFHMIN: # %bb.0:
1467+
; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1468+
; RV64ZVFHMIN-NEXT: vid.v v8
1469+
; RV64ZVFHMIN-NEXT: vsll.vi v8, v8, 14
1470+
; RV64ZVFHMIN-NEXT: ret
13391471
ret <2 x half> <half 0.0, half 2.0>
13401472
}
13411473

0 commit comments

Comments
 (0)