Skip to content

Commit 0d35d2d

Browse files
committed
[AArch64][SDAG] Lower f16->s16 FP_TO_INT_SAT to *v1f16
Conversions from f16 to s16 performed by FP_TO_INT_SAT can be done directly within FPRs, e.g. `fcvtzs h0, h0`. Generating this form reduces the number of instructions required for correct behaviour, as it sidesteps the issues with incorrect saturation that arise when using `fcvtzs w0, h0` for the same casts. Signed-off-by: Kajetan Puchalski <[email protected]>
1 parent 319705d commit 0d35d2d

File tree

3 files changed

+22
-11
lines changed

3 files changed

+22
-11
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4911,6 +4911,24 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
49114911
if (DstWidth < SatWidth)
49124912
return SDValue();
49134913

4914+
if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
4915+
auto Opcode = (Op.getOpcode() == ISD::FP_TO_SINT_SAT)
4916+
? AArch64::FCVTZSv1f16
4917+
: AArch64::FCVTZUv1f16;
4918+
auto Cvt = SDValue(DAG.getMachineNode(Opcode, DL, MVT::f16, SrcVal), 0);
4919+
auto Sign = DAG.getTargetConstant(-1, DL, MVT::i64);
4920+
auto Hsub = DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32);
4921+
auto SubregToReg =
4922+
SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, MVT::v8f16,
4923+
Sign, Cvt, Hsub),
4924+
0);
4925+
auto Ssub = DAG.getTargetConstant(AArch64::ssub, DL, MVT::i32);
4926+
auto Extract = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
4927+
MVT::f32, SubregToReg, Ssub),
4928+
0);
4929+
return DAG.getBitcast(MVT::i32, Extract);
4930+
}
4931+
49144932
SDValue NativeCvt =
49154933
DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
49164934
SDValue Sat;

llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -670,13 +670,8 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
670670
;
671671
; CHECK-SD-FP16-LABEL: test_signed_i16_f16:
672672
; CHECK-SD-FP16: // %bb.0:
673-
; CHECK-SD-FP16-NEXT: fcvtzs w8, h0
674-
; CHECK-SD-FP16-NEXT: mov w9, #32767 // =0x7fff
675-
; CHECK-SD-FP16-NEXT: cmp w8, w9
676-
; CHECK-SD-FP16-NEXT: csel w8, w8, w9, lt
677-
; CHECK-SD-FP16-NEXT: mov w9, #-32768 // =0xffff8000
678-
; CHECK-SD-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
679-
; CHECK-SD-FP16-NEXT: csel w0, w8, w9, gt
673+
; CHECK-SD-FP16-NEXT: fcvtzs h0, h0
674+
; CHECK-SD-FP16-NEXT: fmov w0, s0
680675
; CHECK-SD-FP16-NEXT: ret
681676
;
682677
; CHECK-GI-CVT-LABEL: test_signed_i16_f16:

llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -531,10 +531,8 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
531531
;
532532
; CHECK-SD-FP16-LABEL: test_unsigned_i16_f16:
533533
; CHECK-SD-FP16: // %bb.0:
534-
; CHECK-SD-FP16-NEXT: fcvtzu w8, h0
535-
; CHECK-SD-FP16-NEXT: mov w9, #65535 // =0xffff
536-
; CHECK-SD-FP16-NEXT: cmp w8, w9
537-
; CHECK-SD-FP16-NEXT: csel w0, w8, w9, lo
534+
; CHECK-SD-FP16-NEXT: fcvtzu h0, h0
535+
; CHECK-SD-FP16-NEXT: fmov w0, s0
538536
; CHECK-SD-FP16-NEXT: ret
539537
;
540538
; CHECK-GI-CVT-LABEL: test_unsigned_i16_f16:

0 commit comments

Comments
 (0)