Skip to content

Commit 6dd67f8

Browse files
authored
[AArch64][SDAG] Lower f16->s16 FP_TO_INT_SAT to *v1f16 (#154822)
Conversions from f16 to s16 performed by FP_TO_INT_SAT can be done directly within FPRs, e.g. `fcvtzs h0, h0`. Generating this form reduces the number of instructions required for correct behaviour, as it sidesteps the issues with incorrect saturation that arise when using `fcvtzs w0, h0` for the same casts. Add new AArch64ISD::FCVTZS_HALF and AArch64ISD::FCVTZU_HALF nodes to represent the necessary instruction sequence. Related to #154343. --------- Signed-off-by: Kajetan Puchalski <[email protected]>
1 parent 12def78 commit 6dd67f8

File tree

4 files changed

+30
-11
lines changed

4 files changed

+30
-11
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4935,6 +4935,18 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
49354935
if (DstWidth < SatWidth)
49364936
return SDValue();
49374937

4938+
if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
4939+
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
4940+
SDValue CVTf32 =
4941+
DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, MVT::f32, SrcVal);
4942+
SDValue Bitcast = DAG.getBitcast(DstVT, CVTf32);
4943+
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, Bitcast,
4944+
DAG.getValueType(SatVT));
4945+
}
4946+
SDValue CVTf32 = DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, MVT::f32, SrcVal);
4947+
return DAG.getBitcast(DstVT, CVTf32);
4948+
}
4949+
49384950
SDValue NativeCvt =
49394951
DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
49404952
SDValue Sat;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,9 @@ def AArch64fcvtxnv: PatFrags<(ops node:$Rn),
989989
[(int_aarch64_neon_fcvtxn node:$Rn),
990990
(AArch64fcvtxn_n node:$Rn)]>;
991991

992+
def AArch64fcvtzs_half : SDNode<"AArch64ISD::FCVTZS_HALF", SDTFPExtendOp>;
993+
def AArch64fcvtzu_half : SDNode<"AArch64ISD::FCVTZU_HALF", SDTFPExtendOp>;
994+
992995
//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
993996

994997
// Vector immediate ops
@@ -6539,6 +6542,16 @@ defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
65396542
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
65406543
(CMLTv1i64rz V64:$Rn)>;
65416544

6545+
// f16 -> i16 conversions leave the integer bit pattern in the low half of an f32
6546+
class F16ToI16ScalarPat<SDNode cvt_isd, BaseSIMDTwoScalar instr>
6547+
: Pat<(f32 (cvt_isd (f16 FPR16:$Rn))),
6548+
(f32 (SUBREG_TO_REG (i64 0), (instr FPR16:$Rn), hsub))>;
6549+
6550+
let Predicates = [HasFullFP16] in {
6551+
def : F16ToI16ScalarPat<AArch64fcvtzs_half, FCVTZSv1f16>;
6552+
def : F16ToI16ScalarPat<AArch64fcvtzu_half, FCVTZUv1f16>;
6553+
}
6554+
65426555
// Round FP64 to BF16.
65436556
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
65446557
def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),

llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -670,13 +670,9 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
670670
;
671671
; CHECK-SD-FP16-LABEL: test_signed_i16_f16:
672672
; CHECK-SD-FP16: // %bb.0:
673-
; CHECK-SD-FP16-NEXT: fcvtzs w8, h0
674-
; CHECK-SD-FP16-NEXT: mov w9, #32767 // =0x7fff
675-
; CHECK-SD-FP16-NEXT: cmp w8, w9
676-
; CHECK-SD-FP16-NEXT: csel w8, w8, w9, lt
677-
; CHECK-SD-FP16-NEXT: mov w9, #-32768 // =0xffff8000
678-
; CHECK-SD-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
679-
; CHECK-SD-FP16-NEXT: csel w0, w8, w9, gt
673+
; CHECK-SD-FP16-NEXT: fcvtzs h0, h0
674+
; CHECK-SD-FP16-NEXT: fmov w8, s0
675+
; CHECK-SD-FP16-NEXT: sxth w0, w8
680676
; CHECK-SD-FP16-NEXT: ret
681677
;
682678
; CHECK-GI-CVT-LABEL: test_signed_i16_f16:

llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -531,10 +531,8 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
531531
;
532532
; CHECK-SD-FP16-LABEL: test_unsigned_i16_f16:
533533
; CHECK-SD-FP16: // %bb.0:
534-
; CHECK-SD-FP16-NEXT: fcvtzu w8, h0
535-
; CHECK-SD-FP16-NEXT: mov w9, #65535 // =0xffff
536-
; CHECK-SD-FP16-NEXT: cmp w8, w9
537-
; CHECK-SD-FP16-NEXT: csel w0, w8, w9, lo
534+
; CHECK-SD-FP16-NEXT: fcvtzu h0, h0
535+
; CHECK-SD-FP16-NEXT: fmov w0, s0
538536
; CHECK-SD-FP16-NEXT: ret
539537
;
540538
; CHECK-GI-CVT-LABEL: test_unsigned_i16_f16:

0 commit comments

Comments
 (0)