Skip to content

Commit 5c9b497

Browse files
authored
[AArch64][GlobalISel] Select *v1f16 for f16->s16 to_int_sat_gi (#154562)
Conversions from f16 to s16 performed by to_int_sat_gi can be done directly within FPRs, e.g. `fcvtzs h0, h0`. Generating this format reduces the number of instructions required for correct behaviour, as it sidesteps the issues with incorrect saturation that arise when using `fcvtzs w0, h0` for the same casts. --------- Signed-off-by: Kajetan Puchalski <[email protected]>
1 parent 9565763 commit 5c9b497

File tree

5 files changed

+25
-15
lines changed

5 files changed

+25
-15
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6539,6 +6539,12 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
65396539
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
65406540
int_aarch64_neon_usqadd>;
65416541

6542+
// f16 -> s16 conversions
6543+
let Predicates = [HasFullFP16] in {
6544+
def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
6545+
def : Pat<(i16(fp_to_uint_sat_gi f16:$Rn)), (FCVTZUv1f16 f16:$Rn)>;
6546+
}
6547+
65426548
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
65436549
(CMLTv1i64rz V64:$Rn)>;
65446550

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -879,8 +879,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
879879
{v2s32, v2s32},
880880
{v4s32, v4s32},
881881
{v2s64, v2s64}})
882-
.legalFor(HasFP16,
883-
{{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
882+
.legalFor(
883+
HasFP16,
884+
{{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
884885
// Handle types larger than i64 by scalarizing/lowering.
885886
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
886887
.scalarizeIf(scalarOrEltWiderThan(1, 64), 1)

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -848,10 +848,20 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
848848
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
849849
break;
850850
}
851+
case TargetOpcode::G_FPTOSI_SAT:
852+
case TargetOpcode::G_FPTOUI_SAT: {
853+
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
854+
if (DstType.isVector())
855+
break;
856+
if (DstType == LLT::scalar(16)) {
857+
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
858+
break;
859+
}
860+
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
861+
break;
862+
}
851863
case TargetOpcode::G_FPTOSI:
852864
case TargetOpcode::G_FPTOUI:
853-
case TargetOpcode::G_FPTOSI_SAT:
854-
case TargetOpcode::G_FPTOUI_SAT:
855865
case TargetOpcode::G_INTRINSIC_LRINT:
856866
case TargetOpcode::G_INTRINSIC_LLRINT:
857867
if (MRI.getType(MI.getOperand(0).getReg()).isVector())

llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -689,13 +689,8 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
689689
;
690690
; CHECK-GI-FP16-LABEL: test_signed_i16_f16:
691691
; CHECK-GI-FP16: // %bb.0:
692-
; CHECK-GI-FP16-NEXT: fcvtzs w8, h0
693-
; CHECK-GI-FP16-NEXT: mov w9, #32767 // =0x7fff
694-
; CHECK-GI-FP16-NEXT: cmp w8, w9
695-
; CHECK-GI-FP16-NEXT: csel w8, w8, w9, lt
696-
; CHECK-GI-FP16-NEXT: mov w9, #-32768 // =0xffff8000
697-
; CHECK-GI-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
698-
; CHECK-GI-FP16-NEXT: csel w0, w8, w9, gt
692+
; CHECK-GI-FP16-NEXT: fcvtzs h0, h0
693+
; CHECK-GI-FP16-NEXT: fmov w0, s0
699694
; CHECK-GI-FP16-NEXT: ret
700695
%x = call i16 @llvm.fptosi.sat.i16.f16(half %f)
701696
ret i16 %x

llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -546,10 +546,8 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
546546
;
547547
; CHECK-GI-FP16-LABEL: test_unsigned_i16_f16:
548548
; CHECK-GI-FP16: // %bb.0:
549-
; CHECK-GI-FP16-NEXT: fcvtzu w8, h0
550-
; CHECK-GI-FP16-NEXT: mov w9, #65535 // =0xffff
551-
; CHECK-GI-FP16-NEXT: cmp w8, w9
552-
; CHECK-GI-FP16-NEXT: csel w0, w8, w9, lo
549+
; CHECK-GI-FP16-NEXT: fcvtzu h0, h0
550+
; CHECK-GI-FP16-NEXT: fmov w0, s0
553551
; CHECK-GI-FP16-NEXT: ret
554552
%x = call i16 @llvm.fptoui.sat.i16.f16(half %f)
555553
ret i16 %x

0 commit comments

Comments
 (0)