Skip to content

Commit ac2318a

Browse files
committed
[AArch64] Lower aarch64.neon.fcvtzs.i16.f16 to FP_TO_SINT_SAT
FP_TO_SINT_SAT is capable of correctly handling an f16 -> s16 conversion, including correct overflow behaviour. The semantics of the operation match those of the vcvth_s16_f16 NEON intrinsic. Enable correct lowering of aarch64.neon.fcvtzs.i16.f16 by making use of it. Signed-off-by: Kajetan Puchalski <[email protected]>
1 parent 0d05c42 commit ac2318a

File tree

3 files changed

+50
-0
lines changed

3 files changed

+50
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,6 +1289,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
12891289
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
12901290
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
12911291
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1292+
1293+
// f16 -> i16 conversion intrinsics need custom lowering
1294+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
12921295
} else {
12931296
// when AArch64 doesn't have fullfp16 support, promote the input
12941297
// to i32 first.
@@ -28238,6 +28241,16 @@ void AArch64TargetLowering::ReplaceNodeResults(
2823828241
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
2823928242
return;
2824028243
}
28244+
case Intrinsic::aarch64_neon_fcvtzs: {
28245+
if (VT.getScalarType() != MVT::i16)
28246+
return;
28247+
28248+
SDLoc DL(N);
28249+
auto CVT = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, VT, N->getOperand(1),
28250+
DAG.getValueType(MVT::i16));
28251+
Results.push_back(CVT);
28252+
return;
28253+
}
2824128254
}
2824228255
}
2824328256
case ISD::READ_REGISTER: {

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2278,6 +2278,23 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
22782278
}
22792279
return false;
22802280
}
2281+
case TargetOpcode::G_INTRINSIC: {
2282+
unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
2283+
switch (IntrinID) {
2284+
default:
2285+
break;
2286+
case Intrinsic::aarch64_neon_fcvtzs: {
2287+
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2288+
if (DstTy != LLT::scalar(16))
2289+
return false;
2290+
// Remove the intrinsic ID operand, which is no longer needed.
2291+
I.removeOperand(1);
2292+
I.setDesc(TII.get(TargetOpcode::G_FPTOSI_SAT));
2293+
return true;
2294+
}
2295+
}
2296+
return false;
2297+
}
22812298
default:
22822299
return false;
22832300
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; Test fp16 -> s16 conversion intrinsics which require special handling to ensure correct behaviour.
2+
; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK-SD
3+
4+
declare i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half)
5+
6+
define i16 @fcvtzs_intrinsic_i16(half %a) {
7+
; CHECK-SD-LABEL: fcvtzs_intrinsic_i16:
8+
; CHECK-SD: // %bb.0: // %entry
9+
; CHECK-SD-NEXT: fcvtzs w8, h0
10+
; CHECK-SD-NEXT: mov w9, #32767
11+
; CHECK-SD-NEXT: cmp w8, w9
12+
; CHECK-SD-NEXT: csel w8, w8, w9, lt
13+
; CHECK-SD-NEXT: mov w9, #-32768
14+
; CHECK-SD-NEXT: cmn w8, #8, lsl #12
15+
; CHECK-SD-NEXT: csel
16+
; CHECK-SD-NEXT: ret
17+
entry:
18+
%fcvt = tail call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
19+
ret i16 %fcvt
20+
}

0 commit comments

Comments
 (0)