Skip to content

Commit e5dff84

Browse files
guy-david authored and Priyanshu3820 committed
[AArch64] Extend int-to-fp load optimization to support f16 (llvm#168076)
1 parent 526901d commit e5dff84

File tree

4 files changed

+73
-28
lines changed

4 files changed

+73
-28
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20146,7 +20146,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
2014620146
return Res;
2014720147

2014820148
EVT VT = N->getValueType(0);
20149-
if (VT != MVT::f32 && VT != MVT::f64)
20149+
if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
20150+
return SDValue();
20151+
if (VT == MVT::f16 && !Subtarget->hasFullFP16())
2015020152
return SDValue();
2015120153

2015220154
// Only optimize when the source and destination types have the same width.

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7014,6 +7014,19 @@ multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
70147014
sub))>;
70157015
}
70167016

7017+
let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
7018+
defm : UIntToFPROLoadPat<f16, i32, zextloadi8,
7019+
UCVTFv1i16, ro8, LDRBroW, LDRBroX, bsub>;
7020+
def : Pat <(f16 (uint_to_fp (i32
7021+
(zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
7022+
(UCVTFv1i16 (INSERT_SUBREG (f16 (IMPLICIT_DEF)),
7023+
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
7024+
def : Pat <(f16 (uint_to_fp (i32
7025+
(zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
7026+
(UCVTFv1i16 (INSERT_SUBREG (f16 (IMPLICIT_DEF)),
7027+
(LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
7028+
}
7029+
70177030
defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
70187031
UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
70197032
def : Pat <(f32 (uint_to_fp (i32

llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll

Lines changed: 33 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,17 @@ entry:
4343
}
4444

4545
define half @ui8_to_half(ptr %i, ptr %f) {
46-
; CHECK-LABEL: ui8_to_half:
47-
; CHECK: // %bb.0: // %entry
48-
; CHECK-NEXT: ldrb w8, [x0]
49-
; CHECK-NEXT: ucvtf h0, w8
50-
; CHECK-NEXT: ret
46+
; NEON-ENABLED-LABEL: ui8_to_half:
47+
; NEON-ENABLED: // %bb.0: // %entry
48+
; NEON-ENABLED-NEXT: ldr b0, [x0]
49+
; NEON-ENABLED-NEXT: ucvtf h0, h0
50+
; NEON-ENABLED-NEXT: ret
51+
;
52+
; NEON-DISABLED-LABEL: ui8_to_half:
53+
; NEON-DISABLED: // %bb.0: // %entry
54+
; NEON-DISABLED-NEXT: ldrb w8, [x0]
55+
; NEON-DISABLED-NEXT: ucvtf h0, w8
56+
; NEON-DISABLED-NEXT: ret
5157
entry:
5258
%ld = load i8, ptr %i, align 1
5359
%conv = uitofp i8 %ld to half
@@ -91,11 +97,17 @@ entry:
9197
}
9298

9399
define half @ui16_to_half(ptr %i, ptr %f) {
94-
; CHECK-LABEL: ui16_to_half:
95-
; CHECK: // %bb.0: // %entry
96-
; CHECK-NEXT: ldrh w8, [x0]
97-
; CHECK-NEXT: ucvtf h0, w8
98-
; CHECK-NEXT: ret
100+
; NEON-ENABLED-LABEL: ui16_to_half:
101+
; NEON-ENABLED: // %bb.0: // %entry
102+
; NEON-ENABLED-NEXT: ldr h0, [x0]
103+
; NEON-ENABLED-NEXT: ucvtf h0, h0
104+
; NEON-ENABLED-NEXT: ret
105+
;
106+
; NEON-DISABLED-LABEL: ui16_to_half:
107+
; NEON-DISABLED: // %bb.0: // %entry
108+
; NEON-DISABLED-NEXT: ldrh w8, [x0]
109+
; NEON-DISABLED-NEXT: ucvtf h0, w8
110+
; NEON-DISABLED-NEXT: ret
99111
entry:
100112
%ld = load i16, ptr %i, align 1
101113
%conv = uitofp i16 %ld to half
@@ -277,11 +289,17 @@ entry:
277289
}
278290

279291
define half @si16_to_half(ptr %i, ptr %f) {
280-
; CHECK-LABEL: si16_to_half:
281-
; CHECK: // %bb.0: // %entry
282-
; CHECK-NEXT: ldrsh w8, [x0]
283-
; CHECK-NEXT: scvtf h0, w8
284-
; CHECK-NEXT: ret
292+
; NEON-ENABLED-LABEL: si16_to_half:
293+
; NEON-ENABLED: // %bb.0: // %entry
294+
; NEON-ENABLED-NEXT: ldr h0, [x0]
295+
; NEON-ENABLED-NEXT: scvtf h0, h0
296+
; NEON-ENABLED-NEXT: ret
297+
;
298+
; NEON-DISABLED-LABEL: si16_to_half:
299+
; NEON-DISABLED: // %bb.0: // %entry
300+
; NEON-DISABLED-NEXT: ldrsh w8, [x0]
301+
; NEON-DISABLED-NEXT: scvtf h0, w8
302+
; NEON-DISABLED-NEXT: ret
285303
entry:
286304
%ld = load i16, ptr %i, align 1
287305
%conv = sitofp i16 %ld to half

llvm/test/CodeGen/AArch64/itofp.ll

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1090,11 +1090,17 @@ define half @stofp_load_i16_f16(ptr %p) {
10901090
; CHECK-NOFP16-NEXT: fcvt h0, s0
10911091
; CHECK-NOFP16-NEXT: ret
10921092
;
1093-
; CHECK-FP16-LABEL: stofp_load_i16_f16:
1094-
; CHECK-FP16: // %bb.0: // %entry
1095-
; CHECK-FP16-NEXT: ldrsh w8, [x0]
1096-
; CHECK-FP16-NEXT: scvtf h0, w8
1097-
; CHECK-FP16-NEXT: ret
1093+
; CHECK-FP16-SD-LABEL: stofp_load_i16_f16:
1094+
; CHECK-FP16-SD: // %bb.0: // %entry
1095+
; CHECK-FP16-SD-NEXT: ldr h0, [x0]
1096+
; CHECK-FP16-SD-NEXT: scvtf h0, h0
1097+
; CHECK-FP16-SD-NEXT: ret
1098+
;
1099+
; CHECK-FP16-GI-LABEL: stofp_load_i16_f16:
1100+
; CHECK-FP16-GI: // %bb.0: // %entry
1101+
; CHECK-FP16-GI-NEXT: ldrsh w8, [x0]
1102+
; CHECK-FP16-GI-NEXT: scvtf h0, w8
1103+
; CHECK-FP16-GI-NEXT: ret
10981104
entry:
10991105
%a = load i16, ptr %p
11001106
%c = sitofp i16 %a to half
@@ -1109,11 +1115,17 @@ define half @utofp_load_i16_f16(ptr %p) {
11091115
; CHECK-NOFP16-NEXT: fcvt h0, s0
11101116
; CHECK-NOFP16-NEXT: ret
11111117
;
1112-
; CHECK-FP16-LABEL: utofp_load_i16_f16:
1113-
; CHECK-FP16: // %bb.0: // %entry
1114-
; CHECK-FP16-NEXT: ldrh w8, [x0]
1115-
; CHECK-FP16-NEXT: ucvtf h0, w8
1116-
; CHECK-FP16-NEXT: ret
1118+
; CHECK-FP16-SD-LABEL: utofp_load_i16_f16:
1119+
; CHECK-FP16-SD: // %bb.0: // %entry
1120+
; CHECK-FP16-SD-NEXT: ldr h0, [x0]
1121+
; CHECK-FP16-SD-NEXT: ucvtf h0, h0
1122+
; CHECK-FP16-SD-NEXT: ret
1123+
;
1124+
; CHECK-FP16-GI-LABEL: utofp_load_i16_f16:
1125+
; CHECK-FP16-GI: // %bb.0: // %entry
1126+
; CHECK-FP16-GI-NEXT: ldrh w8, [x0]
1127+
; CHECK-FP16-GI-NEXT: ucvtf h0, w8
1128+
; CHECK-FP16-GI-NEXT: ret
11171129
entry:
11181130
%a = load i16, ptr %p
11191131
%c = uitofp i16 %a to half
@@ -1149,8 +1161,8 @@ define half @utofp_load_i8_f16(ptr %p) {
11491161
;
11501162
; CHECK-FP16-LABEL: utofp_load_i8_f16:
11511163
; CHECK-FP16: // %bb.0: // %entry
1152-
; CHECK-FP16-NEXT: ldrb w8, [x0]
1153-
; CHECK-FP16-NEXT: ucvtf h0, w8
1164+
; CHECK-FP16-NEXT: ldr b0, [x0]
1165+
; CHECK-FP16-NEXT: ucvtf h0, h0
11541166
; CHECK-FP16-NEXT: ret
11551167
entry:
11561168
%a = load i8, ptr %p

0 commit comments

Comments
 (0)