Skip to content

Commit 9e2768a

Browse files
committed
[ARM] Add neon FP16 scalar_to_vector patterns.
This adds simple fp16 scalar_to_vector patterns for the v4f16 and v8f16 vector types, preventing an instruction selection failure when such a node is encountered. Differential Revision: https://reviews.llvm.org/D95427
1 parent cde1f54 commit 9e2768a

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6482,8 +6482,6 @@ def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
64826482

64836483
defm : InsertEltF16<f16, v4f16, v8f16>;
64846484

6485-
//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6486-
// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
64876485
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
64886486
(INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
64896487

@@ -6494,6 +6492,11 @@ def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
64946492
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
64956493
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
64966494

6495+
def : Pat<(v4f16 (scalar_to_vector (f16 HPR:$src))),
6496+
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
6497+
def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
6498+
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
6499+
64976500
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
64986501
(VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
64996502
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),

llvm/test/CodeGen/ARM/fp16-insert-extract.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,39 @@ entry:
7474
ret float %conv
7575
}
7676

77+
define <4 x half> @insert_v4f16(half %a) {
78+
; CHECKHARD-LABEL: insert_v4f16:
79+
; CHECKHARD: @ %bb.0: @ %entry
80+
; CHECKHARD-NEXT: @ kill: def $s0 killed $s0 def $d0
81+
; CHECKHARD-NEXT: bx lr
82+
;
83+
; CHECKSOFT-LABEL: insert_v4f16:
84+
; CHECKSOFT: @ %bb.0: @ %entry
85+
; CHECKSOFT-NEXT: vmov.f16 s0, r0
86+
; CHECKSOFT-NEXT: vmov r0, r1, d0
87+
; CHECKSOFT-NEXT: bx lr
88+
entry:
89+
%res = insertelement <4 x half> undef, half %a, i32 0
90+
ret <4 x half> %res
91+
}
92+
93+
define <8 x half> @insert_v8f16(half %a) {
94+
; CHECKHARD-LABEL: insert_v8f16:
95+
; CHECKHARD: @ %bb.0: @ %entry
96+
; CHECKHARD-NEXT: @ kill: def $s0 killed $s0 def $q0
97+
; CHECKHARD-NEXT: bx lr
98+
;
99+
; CHECKSOFT-LABEL: insert_v8f16:
100+
; CHECKSOFT: @ %bb.0: @ %entry
101+
; CHECKSOFT-NEXT: vmov.f16 s0, r0
102+
; CHECKSOFT-NEXT: vmov r2, r3, d1
103+
; CHECKSOFT-NEXT: vmov r0, r1, d0
104+
; CHECKSOFT-NEXT: bx lr
105+
entry:
106+
%res = insertelement <8 x half> undef, half %a, i32 0
107+
ret <8 x half> %res
108+
}
109+
77110
define <4 x half> @test_vset_lane_f16(<4 x half> %a, float %fb) nounwind {
78111
; CHECKHARD-LABEL: test_vset_lane_f16:
79112
; CHECKHARD: @ %bb.0: @ %entry

0 commit comments

Comments
 (0)