Skip to content

Commit 44db4f1

Browse files
committed
[ISel] Replace expensive mov from wzr by two moves via fpr
1 parent dcea5f1 commit 44db4f1

File tree

2 files changed

+22
-11
lines changed

2 files changed

+22
-11
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7356,16 +7356,10 @@ def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
73567356
(i64 0)),
73577357
dsub)>;
73587358

7359-
def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
7360-
(INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
73617359
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
73627360
(EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
7363-
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
7364-
(INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
73657361
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
73667362
(EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
7367-
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
7368-
(INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>;
73697363

73707364
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
73717365
(f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
@@ -8035,6 +8029,18 @@ def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
80358029
"movi", ".2d",
80368030
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
80378031

8032+
def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
8033+
(INSvi16lane V128:$Rn, VectorIndexH:$imm,
8034+
(v8f16 (MOVIv2d_ns (i32 0))), (i64 0))>;
8035+
8036+
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
8037+
(INSvi32lane V128:$Rn, VectorIndexS:$imm,
8038+
(v4f32 (MOVIv2d_ns (i32 0))), (i64 0))>;
8039+
8040+
def : Pat<(vector_insert (v2f64 V128:$Rn), (f64 fpimm0), (i64 VectorIndexD:$imm)),
8041+
(INSvi64lane V128:$Rn, VectorIndexD:$imm,
8042+
(v2f64 (MOVIv2d_ns (i32 0))), (i64 0))>;
8043+
80388044
let Predicates = [HasNEON] in {
80398045
def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
80408046
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;

llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -172,8 +172,9 @@ define <8 x half> @test_insert_v8f16_insert_1(half %a) {
172172
; CHECK-LABEL: test_insert_v8f16_insert_1:
173173
; CHECK: // %bb.0:
174174
; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
175+
; CHECK-NEXT: movi.2d v1, #0000000000000000
175176
; CHECK-NEXT: dup.8h v0, v0[0]
176-
; CHECK-NEXT: mov.h v0[7], wzr
177+
; CHECK-NEXT: mov.h v0[7], v1[0]
177178
; CHECK-NEXT: ret
178179
%v.0 = insertelement <8 x half> <half undef, half undef, half undef, half undef, half undef, half undef, half undef, half 0.0>, half %a, i32 0
179180
%v.1 = insertelement <8 x half> %v.0, half %a, i32 1
@@ -278,8 +279,9 @@ define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
278279
; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
279280
; CHECK: // %bb.0:
280281
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
282+
; CHECK-NEXT: movi.2d v1, #0000000000000000
281283
; CHECK-NEXT: dup.4s v0, v0[0]
282-
; CHECK-NEXT: mov.s v0[3], wzr
284+
; CHECK-NEXT: mov.s v0[3], v1[0]
283285
; CHECK-NEXT: ret
284286
%v.0 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %a, i32 0
285287
%v.1 = insertelement <4 x float> %v.0, float %a, i32 1
@@ -362,7 +364,8 @@ define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
362364
define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
363365
; CHECK-LABEL: test_insert_v8f16_f16_zero:
364366
; CHECK: // %bb.0:
365-
; CHECK-NEXT: mov.h v0[6], wzr
367+
; CHECK-NEXT: movi.2d v1, #0000000000000000
368+
; CHECK-NEXT: mov.h v0[6], v1[0]
366369
; CHECK-NEXT: ret
367370
%v.0 = insertelement <8 x half> %a, half 0.000000e+00, i32 6
368371
ret <8 x half> %v.0
@@ -382,7 +385,8 @@ define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
382385
define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
383386
; CHECK-LABEL: test_insert_v4f32_f32_zero:
384387
; CHECK: // %bb.0:
385-
; CHECK-NEXT: mov.s v0[3], wzr
388+
; CHECK-NEXT: movi.2d v1, #0000000000000000
389+
; CHECK-NEXT: mov.s v0[3], v1[0]
386390
; CHECK-NEXT: ret
387391
%v.0 = insertelement <4 x float> %a, float 0.000000e+00, i32 3
388392
ret <4 x float> %v.0
@@ -391,7 +395,8 @@ define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
391395
define <2 x double> @test_insert_v2f64_f64_zero(<2 x double> %a) {
392396
; CHECK-LABEL: test_insert_v2f64_f64_zero:
393397
; CHECK: // %bb.0:
394-
; CHECK-NEXT: mov.d v0[1], xzr
398+
; CHECK-NEXT: movi.2d v1, #0000000000000000
399+
; CHECK-NEXT: mov.d v0[1], v1[0]
395400
; CHECK-NEXT: ret
396401
%v.0 = insertelement <2 x double> %a, double 0.000000e+00, i32 1
397402
ret <2 x double> %v.0

0 commit comments

Comments
 (0)