@@ -172,7 +172,7 @@ define <8 x half> @test_insert_v8f16_insert_1(half %a) {
172172; CHECK-LABEL: test_insert_v8f16_insert_1:
173173; CHECK: // %bb.0:
174174; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
175- ; CHECK-NEXT: movi.2d v1 , #0000000000000000
175+ ; CHECK-NEXT: movi d1 , #0000000000000000
176176; CHECK-NEXT: dup.8h v0, v0[0]
177177; CHECK-NEXT: mov.h v0[7], v1[0]
178178; CHECK-NEXT: ret
@@ -279,7 +279,7 @@ define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
279279; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
280280; CHECK: // %bb.0:
281281; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
282- ; CHECK-NEXT: movi.2d v1 , #0000000000000000
282+ ; CHECK-NEXT: movi d1 , #0000000000000000
283283; CHECK-NEXT: dup.4s v0, v0[0]
284284; CHECK-NEXT: mov.s v0[3], v1[0]
285285; CHECK-NEXT: ret
@@ -349,12 +349,12 @@ define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
349349 ret <8 x i16 > %v.0
350350}
351351
352- ; TODO: This should jsut be a mov.s v0[3], wzr
353352define <4 x half > @test_insert_v4f16_f16_zero (<4 x half > %a ) {
354353; CHECK-LABEL: test_insert_v4f16_f16_zero:
355354; CHECK: // %bb.0:
355+ ; CHECK-NEXT: movi d1, #0000000000000000
356356; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
357- ; CHECK-NEXT: mov.h v0[0], wzr
357+ ; CHECK-NEXT: mov.h v0[0], v1[0]
358358; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
359359; CHECK-NEXT: ret
360360 %v.0 = insertelement <4 x half > %a , half 0 .000000e+00 , i32 0
@@ -364,7 +364,7 @@ define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
364364define <8 x half > @test_insert_v8f16_f16_zero (<8 x half > %a ) {
365365; CHECK-LABEL: test_insert_v8f16_f16_zero:
366366; CHECK: // %bb.0:
367- ; CHECK-NEXT: movi.2d v1 , #0000000000000000
367+ ; CHECK-NEXT: movi d1 , #0000000000000000
368368; CHECK-NEXT: mov.h v0[6], v1[0]
369369; CHECK-NEXT: ret
370370 %v.0 = insertelement <8 x half > %a , half 0 .000000e+00 , i32 6
@@ -374,8 +374,9 @@ define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
374374define <2 x float > @test_insert_v2f32_f32_zero (<2 x float > %a ) {
375375; CHECK-LABEL: test_insert_v2f32_f32_zero:
376376; CHECK: // %bb.0:
377+ ; CHECK-NEXT: movi d1, #0000000000000000
377378; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
378- ; CHECK-NEXT: mov.s v0[0], wzr
379+ ; CHECK-NEXT: mov.s v0[0], v1[0]
379380; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
380381; CHECK-NEXT: ret
381382 %v.0 = insertelement <2 x float > %a , float 0 .000000e+00 , i32 0
@@ -385,7 +386,7 @@ define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
385386define <4 x float > @test_insert_v4f32_f32_zero (<4 x float > %a ) {
386387; CHECK-LABEL: test_insert_v4f32_f32_zero:
387388; CHECK: // %bb.0:
388- ; CHECK-NEXT: movi.2d v1 , #0000000000000000
389+ ; CHECK-NEXT: movi d1 , #0000000000000000
389390; CHECK-NEXT: mov.s v0[3], v1[0]
390391; CHECK-NEXT: ret
391392 %v.0 = insertelement <4 x float > %a , float 0 .000000e+00 , i32 3
@@ -395,9 +396,60 @@ define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
395396define <2 x double > @test_insert_v2f64_f64_zero (<2 x double > %a ) {
396397; CHECK-LABEL: test_insert_v2f64_f64_zero:
397398; CHECK: // %bb.0:
398- ; CHECK-NEXT: movi.2d v1 , #0000000000000000
399+ ; CHECK-NEXT: movi d1 , #0000000000000000
399400; CHECK-NEXT: mov.d v0[1], v1[0]
400401; CHECK-NEXT: ret
401402 %v.0 = insertelement <2 x double > %a , double 0 .000000e+00 , i32 1
402403 ret <2 x double > %v.0
403404}
405+
406+ define <4 x half > @test_insert_v4f16_f16_zero_wzr (<4 x half > %a ) #1 {
407+ ; CHECK-LABEL: test_insert_v4f16_f16_zero_wzr:
408+ ; CHECK: // %bb.0:
409+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
410+ ; CHECK-NEXT: mov.h v0[0], wzr
411+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
412+ ; CHECK-NEXT: ret
413+ %v.0 = insertelement <4 x half > %a , half 0 .000000e+00 , i32 0
414+ ret <4 x half > %v.0
415+ }
416+
417+ define <8 x half > @test_insert_v8f16_f16_zero_wzr (<8 x half > %a ) #1 {
418+ ; CHECK-LABEL: test_insert_v8f16_f16_zero_wzr:
419+ ; CHECK: // %bb.0:
420+ ; CHECK-NEXT: mov.h v0[6], wzr
421+ ; CHECK-NEXT: ret
422+ %v.0 = insertelement <8 x half > %a , half 0 .000000e+00 , i32 6
423+ ret <8 x half > %v.0
424+ }
425+
426+ define <2 x float > @test_insert_v2f32_f32_zero_wzr (<2 x float > %a ) #1 {
427+ ; CHECK-LABEL: test_insert_v2f32_f32_zero_wzr:
428+ ; CHECK: // %bb.0:
429+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
430+ ; CHECK-NEXT: mov.s v0[0], wzr
431+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
432+ ; CHECK-NEXT: ret
433+ %v.0 = insertelement <2 x float > %a , float 0 .000000e+00 , i32 0
434+ ret <2 x float > %v.0
435+ }
436+
437+ define <4 x float > @test_insert_v4f32_f32_zero_wzr (<4 x float > %a ) #1 {
438+ ; CHECK-LABEL: test_insert_v4f32_f32_zero_wzr:
439+ ; CHECK: // %bb.0:
440+ ; CHECK-NEXT: mov.s v0[3], wzr
441+ ; CHECK-NEXT: ret
442+ %v.0 = insertelement <4 x float > %a , float 0 .000000e+00 , i32 3
443+ ret <4 x float > %v.0
444+ }
445+
446+ define <2 x double > @test_insert_v2f64_f64_zero_xzr (<2 x double > %a ) #1 {
447+ ; CHECK-LABEL: test_insert_v2f64_f64_zero_xzr:
448+ ; CHECK: // %bb.0:
449+ ; CHECK-NEXT: mov.d v0[1], xzr
450+ ; CHECK-NEXT: ret
451+ %v.0 = insertelement <2 x double > %a , double 0 .000000e+00 , i32 1
452+ ret <2 x double > %v.0
453+ }
454+
455+ attributes #1 = {"tune-cpu" ="cortex-a55" }
0 commit comments