From a583765724df097132b03c537e0100dc07cc2f51 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 17 Apr 2025 07:54:39 +0100 Subject: [PATCH] [AArch64] Add tablegen patterns for i8 and i16 vector insert/extract pairs. An i8 or i16 vector extract/insert has to go via an i32 to make sure the types are legal. This patch adds patterns for extract from an i8/i16 vector, inserted into an i16/i32 vector. This avoids the round trip via a GPR which can limit performance. --- .../include/llvm/Target/TargetSelectionDAG.td | 7 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 35 + .../AArch64/arm64-extract-insert-varidx.ll | 16 +- llvm/test/CodeGen/AArch64/bitcast-extend.ll | 32 +- .../AArch64/fix-shuffle-vector-be-rev.ll | 32 +- llvm/test/CodeGen/AArch64/itofp.ll | 646 ++++++++---------- llvm/test/CodeGen/AArch64/neon-bitcast.ll | 16 +- llvm/test/CodeGen/AArch64/shuffle-extend.ll | 86 +-- llvm/test/CodeGen/AArch64/vector-fcvt.ll | 236 +++---- 9 files changed, 467 insertions(+), 639 deletions(-) diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 9c241b6c4df0f..30f580faee290 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -818,8 +818,11 @@ def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1, def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>, []>; -// vector_extract/vector_insert are deprecated. extractelt/insertelt -// are preferred. +// vector_extract/vector_insert are similar to extractelt/insertelt but allow +// types that require promotion (a v16i8 extract where i8 is not a legal type so +// uses i32 for example). extractelt/insertelt are preferred where the element +// type and the extracted types match due to the extra type checking they +// perform. 
def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index a7a01ed785afa..33fe71e4913b5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -7307,6 +7307,41 @@ def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))), (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>; +// Patterns for i8/i16 -> v2i32/v4i16 lane moves via insert and extract that go via i32. +multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType OutVT, Instruction INS, SDNodeXForm VecIndexMult> { + // VT64->OutVT: the 64-bit source is placed in the dsub of a VT128 register before INS reads the lane. + def : Pat<(OutVT (vector_insert (OutVT V64:$src), + (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))), + (i64 imm:$Immd))), + (EXTRACT_SUBREG + (INS (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$src, dsub), (VecIndexMult imm:$Immd), + (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn), + dsub)>; + def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))), + (EXTRACT_SUBREG + (INS (IMPLICIT_DEF), 0, + (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn), + dsub)>; + + // VT128->OutVT: the source is already a V128 register and is passed to INS directly. + def : Pat<(OutVT (vector_insert (OutVT V64:$src), + (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))), + (i64 imm:$Immd))), + (EXTRACT_SUBREG + (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), (VecIndexMult imm:$Immd), + V128:$Rn, imm:$Immn), + dsub)>; + def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))), + (EXTRACT_SUBREG + (INS (IMPLICIT_DEF), 0, V128:$Rn, imm:$Immn), + dsub)>; +} + +defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v4i16, INSvi8lane, VecIndex_x2>; +defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v2i32, INSvi8lane, VecIndex_x4>; +defm : Neon_INS_elt_ext_pattern<v8i16, v4i16, v2i32, INSvi16lane, VecIndex_x2>; + // bitcast of an extract // f32 bitcast(vector_extract(v4i32 src, 
lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane)) def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))), diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll index 7a4cdd52db904..fccb1fb675768 100644 --- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll +++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll @@ -11,14 +11,11 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) { ; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SDAG-NEXT: str d0, [sp, #8] -; CHECK-SDAG-NEXT: umov w9, v0.b[1] ; CHECK-SDAG-NEXT: bfxil x8, x0, #0, #3 ; CHECK-SDAG-NEXT: ld1 { v1.b }[0], [x8] -; CHECK-SDAG-NEXT: umov w8, v0.b[2] -; CHECK-SDAG-NEXT: mov v1.h[1], w9 -; CHECK-SDAG-NEXT: umov w9, v0.b[3] -; CHECK-SDAG-NEXT: mov v1.h[2], w8 -; CHECK-SDAG-NEXT: mov v1.h[3], w9 +; CHECK-SDAG-NEXT: mov v1.b[2], v0.b[1] +; CHECK-SDAG-NEXT: mov v1.b[4], v0.b[2] +; CHECK-SDAG-NEXT: mov v1.b[6], v0.b[3] ; CHECK-SDAG-NEXT: fmov d0, d1 ; CHECK-SDAG-NEXT: add sp, sp, #16 ; CHECK-SDAG-NEXT: ret @@ -168,11 +165,10 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) { ; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SDAG-NEXT: str d0, [sp, #8] -; CHECK-SDAG-NEXT: umov w9, v0.h[1] ; CHECK-SDAG-NEXT: bfi x8, x0, #1, #2 -; CHECK-SDAG-NEXT: ld1 { v0.h }[0], [x8] -; CHECK-SDAG-NEXT: mov v0.s[1], w9 -; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SDAG-NEXT: ld1 { v1.h }[0], [x8] +; CHECK-SDAG-NEXT: mov v1.h[2], v0.h[1] +; CHECK-SDAG-NEXT: fmov d0, d1 ; CHECK-SDAG-NEXT: add sp, sp, #16 ; CHECK-SDAG-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll index 85daa3ca6623e..33238ccf86a39 100644 --- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll +++ 
b/llvm/test/CodeGen/AArch64/bitcast-extend.ll @@ -70,16 +70,12 @@ define <4 x i64> @z_i32_v4i64(i32 %x) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s0, w0 ; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff -; CHECK-SD-NEXT: umov w8, v0.b[2] -; CHECK-SD-NEXT: umov w9, v0.b[0] -; CHECK-SD-NEXT: umov w10, v0.b[3] -; CHECK-SD-NEXT: umov w11, v0.b[1] -; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: fmov s2, w8 -; CHECK-SD-NEXT: mov v0.s[1], w11 -; CHECK-SD-NEXT: mov v2.s[1], w10 -; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: mov v2.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v2.b[4], v0.b[1] +; CHECK-SD-NEXT: mov v3.b[4], v0.b[3] +; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0 +; CHECK-SD-NEXT: ushll v2.2d, v3.2s, #0 ; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: and v1.16b, v2.16b, v1.16b ; CHECK-SD-NEXT: ret @@ -176,16 +172,12 @@ define <4 x i64> @s_i32_v4i64(i32 %x) { ; CHECK-SD-LABEL: s_i32_v4i64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: umov w8, v0.b[2] -; CHECK-SD-NEXT: umov w9, v0.b[0] -; CHECK-SD-NEXT: umov w10, v0.b[3] -; CHECK-SD-NEXT: umov w11, v0.b[1] -; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: mov v0.s[1], w11 -; CHECK-SD-NEXT: mov v1.s[1], w10 -; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-SD-NEXT: mov v1.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v2.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v1.b[4], v0.b[1] +; CHECK-SD-NEXT: mov v2.b[4], v0.b[3] +; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-SD-NEXT: ushll v1.2d, v2.2s, #0 ; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56 ; CHECK-SD-NEXT: shl v1.2d, v1.2d, #56 ; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56 diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll index 8b74de1c127dd..e90b6cb7f809b 100644 --- a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll +++ 
b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll @@ -5,16 +5,12 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECKLE-LABEL: test_reconstructshuffle: ; CHECKLE: // %bb.0: -; CHECKLE-NEXT: umov w8, v0.b[3] -; CHECKLE-NEXT: umov w9, v0.b[2] -; CHECKLE-NEXT: fmov s2, w8 -; CHECKLE-NEXT: umov w8, v0.b[1] -; CHECKLE-NEXT: mov v2.h[1], w9 -; CHECKLE-NEXT: mov v2.h[2], w8 -; CHECKLE-NEXT: umov w8, v0.b[0] -; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #8 -; CHECKLE-NEXT: mov v2.h[3], w8 -; CHECKLE-NEXT: zip2 v0.8b, v0.8b, v0.8b +; CHECKLE-NEXT: mov v2.b[0], v0.b[3] +; CHECKLE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKLE-NEXT: mov v2.b[2], v0.b[2] +; CHECKLE-NEXT: mov v2.b[4], v0.b[1] +; CHECKLE-NEXT: mov v2.b[6], v0.b[0] +; CHECKLE-NEXT: zip2 v0.8b, v1.8b, v0.8b ; CHECKLE-NEXT: add v0.4h, v2.4h, v0.4h ; CHECKLE-NEXT: bic v0.4h, #255, lsl #8 ; CHECKLE-NEXT: ret @@ -25,16 +21,12 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECKBE-NEXT: rev64 v1.16b, v1.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: umov w8, v0.b[3] -; CHECKBE-NEXT: umov w9, v0.b[2] -; CHECKBE-NEXT: fmov s2, w8 -; CHECKBE-NEXT: umov w8, v0.b[1] -; CHECKBE-NEXT: mov v2.h[1], w9 -; CHECKBE-NEXT: mov v2.h[2], w8 -; CHECKBE-NEXT: umov w8, v0.b[0] -; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: mov v2.h[3], w8 -; CHECKBE-NEXT: zip2 v0.8b, v0.8b, v0.8b +; CHECKBE-NEXT: mov v2.b[0], v0.b[3] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: mov v2.b[2], v0.b[2] +; CHECKBE-NEXT: mov v2.b[4], v0.b[1] +; CHECKBE-NEXT: mov v2.b[6], v0.b[0] +; CHECKBE-NEXT: zip2 v0.8b, v1.8b, v0.8b ; CHECKBE-NEXT: add v0.4h, v2.4h, v0.4h ; CHECKBE-NEXT: bic v0.4h, #255, lsl #8 ; CHECKBE-NEXT: rev64 v0.4h, v0.4h diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index 07957c117868d..fb2bdb4d63f47 100644 --- 
a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -3443,26 +3443,18 @@ define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) { ; CHECK-SD-LABEL: stofp_v8i8_v8f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: umov w8, v0.b[0] -; CHECK-SD-NEXT: umov w9, v0.b[2] -; CHECK-SD-NEXT: umov w11, v0.b[4] -; CHECK-SD-NEXT: umov w12, v0.b[6] -; CHECK-SD-NEXT: umov w10, v0.b[1] -; CHECK-SD-NEXT: umov w13, v0.b[3] -; CHECK-SD-NEXT: umov w14, v0.b[5] -; CHECK-SD-NEXT: umov w15, v0.b[7] -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: fmov s1, w9 -; CHECK-SD-NEXT: fmov s2, w11 -; CHECK-SD-NEXT: fmov s3, w12 -; CHECK-SD-NEXT: mov v0.s[1], w10 -; CHECK-SD-NEXT: mov v1.s[1], w13 -; CHECK-SD-NEXT: mov v2.s[1], w14 -; CHECK-SD-NEXT: mov v3.s[1], w15 -; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-SD-NEXT: shl v2.2s, v2.2s, #24 -; CHECK-SD-NEXT: shl v3.2s, v3.2s, #24 +; CHECK-SD-NEXT: mov v1.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v2.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[4] +; CHECK-SD-NEXT: mov v4.b[0], v0.b[6] +; CHECK-SD-NEXT: mov v1.b[4], v0.b[1] +; CHECK-SD-NEXT: mov v2.b[4], v0.b[3] +; CHECK-SD-NEXT: mov v3.b[4], v0.b[5] +; CHECK-SD-NEXT: mov v4.b[4], v0.b[7] +; CHECK-SD-NEXT: shl v0.2s, v1.2s, #24 +; CHECK-SD-NEXT: shl v1.2s, v2.2s, #24 +; CHECK-SD-NEXT: shl v2.2s, v3.2s, #24 +; CHECK-SD-NEXT: shl v3.2s, v4.2s, #24 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24 ; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24 @@ -3500,27 +3492,19 @@ define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) { ; CHECK-SD-LABEL: utofp_v8i8_v8f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: umov w8, v0.b[0] -; CHECK-SD-NEXT: umov w9, v0.b[2] -; CHECK-SD-NEXT: umov w11, v0.b[4] -; CHECK-SD-NEXT: umov w12, v0.b[6] -; CHECK-SD-NEXT: umov w10, v0.b[1] -; CHECK-SD-NEXT: umov w13, v0.b[3] -; 
CHECK-SD-NEXT: umov w14, v0.b[5] -; CHECK-SD-NEXT: umov w15, v0.b[7] +; CHECK-SD-NEXT: mov v2.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v4.b[0], v0.b[4] +; CHECK-SD-NEXT: mov v5.b[0], v0.b[6] ; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff -; CHECK-SD-NEXT: fmov s0, w8 -; CHECK-SD-NEXT: fmov s2, w9 -; CHECK-SD-NEXT: fmov s3, w11 -; CHECK-SD-NEXT: fmov s4, w12 -; CHECK-SD-NEXT: mov v0.s[1], w10 -; CHECK-SD-NEXT: mov v2.s[1], w13 -; CHECK-SD-NEXT: mov v3.s[1], w14 -; CHECK-SD-NEXT: mov v4.s[1], w15 -; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-SD-NEXT: and v2.8b, v2.8b, v1.8b -; CHECK-SD-NEXT: and v3.8b, v3.8b, v1.8b -; CHECK-SD-NEXT: and v1.8b, v4.8b, v1.8b +; CHECK-SD-NEXT: mov v2.b[4], v0.b[1] +; CHECK-SD-NEXT: mov v3.b[4], v0.b[3] +; CHECK-SD-NEXT: mov v4.b[4], v0.b[5] +; CHECK-SD-NEXT: mov v5.b[4], v0.b[7] +; CHECK-SD-NEXT: and v0.8b, v2.8b, v1.8b +; CHECK-SD-NEXT: and v2.8b, v3.8b, v1.8b +; CHECK-SD-NEXT: and v3.8b, v4.8b, v1.8b +; CHECK-SD-NEXT: and v1.8b, v5.8b, v1.8b ; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 ; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0 @@ -3553,68 +3537,52 @@ entry: define <16 x double> @stofp_v16i8_v16f64(<16 x i8> %a) { ; CHECK-SD-LABEL: stofp_v16i8_v16f64: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: umov w8, v0.b[0] ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: umov w9, v0.b[1] -; CHECK-SD-NEXT: umov w10, v0.b[2] -; CHECK-SD-NEXT: umov w12, v0.b[4] -; CHECK-SD-NEXT: umov w14, v0.b[6] -; CHECK-SD-NEXT: umov w11, v0.b[3] -; CHECK-SD-NEXT: umov w13, v0.b[5] -; CHECK-SD-NEXT: fmov s2, w8 -; CHECK-SD-NEXT: umov w15, v1.b[0] -; CHECK-SD-NEXT: umov w17, v1.b[2] -; CHECK-SD-NEXT: umov w0, v1.b[4] -; CHECK-SD-NEXT: umov w16, v1.b[1] -; CHECK-SD-NEXT: umov w18, v1.b[3] -; CHECK-SD-NEXT: umov w8, v0.b[7] -; CHECK-SD-NEXT: fmov s0, w10 -; CHECK-SD-NEXT: umov w10, v1.b[5] -; CHECK-SD-NEXT: mov v2.s[1], w9 -; CHECK-SD-NEXT: umov w9, v1.b[6] -; CHECK-SD-NEXT: 
fmov s3, w12 -; CHECK-SD-NEXT: umov w12, v1.b[7] -; CHECK-SD-NEXT: fmov s1, w14 -; CHECK-SD-NEXT: fmov s4, w15 -; CHECK-SD-NEXT: fmov s5, w17 -; CHECK-SD-NEXT: fmov s6, w0 -; CHECK-SD-NEXT: mov v0.s[1], w11 -; CHECK-SD-NEXT: mov v3.s[1], w13 -; CHECK-SD-NEXT: fmov s7, w9 -; CHECK-SD-NEXT: mov v1.s[1], w8 -; CHECK-SD-NEXT: mov v4.s[1], w16 -; CHECK-SD-NEXT: mov v5.s[1], w18 -; CHECK-SD-NEXT: mov v6.s[1], w10 -; CHECK-SD-NEXT: shl v2.2s, v2.2s, #24 -; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-SD-NEXT: mov v7.s[1], w12 -; CHECK-SD-NEXT: shl v3.2s, v3.2s, #24 -; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-SD-NEXT: shl v4.2s, v4.2s, #24 -; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24 -; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24 -; CHECK-SD-NEXT: shl v6.2s, v6.2s, #24 +; CHECK-SD-NEXT: mov v2.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v4.b[0], v0.b[4] +; CHECK-SD-NEXT: mov v5.b[0], v0.b[6] +; CHECK-SD-NEXT: mov v6.b[0], v1.b[0] +; CHECK-SD-NEXT: mov v7.b[0], v1.b[2] +; CHECK-SD-NEXT: mov v16.b[0], v1.b[4] +; CHECK-SD-NEXT: mov v17.b[0], v1.b[6] +; CHECK-SD-NEXT: mov v2.b[4], v0.b[1] +; CHECK-SD-NEXT: mov v3.b[4], v0.b[3] +; CHECK-SD-NEXT: mov v4.b[4], v0.b[5] +; CHECK-SD-NEXT: mov v5.b[4], v0.b[7] +; CHECK-SD-NEXT: mov v6.b[4], v1.b[1] +; CHECK-SD-NEXT: mov v7.b[4], v1.b[3] +; CHECK-SD-NEXT: mov v16.b[4], v1.b[5] +; CHECK-SD-NEXT: mov v17.b[4], v1.b[7] +; CHECK-SD-NEXT: shl v0.2s, v2.2s, #24 +; CHECK-SD-NEXT: shl v1.2s, v3.2s, #24 +; CHECK-SD-NEXT: shl v2.2s, v4.2s, #24 +; CHECK-SD-NEXT: shl v3.2s, v5.2s, #24 +; CHECK-SD-NEXT: shl v4.2s, v6.2s, #24 +; CHECK-SD-NEXT: shl v5.2s, v7.2s, #24 +; CHECK-SD-NEXT: shl v6.2s, v16.2s, #24 +; CHECK-SD-NEXT: shl v7.2s, v17.2s, #24 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24 ; CHECK-SD-NEXT: sshr v3.2s, v3.2s, #24 -; CHECK-SD-NEXT: shl v7.2s, v7.2s, #24 ; CHECK-SD-NEXT: sshr v4.2s, v4.2s, #24 -; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24 
; CHECK-SD-NEXT: sshr v5.2s, v5.2s, #24 ; CHECK-SD-NEXT: sshr v6.2s, v6.2s, #24 +; CHECK-SD-NEXT: sshr v7.2s, v7.2s, #24 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0 ; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-SD-NEXT: sshll v16.2d, v0.2s, #0 ; CHECK-SD-NEXT: sshll v3.2d, v3.2s, #0 -; CHECK-SD-NEXT: sshr v7.2s, v7.2s, #24 ; CHECK-SD-NEXT: sshll v4.2d, v4.2s, #0 -; CHECK-SD-NEXT: sshll v17.2d, v1.2s, #0 ; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0 ; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 -; CHECK-SD-NEXT: scvtf v0.2d, v2.2d -; CHECK-SD-NEXT: scvtf v1.2d, v16.2d -; CHECK-SD-NEXT: scvtf v2.2d, v3.2d ; CHECK-SD-NEXT: sshll v7.2d, v7.2s, #0 +; CHECK-SD-NEXT: scvtf v0.2d, v0.2d +; CHECK-SD-NEXT: scvtf v1.2d, v1.2d +; CHECK-SD-NEXT: scvtf v2.2d, v2.2d +; CHECK-SD-NEXT: scvtf v3.2d, v3.2d ; CHECK-SD-NEXT: scvtf v4.2d, v4.2d -; CHECK-SD-NEXT: scvtf v3.2d, v17.2d ; CHECK-SD-NEXT: scvtf v5.2d, v5.2d ; CHECK-SD-NEXT: scvtf v6.2d, v6.2d ; CHECK-SD-NEXT: scvtf v7.2d, v7.2d @@ -3654,63 +3622,47 @@ define <16 x double> @utofp_v16i8_v16f64(<16 x i8> %a) { ; CHECK-SD-LABEL: utofp_v16i8_v16f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: umov w8, v0.b[0] -; CHECK-SD-NEXT: umov w10, v0.b[2] -; CHECK-SD-NEXT: umov w9, v0.b[1] -; CHECK-SD-NEXT: umov w12, v0.b[4] -; CHECK-SD-NEXT: umov w11, v0.b[3] -; CHECK-SD-NEXT: umov w13, v0.b[5] -; CHECK-SD-NEXT: umov w18, v0.b[6] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v4.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v5.b[0], v0.b[4] +; CHECK-SD-NEXT: mov v6.b[0], v0.b[6] ; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff -; CHECK-SD-NEXT: umov w14, v2.b[0] -; CHECK-SD-NEXT: umov w16, v2.b[2] -; CHECK-SD-NEXT: umov w0, v2.b[4] -; CHECK-SD-NEXT: fmov s3, w8 -; CHECK-SD-NEXT: umov w8, v0.b[7] -; CHECK-SD-NEXT: fmov s0, w10 -; CHECK-SD-NEXT: umov w10, v2.b[6] -; CHECK-SD-NEXT: umov w15, v2.b[1] -; CHECK-SD-NEXT: umov w17, v2.b[3] -; CHECK-SD-NEXT: fmov s4, 
w12 -; CHECK-SD-NEXT: umov w12, v2.b[5] -; CHECK-SD-NEXT: fmov s7, w18 -; CHECK-SD-NEXT: mov v3.s[1], w9 -; CHECK-SD-NEXT: umov w9, v2.b[7] -; CHECK-SD-NEXT: fmov s2, w14 -; CHECK-SD-NEXT: fmov s5, w16 -; CHECK-SD-NEXT: fmov s6, w0 -; CHECK-SD-NEXT: mov v0.s[1], w11 -; CHECK-SD-NEXT: fmov s16, w10 -; CHECK-SD-NEXT: mov v4.s[1], w13 -; CHECK-SD-NEXT: mov v7.s[1], w8 -; CHECK-SD-NEXT: mov v2.s[1], w15 -; CHECK-SD-NEXT: mov v5.s[1], w17 -; CHECK-SD-NEXT: mov v6.s[1], w12 -; CHECK-SD-NEXT: and v3.8b, v3.8b, v1.8b -; CHECK-SD-NEXT: mov v16.s[1], w9 -; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-SD-NEXT: and v4.8b, v4.8b, v1.8b -; CHECK-SD-NEXT: and v7.8b, v7.8b, v1.8b -; CHECK-SD-NEXT: and v2.8b, v2.8b, v1.8b -; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0 -; CHECK-SD-NEXT: and v5.8b, v5.8b, v1.8b -; CHECK-SD-NEXT: and v6.8b, v6.8b, v1.8b -; CHECK-SD-NEXT: and v1.8b, v16.8b, v1.8b -; CHECK-SD-NEXT: ushll v16.2d, v0.2s, #0 -; CHECK-SD-NEXT: ushll v17.2d, v4.2s, #0 +; CHECK-SD-NEXT: mov v7.b[0], v2.b[0] +; CHECK-SD-NEXT: mov v16.b[0], v2.b[2] +; CHECK-SD-NEXT: mov v17.b[0], v2.b[4] +; CHECK-SD-NEXT: mov v18.b[0], v2.b[6] +; CHECK-SD-NEXT: mov v3.b[4], v0.b[1] +; CHECK-SD-NEXT: mov v4.b[4], v0.b[3] +; CHECK-SD-NEXT: mov v5.b[4], v0.b[5] +; CHECK-SD-NEXT: mov v6.b[4], v0.b[7] +; CHECK-SD-NEXT: mov v7.b[4], v2.b[1] +; CHECK-SD-NEXT: mov v16.b[4], v2.b[3] +; CHECK-SD-NEXT: mov v17.b[4], v2.b[5] +; CHECK-SD-NEXT: mov v18.b[4], v2.b[7] +; CHECK-SD-NEXT: and v0.8b, v3.8b, v1.8b +; CHECK-SD-NEXT: and v2.8b, v4.8b, v1.8b +; CHECK-SD-NEXT: and v3.8b, v5.8b, v1.8b +; CHECK-SD-NEXT: and v4.8b, v6.8b, v1.8b +; CHECK-SD-NEXT: and v5.8b, v7.8b, v1.8b +; CHECK-SD-NEXT: and v6.8b, v16.8b, v1.8b +; CHECK-SD-NEXT: and v7.8b, v17.8b, v1.8b +; CHECK-SD-NEXT: and v1.8b, v18.8b, v1.8b +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0 -; CHECK-SD-NEXT: ucvtf v0.2d, v3.2d +; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0 +; 
CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0 ; CHECK-SD-NEXT: ushll v5.2d, v5.2s, #0 ; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0 -; CHECK-SD-NEXT: ushll v18.2d, v1.2s, #0 -; CHECK-SD-NEXT: ucvtf v1.2d, v16.2d -; CHECK-SD-NEXT: ucvtf v4.2d, v2.2d -; CHECK-SD-NEXT: ucvtf v2.2d, v17.2d -; CHECK-SD-NEXT: ucvtf v3.2d, v7.2d -; CHECK-SD-NEXT: ucvtf v5.2d, v5.2d -; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d -; CHECK-SD-NEXT: ucvtf v7.2d, v18.2d +; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0 +; CHECK-SD-NEXT: ushll v16.2d, v1.2s, #0 +; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d +; CHECK-SD-NEXT: ucvtf v1.2d, v2.2d +; CHECK-SD-NEXT: ucvtf v2.2d, v3.2d +; CHECK-SD-NEXT: ucvtf v3.2d, v4.2d +; CHECK-SD-NEXT: ucvtf v4.2d, v5.2d +; CHECK-SD-NEXT: ucvtf v5.2d, v6.2d +; CHECK-SD-NEXT: ucvtf v6.2d, v7.2d +; CHECK-SD-NEXT: ucvtf v7.2d, v16.2d ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: utofp_v16i8_v16f64: @@ -3747,143 +3699,111 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) { ; CHECK-SD-LABEL: stofp_v32i8_v32f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: umov w9, v3.b[0] -; CHECK-SD-NEXT: umov w11, v3.b[4] -; CHECK-SD-NEXT: umov w13, v3.b[6] -; CHECK-SD-NEXT: umov w18, v2.b[2] -; CHECK-SD-NEXT: umov w10, v3.b[2] -; CHECK-SD-NEXT: umov w12, v3.b[1] -; CHECK-SD-NEXT: umov w16, v2.b[0] -; CHECK-SD-NEXT: umov w14, v3.b[3] -; CHECK-SD-NEXT: umov w15, v3.b[5] -; CHECK-SD-NEXT: umov w17, v3.b[7] -; CHECK-SD-NEXT: fmov s6, w9 -; CHECK-SD-NEXT: fmov s5, w11 -; CHECK-SD-NEXT: fmov s7, w13 -; CHECK-SD-NEXT: umov w13, v2.b[4] -; CHECK-SD-NEXT: umov w11, v2.b[3] -; CHECK-SD-NEXT: umov w9, v2.b[6] -; CHECK-SD-NEXT: fmov s17, w18 -; CHECK-SD-NEXT: fmov s4, w10 -; CHECK-SD-NEXT: umov w10, v2.b[1] -; CHECK-SD-NEXT: mov v6.s[1], w12 -; CHECK-SD-NEXT: fmov s3, w16 -; CHECK-SD-NEXT: umov w12, v2.b[5] -; CHECK-SD-NEXT: mov v5.s[1], w15 -; CHECK-SD-NEXT: umov w15, v1.b[0] -; CHECK-SD-NEXT: umov w16, v0.b[6] -; CHECK-SD-NEXT: 
fmov s16, w13 -; CHECK-SD-NEXT: umov w13, v1.b[2] -; CHECK-SD-NEXT: mov v17.s[1], w11 -; CHECK-SD-NEXT: umov w11, v1.b[6] -; CHECK-SD-NEXT: fmov s18, w9 -; CHECK-SD-NEXT: umov w9, v1.b[4] -; CHECK-SD-NEXT: mov v3.s[1], w10 -; CHECK-SD-NEXT: umov w10, v0.b[0] -; CHECK-SD-NEXT: mov v4.s[1], w14 -; CHECK-SD-NEXT: mov v16.s[1], w12 -; CHECK-SD-NEXT: umov w12, v1.b[7] -; CHECK-SD-NEXT: umov w14, v1.b[5] -; CHECK-SD-NEXT: fmov s20, w13 -; CHECK-SD-NEXT: umov w13, v1.b[3] -; CHECK-SD-NEXT: fmov s22, w15 -; CHECK-SD-NEXT: fmov s21, w11 -; CHECK-SD-NEXT: umov w11, v1.b[1] -; CHECK-SD-NEXT: fmov s1, w9 -; CHECK-SD-NEXT: fmov s19, w10 -; CHECK-SD-NEXT: umov w10, v0.b[4] -; CHECK-SD-NEXT: umov w9, v0.b[7] -; CHECK-SD-NEXT: fmov s23, w16 -; CHECK-SD-NEXT: mov v7.s[1], w17 +; CHECK-SD-NEXT: mov v5.b[0], v1.b[6] +; CHECK-SD-NEXT: mov v17.b[0], v1.b[4] +; CHECK-SD-NEXT: mov v20.b[0], v1.b[2] +; CHECK-SD-NEXT: mov v21.b[0], v1.b[0] +; CHECK-SD-NEXT: mov v18.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v19.b[0], v0.b[6] +; CHECK-SD-NEXT: mov v22.b[0], v0.b[4] +; CHECK-SD-NEXT: ext v16.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NEXT: mov v2.b[0], v3.b[0] +; CHECK-SD-NEXT: mov v4.b[0], v3.b[2] +; CHECK-SD-NEXT: mov v6.b[0], v3.b[4] +; CHECK-SD-NEXT: mov v7.b[0], v3.b[6] +; CHECK-SD-NEXT: mov v5.b[4], v1.b[7] +; CHECK-SD-NEXT: mov v17.b[4], v1.b[5] +; CHECK-SD-NEXT: mov v20.b[4], v1.b[3] +; CHECK-SD-NEXT: mov v21.b[4], v1.b[1] +; CHECK-SD-NEXT: mov v19.b[4], v0.b[7] +; CHECK-SD-NEXT: mov v22.b[4], v0.b[5] +; CHECK-SD-NEXT: mov v18.b[4], v0.b[1] +; CHECK-SD-NEXT: mov v23.b[0], v16.b[0] +; CHECK-SD-NEXT: mov v2.b[4], v3.b[1] +; CHECK-SD-NEXT: mov v4.b[4], v3.b[3] +; CHECK-SD-NEXT: mov v6.b[4], v3.b[5] +; CHECK-SD-NEXT: mov v7.b[4], v3.b[7] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[2] +; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24 ; CHECK-SD-NEXT: shl v17.2s, v17.2s, #24 -; CHECK-SD-NEXT: mov v21.s[1], w12 -; CHECK-SD-NEXT: mov v1.s[1], w14 -; CHECK-SD-NEXT: umov w14, v0.b[5] -; CHECK-SD-NEXT: umov w12, v0.b[1] -; 
CHECK-SD-NEXT: mov v20.s[1], w13 -; CHECK-SD-NEXT: umov w13, v0.b[2] -; CHECK-SD-NEXT: mov v22.s[1], w11 -; CHECK-SD-NEXT: umov w11, v0.b[3] -; CHECK-SD-NEXT: fmov s0, w10 -; CHECK-SD-NEXT: mov v23.s[1], w9 -; CHECK-SD-NEXT: umov w9, v2.b[7] -; CHECK-SD-NEXT: shl v16.2s, v16.2s, #24 -; CHECK-SD-NEXT: shl v21.2s, v21.2s, #24 -; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-SD-NEXT: sshr v17.2s, v17.2s, #24 -; CHECK-SD-NEXT: mov v0.s[1], w14 -; CHECK-SD-NEXT: fmov s24, w13 -; CHECK-SD-NEXT: mov v19.s[1], w12 -; CHECK-SD-NEXT: sshr v16.2s, v16.2s, #24 -; CHECK-SD-NEXT: shl v6.2s, v6.2s, #24 ; CHECK-SD-NEXT: shl v20.2s, v20.2s, #24 -; CHECK-SD-NEXT: sshr v21.2s, v21.2s, #24 -; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24 -; CHECK-SD-NEXT: shl v2.2s, v23.2s, #24 -; CHECK-SD-NEXT: mov v18.s[1], w9 -; CHECK-SD-NEXT: mov v24.s[1], w11 -; CHECK-SD-NEXT: shl v22.2s, v22.2s, #24 -; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-SD-NEXT: shl v4.2s, v4.2s, #24 -; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24 -; CHECK-SD-NEXT: sshll v21.2d, v21.2s, #0 -; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0 -; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24 -; CHECK-SD-NEXT: shl v7.2s, v7.2s, #24 +; CHECK-SD-NEXT: mov v24.b[0], v16.b[4] +; CHECK-SD-NEXT: mov v23.b[4], v16.b[1] +; CHECK-SD-NEXT: shl v18.2s, v18.2s, #24 ; CHECK-SD-NEXT: shl v19.2s, v19.2s, #24 +; CHECK-SD-NEXT: sshr v5.2s, v5.2s, #24 +; CHECK-SD-NEXT: shl v1.2s, v2.2s, #24 +; CHECK-SD-NEXT: shl v2.2s, v4.2s, #24 +; CHECK-SD-NEXT: sshr v17.2s, v17.2s, #24 +; CHECK-SD-NEXT: mov v3.b[4], v0.b[3] +; CHECK-SD-NEXT: shl v0.2s, v21.2s, #24 +; CHECK-SD-NEXT: shl v4.2s, v6.2s, #24 +; CHECK-SD-NEXT: shl v6.2s, v7.2s, #24 +; CHECK-SD-NEXT: mov v7.b[0], v16.b[2] +; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0 ; CHECK-SD-NEXT: sshr v20.2s, v20.2s, #24 +; CHECK-SD-NEXT: mov v21.b[0], v16.b[6] +; CHECK-SD-NEXT: sshll v17.2d, v17.2s, #0 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-SD-NEXT: shl v18.2s, v18.2s, #24 -; CHECK-SD-NEXT: shl v23.2s, v24.2s, #24 -; CHECK-SD-NEXT: 
scvtf v21.2d, v21.2d -; CHECK-SD-NEXT: scvtf v1.2d, v1.2d -; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-SD-NEXT: sshr v22.2s, v22.2s, #24 +; CHECK-SD-NEXT: shl v22.2s, v22.2s, #24 +; CHECK-SD-NEXT: shl v3.2s, v3.2s, #24 +; CHECK-SD-NEXT: mov v24.b[4], v16.b[5] ; CHECK-SD-NEXT: sshr v19.2s, v19.2s, #24 -; CHECK-SD-NEXT: sshr v5.2s, v5.2s, #24 +; CHECK-SD-NEXT: scvtf v5.2d, v5.2d +; CHECK-SD-NEXT: mov v7.b[4], v16.b[3] +; CHECK-SD-NEXT: sshll v20.2d, v20.2s, #0 +; CHECK-SD-NEXT: scvtf v17.2d, v17.2d ; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-SD-NEXT: sshr v23.2s, v23.2s, #24 -; CHECK-SD-NEXT: sshr v4.2s, v4.2s, #24 -; CHECK-SD-NEXT: scvtf v2.2d, v2.2d +; CHECK-SD-NEXT: mov v21.b[4], v16.b[7] +; CHECK-SD-NEXT: sshr v3.2s, v3.2s, #24 +; CHECK-SD-NEXT: sshr v16.2s, v22.2s, #24 +; CHECK-SD-NEXT: sshll v19.2d, v19.2s, #0 +; CHECK-SD-NEXT: scvtf v20.2d, v20.2d +; CHECK-SD-NEXT: shl v22.2s, v24.2s, #24 ; CHECK-SD-NEXT: sshr v6.2s, v6.2s, #24 -; CHECK-SD-NEXT: sshll v20.2d, v20.2s, #0 -; CHECK-SD-NEXT: stp q1, q21, [x8, #160] -; CHECK-SD-NEXT: shl v1.2s, v3.2s, #24 -; CHECK-SD-NEXT: sshr v3.2s, v18.2s, #24 ; CHECK-SD-NEXT: scvtf v0.2d, v0.2d -; CHECK-SD-NEXT: sshll v22.2d, v22.2s, #0 -; CHECK-SD-NEXT: sshll v18.2d, v23.2s, #0 -; CHECK-SD-NEXT: sshll v19.2d, v19.2s, #0 -; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0 -; CHECK-SD-NEXT: sshll v4.2d, v4.2s, #0 +; CHECK-SD-NEXT: shl v7.2s, v7.2s, #24 +; CHECK-SD-NEXT: sshr v4.2s, v4.2s, #24 +; CHECK-SD-NEXT: stp q17, q5, [x8, #160] +; CHECK-SD-NEXT: sshr v5.2s, v18.2s, #24 +; CHECK-SD-NEXT: shl v17.2s, v23.2s, #24 +; CHECK-SD-NEXT: sshll v3.2d, v3.2s, #0 +; CHECK-SD-NEXT: sshll v16.2d, v16.2s, #0 +; CHECK-SD-NEXT: shl v21.2s, v21.2s, #24 +; CHECK-SD-NEXT: scvtf v19.2d, v19.2d +; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24 ; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24 -; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 -; CHECK-SD-NEXT: scvtf v20.2d, v20.2d -; CHECK-SD-NEXT: scvtf v22.2d, v22.2d -; CHECK-SD-NEXT: stp q0, q2, [x8, #32] -; 
CHECK-SD-NEXT: sshll v2.2d, v3.2s, #0 -; CHECK-SD-NEXT: sshll v3.2d, v16.2s, #0 -; CHECK-SD-NEXT: sshll v16.2d, v17.2s, #0 -; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0 +; CHECK-SD-NEXT: stp q0, q20, [x8, #128] +; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0 ; CHECK-SD-NEXT: sshr v0.2s, v7.2s, #24 -; CHECK-SD-NEXT: scvtf v7.2d, v18.2d -; CHECK-SD-NEXT: scvtf v17.2d, v19.2d -; CHECK-SD-NEXT: stp q22, q20, [x8, #128] -; CHECK-SD-NEXT: scvtf v2.2d, v2.2d +; CHECK-SD-NEXT: sshr v7.2s, v17.2s, #24 ; CHECK-SD-NEXT: scvtf v3.2d, v3.2d ; CHECK-SD-NEXT: scvtf v16.2d, v16.2d -; CHECK-SD-NEXT: scvtf v1.2d, v1.2d +; CHECK-SD-NEXT: sshr v18.2s, v21.2s, #24 +; CHECK-SD-NEXT: sshr v20.2s, v22.2s, #24 +; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: scvtf v5.2d, v5.2d ; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-SD-NEXT: stp q17, q7, [x8] -; CHECK-SD-NEXT: stp q3, q2, [x8, #224] +; CHECK-SD-NEXT: sshll v4.2d, v4.2s, #0 +; CHECK-SD-NEXT: sshll v7.2d, v7.2s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0 +; CHECK-SD-NEXT: stp q16, q19, [x8, #32] +; CHECK-SD-NEXT: sshll v17.2d, v18.2s, #0 +; CHECK-SD-NEXT: sshll v16.2d, v20.2s, #0 ; CHECK-SD-NEXT: scvtf v0.2d, v0.2d -; CHECK-SD-NEXT: scvtf v2.2d, v5.2d -; CHECK-SD-NEXT: stp q1, q16, [x8, #192] -; CHECK-SD-NEXT: scvtf v3.2d, v4.2d -; CHECK-SD-NEXT: scvtf v1.2d, v6.2d -; CHECK-SD-NEXT: stp q2, q0, [x8, #96] -; CHECK-SD-NEXT: stp q1, q3, [x8, #64] +; CHECK-SD-NEXT: scvtf v4.2d, v4.2d +; CHECK-SD-NEXT: stp q5, q3, [x8] +; CHECK-SD-NEXT: scvtf v3.2d, v7.2d +; CHECK-SD-NEXT: scvtf v5.2d, v6.2d +; CHECK-SD-NEXT: scvtf v17.2d, v17.2d +; CHECK-SD-NEXT: scvtf v16.2d, v16.2d +; CHECK-SD-NEXT: scvtf v2.2d, v2.2d +; CHECK-SD-NEXT: stp q4, q5, [x8, #96] +; CHECK-SD-NEXT: stp q3, q0, [x8, #192] +; CHECK-SD-NEXT: scvtf v0.2d, v1.2d +; CHECK-SD-NEXT: stp q16, q17, [x8, #224] +; CHECK-SD-NEXT: stp q0, q2, [x8, #64] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: stofp_v32i8_v32f64: @@ -3949,129 +3869,97 @@ entry: 
define <32 x double> @utofp_v32i8_v32f64(<32 x i8> %a) { ; CHECK-SD-LABEL: utofp_v32i8_v32f64: ; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov v6.b[0], v1.b[6] +; CHECK-SD-NEXT: mov v7.b[0], v1.b[4] +; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-SD-NEXT: mov v16.b[0], v1.b[2] +; CHECK-SD-NEXT: mov v17.b[0], v1.b[0] +; CHECK-SD-NEXT: mov v19.b[0], v0.b[6] +; CHECK-SD-NEXT: mov v20.b[0], v0.b[4] +; CHECK-SD-NEXT: movi d5, #0x0000ff000000ff +; CHECK-SD-NEXT: mov v24.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v25.b[0], v0.b[0] ; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ext v16.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: movi d3, #0x0000ff000000ff -; CHECK-SD-NEXT: umov w11, v2.b[0] -; CHECK-SD-NEXT: umov w14, v2.b[4] -; CHECK-SD-NEXT: umov w12, v2.b[2] -; CHECK-SD-NEXT: umov w15, v2.b[6] -; CHECK-SD-NEXT: umov w16, v2.b[1] -; CHECK-SD-NEXT: umov w13, v16.b[2] -; CHECK-SD-NEXT: umov w17, v16.b[0] -; CHECK-SD-NEXT: umov w9, v2.b[3] -; CHECK-SD-NEXT: umov w10, v2.b[5] -; CHECK-SD-NEXT: fmov s4, w11 -; CHECK-SD-NEXT: fmov s6, w14 -; CHECK-SD-NEXT: umov w14, v16.b[6] -; CHECK-SD-NEXT: fmov s5, w12 -; CHECK-SD-NEXT: umov w12, v16.b[4] -; CHECK-SD-NEXT: umov w11, v16.b[1] -; CHECK-SD-NEXT: fmov s7, w15 -; CHECK-SD-NEXT: umov w15, v16.b[7] -; CHECK-SD-NEXT: fmov s18, w13 -; CHECK-SD-NEXT: mov v4.s[1], w16 -; CHECK-SD-NEXT: umov w16, v1.b[4] -; CHECK-SD-NEXT: umov w13, v1.b[6] -; CHECK-SD-NEXT: fmov s17, w17 -; CHECK-SD-NEXT: fmov s20, w14 -; CHECK-SD-NEXT: mov v5.s[1], w9 -; CHECK-SD-NEXT: umov w9, v1.b[7] -; CHECK-SD-NEXT: fmov s19, w12 -; CHECK-SD-NEXT: mov v6.s[1], w10 -; CHECK-SD-NEXT: umov w10, v1.b[2] -; CHECK-SD-NEXT: umov w12, v0.b[6] -; CHECK-SD-NEXT: umov w14, v1.b[0] -; CHECK-SD-NEXT: mov v17.s[1], w11 -; CHECK-SD-NEXT: mov v20.s[1], w15 -; CHECK-SD-NEXT: umov w11, v1.b[5] -; CHECK-SD-NEXT: umov w15, v1.b[3] -; CHECK-SD-NEXT: fmov s21, w16 -; CHECK-SD-NEXT: umov w16, v1.b[1] -; CHECK-SD-NEXT: fmov s1, w13 -; CHECK-SD-NEXT: umov w13, 
v0.b[4] -; CHECK-SD-NEXT: and v6.8b, v6.8b, v3.8b -; CHECK-SD-NEXT: fmov s22, w10 -; CHECK-SD-NEXT: fmov s23, w12 -; CHECK-SD-NEXT: fmov s24, w14 -; CHECK-SD-NEXT: mov v21.s[1], w11 -; CHECK-SD-NEXT: umov w10, v0.b[5] -; CHECK-SD-NEXT: umov w12, v0.b[0] -; CHECK-SD-NEXT: mov v1.s[1], w9 -; CHECK-SD-NEXT: umov w9, v0.b[7] -; CHECK-SD-NEXT: umov w11, v16.b[3] -; CHECK-SD-NEXT: mov v22.s[1], w15 -; CHECK-SD-NEXT: mov v24.s[1], w16 -; CHECK-SD-NEXT: fmov s25, w13 -; CHECK-SD-NEXT: umov w13, v0.b[3] -; CHECK-SD-NEXT: and v20.8b, v20.8b, v3.8b -; CHECK-SD-NEXT: and v5.8b, v5.8b, v3.8b -; CHECK-SD-NEXT: and v21.8b, v21.8b, v3.8b -; CHECK-SD-NEXT: mov v23.s[1], w9 -; CHECK-SD-NEXT: umov w9, v0.b[2] -; CHECK-SD-NEXT: and v1.8b, v1.8b, v3.8b -; CHECK-SD-NEXT: mov v25.s[1], w10 -; CHECK-SD-NEXT: umov w10, v0.b[1] -; CHECK-SD-NEXT: and v0.8b, v22.8b, v3.8b -; CHECK-SD-NEXT: fmov s22, w12 -; CHECK-SD-NEXT: and v24.8b, v24.8b, v3.8b -; CHECK-SD-NEXT: umov w12, v16.b[5] -; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-SD-NEXT: ushll v21.2d, v21.2s, #0 -; CHECK-SD-NEXT: mov v18.s[1], w11 -; CHECK-SD-NEXT: and v16.8b, v23.8b, v3.8b -; CHECK-SD-NEXT: fmov s23, w9 -; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-SD-NEXT: and v25.8b, v25.8b, v3.8b -; CHECK-SD-NEXT: ushll v24.2d, v24.2s, #0 -; CHECK-SD-NEXT: mov v22.s[1], w10 -; CHECK-SD-NEXT: ucvtf v1.2d, v1.2d -; CHECK-SD-NEXT: ucvtf v21.2d, v21.2d -; CHECK-SD-NEXT: umov w9, v2.b[7] -; CHECK-SD-NEXT: mov v23.s[1], w13 +; CHECK-SD-NEXT: mov v6.b[4], v1.b[7] +; CHECK-SD-NEXT: mov v7.b[4], v1.b[5] +; CHECK-SD-NEXT: mov v18.b[0], v3.b[0] +; CHECK-SD-NEXT: mov v21.b[0], v3.b[2] +; CHECK-SD-NEXT: mov v23.b[0], v3.b[4] +; CHECK-SD-NEXT: mov v16.b[4], v1.b[3] +; CHECK-SD-NEXT: mov v17.b[4], v1.b[1] +; CHECK-SD-NEXT: mov v1.b[0], v3.b[6] +; CHECK-SD-NEXT: mov v19.b[4], v0.b[7] +; CHECK-SD-NEXT: mov v20.b[4], v0.b[5] +; CHECK-SD-NEXT: mov v24.b[4], v0.b[3] +; CHECK-SD-NEXT: mov v25.b[4], v0.b[1] +; CHECK-SD-NEXT: and v6.8b, v6.8b, v5.8b +; 
CHECK-SD-NEXT: and v7.8b, v7.8b, v5.8b +; CHECK-SD-NEXT: mov v18.b[4], v3.b[1] +; CHECK-SD-NEXT: mov v21.b[4], v3.b[3] +; CHECK-SD-NEXT: mov v23.b[4], v3.b[5] +; CHECK-SD-NEXT: and v16.8b, v16.8b, v5.8b +; CHECK-SD-NEXT: and v17.8b, v17.8b, v5.8b +; CHECK-SD-NEXT: mov v1.b[4], v3.b[7] +; CHECK-SD-NEXT: and v3.8b, v19.8b, v5.8b +; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0 +; CHECK-SD-NEXT: and v20.8b, v20.8b, v5.8b ; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0 -; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d -; CHECK-SD-NEXT: ushll v2.2d, v25.2s, #0 -; CHECK-SD-NEXT: mov v19.s[1], w12 -; CHECK-SD-NEXT: ucvtf v24.2d, v24.2d -; CHECK-SD-NEXT: and v18.8b, v18.8b, v3.8b -; CHECK-SD-NEXT: stp q21, q1, [x8, #160] -; CHECK-SD-NEXT: and v1.8b, v22.8b, v3.8b +; CHECK-SD-NEXT: mov v4.b[0], v2.b[0] +; CHECK-SD-NEXT: mov v22.b[0], v2.b[2] +; CHECK-SD-NEXT: ushll v17.2d, v17.2s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0 +; CHECK-SD-NEXT: mov v19.b[0], v2.b[4] +; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d +; CHECK-SD-NEXT: ucvtf v3.2d, v7.2d +; CHECK-SD-NEXT: ushll v20.2d, v20.2s, #0 +; CHECK-SD-NEXT: mov v7.b[0], v2.b[6] ; CHECK-SD-NEXT: ucvtf v16.2d, v16.2d -; CHECK-SD-NEXT: and v23.8b, v23.8b, v3.8b -; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d -; CHECK-SD-NEXT: mov v7.s[1], w9 -; CHECK-SD-NEXT: stp q24, q0, [x8, #128] -; CHECK-SD-NEXT: and v0.8b, v19.8b, v3.8b -; CHECK-SD-NEXT: ushll v18.2d, v18.2s, #0 -; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-SD-NEXT: ushll v19.2d, v23.2s, #0 -; CHECK-SD-NEXT: stp q2, q16, [x8, #32] -; CHECK-SD-NEXT: and v16.8b, v17.8b, v3.8b -; CHECK-SD-NEXT: ushll v17.2d, v20.2s, #0 -; CHECK-SD-NEXT: and v2.8b, v7.8b, v3.8b -; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-SD-NEXT: ucvtf v1.2d, v1.2d -; CHECK-SD-NEXT: ucvtf v7.2d, v19.2d -; CHECK-SD-NEXT: and v3.8b, v4.8b, v3.8b -; CHECK-SD-NEXT: ushll v4.2d, v6.2s, #0 +; CHECK-SD-NEXT: and v24.8b, v24.8b, v5.8b ; CHECK-SD-NEXT: ucvtf v17.2d, v17.2d -; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0 
-; CHECK-SD-NEXT: ucvtf v6.2d, v18.2d ; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d +; CHECK-SD-NEXT: mov v4.b[4], v2.b[1] +; CHECK-SD-NEXT: ucvtf v20.2d, v20.2d +; CHECK-SD-NEXT: mov v22.b[4], v2.b[3] +; CHECK-SD-NEXT: mov v19.b[4], v2.b[5] +; CHECK-SD-NEXT: stp q3, q6, [x8, #160] +; CHECK-SD-NEXT: and v6.8b, v25.8b, v5.8b +; CHECK-SD-NEXT: and v1.8b, v1.8b, v5.8b +; CHECK-SD-NEXT: mov v7.b[4], v2.b[7] +; CHECK-SD-NEXT: and v2.8b, v23.8b, v5.8b +; CHECK-SD-NEXT: ushll v3.2d, v24.2s, #0 +; CHECK-SD-NEXT: stp q17, q16, [x8, #128] +; CHECK-SD-NEXT: and v16.8b, v21.8b, v5.8b +; CHECK-SD-NEXT: and v4.8b, v4.8b, v5.8b +; CHECK-SD-NEXT: stp q20, q0, [x8, #32] +; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: and v0.8b, v18.8b, v5.8b +; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0 -; CHECK-SD-NEXT: stp q1, q7, [x8] -; CHECK-SD-NEXT: ushll v1.2d, v5.2s, #0 -; CHECK-SD-NEXT: ucvtf v5.2d, v16.2d -; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d -; CHECK-SD-NEXT: stp q0, q17, [x8, #224] -; CHECK-SD-NEXT: ucvtf v0.2d, v4.2d ; CHECK-SD-NEXT: ucvtf v3.2d, v3.2d +; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0 +; CHECK-SD-NEXT: and v7.8b, v7.8b, v5.8b +; CHECK-SD-NEXT: and v17.8b, v19.8b, v5.8b +; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: and v18.8b, v22.8b, v5.8b ; CHECK-SD-NEXT: ucvtf v1.2d, v1.2d -; CHECK-SD-NEXT: stp q5, q6, [x8, #192] -; CHECK-SD-NEXT: stp q0, q2, [x8, #96] -; CHECK-SD-NEXT: stp q3, q1, [x8, #64] +; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d +; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0 +; CHECK-SD-NEXT: ucvtf v16.2d, v16.2d +; CHECK-SD-NEXT: ushll v5.2d, v7.2s, #0 +; CHECK-SD-NEXT: ushll v7.2d, v17.2s, #0 +; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d +; CHECK-SD-NEXT: stp q6, q3, [x8] +; CHECK-SD-NEXT: ushll v3.2d, v18.2s, #0 +; CHECK-SD-NEXT: stp q2, q1, [x8, #224] +; CHECK-SD-NEXT: ucvtf v5.2d, v5.2d +; CHECK-SD-NEXT: ucvtf v1.2d, v7.2d +; CHECK-SD-NEXT: stp q0, q16, [x8, 
#192] +; CHECK-SD-NEXT: ucvtf v2.2d, v3.2d +; CHECK-SD-NEXT: ucvtf v0.2d, v4.2d +; CHECK-SD-NEXT: stp q1, q5, [x8, #96] +; CHECK-SD-NEXT: stp q0, q2, [x8, #64] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: utofp_v32i8_v32f64: diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll index 07772b716ec58..c039da26b7c15 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll @@ -554,11 +554,9 @@ define <4 x i8> @bitcast_i32_to_v4i8(i32 %word) { define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) { ; CHECK-LE-LABEL: bitcast_i16_to_v2i8: ; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: fmov s0, w0 -; CHECK-LE-NEXT: umov w8, v0.b[0] -; CHECK-LE-NEXT: umov w9, v0.b[1] -; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: mov v0.s[1], w9 +; CHECK-LE-NEXT: fmov s1, w0 +; CHECK-LE-NEXT: mov v0.b[0], v1.b[0] +; CHECK-LE-NEXT: mov v0.b[4], v1.b[1] ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; @@ -566,11 +564,9 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) { ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: fmov s0, w0 ; CHECK-BE-NEXT: rev16 v0.16b, v0.16b -; CHECK-BE-NEXT: umov w8, v0.b[0] -; CHECK-BE-NEXT: umov w9, v0.b[1] -; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: mov v0.s[1], w9 -; CHECK-BE-NEXT: rev64 v0.2s, v0.2s +; CHECK-BE-NEXT: mov v1.b[0], v0.b[0] +; CHECK-BE-NEXT: mov v1.b[4], v0.b[1] +; CHECK-BE-NEXT: rev64 v0.2s, v1.2s ; CHECK-BE-NEXT: ret %ret = bitcast i16 %word to <2 x i8> ret <2 x i8> %ret diff --git a/llvm/test/CodeGen/AArch64/shuffle-extend.ll b/llvm/test/CodeGen/AArch64/shuffle-extend.ll index bb31380cc3ade..7658e5ab6936b 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-extend.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-extend.ll @@ -4,10 +4,8 @@ define <2 x i8> @test_v16i8_v2i32_824(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_v16i8_v2i32_824: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.b[8] -; CHECK-NEXT: umov w9, v1.b[8] -; CHECK-NEXT: fmov s0, w8 -; 
CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov v0.b[0], v0.b[8] +; CHECK-NEXT: mov v0.b[4], v1.b[8] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <2 x i32> @@ -18,10 +16,8 @@ define <2 x i8> @test_v16i8_v2i32_824(<16 x i8> %a, <16 x i8> %b) { define <2 x i8> @test_v16i8_v2i32_016(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_v16i8_v2i32_016: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: umov w9, v1.b[0] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov v0.b[0], v0.b[0] +; CHECK-NEXT: mov v0.b[4], v1.b[0] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <2 x i32> @@ -33,11 +29,9 @@ define <2 x i8> @test_v8i8_v2i32_08(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_v8i8_v2i32_08: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.b[0] ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umov w9, v1.b[0] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov v0.b[0], v0.b[0] +; CHECK-NEXT: mov v0.b[4], v1.b[0] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret %c = shufflevector <8 x i8> %a, <8 x i8> %b, <2 x i32> @@ -48,10 +42,8 @@ define <2 x i8> @test_v8i8_v2i32_08(<8 x i8> %a, <8 x i8> %b) { define <2 x i16> @test_v8i16_v2i32_08(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_v8i16_v2i32_08: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: umov w9, v1.h[0] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov v0.h[0], v0.h[0] +; CHECK-NEXT: mov v0.h[2], v1.h[0] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <2 x i32> @@ -63,11 +55,9 @@ define <2 x i16> @test_v4i16_v2i32_04(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_v4i16_v2i32_04: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov 
w8, v0.h[0] ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umov w9, v1.h[0] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov v0.h[0], v0.h[0] +; CHECK-NEXT: mov v0.h[2], v1.h[0] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret %c = shufflevector <4 x i16> %a, <4 x i16> %b, <2 x i32> @@ -79,14 +69,10 @@ define <2 x i16> @test_v4i16_v2i32_04(<4 x i16> %a, <4 x i16> %b) { define <4 x i8> @test_v16i8_v4i16_824(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_v16i8_v4i16_824: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.b[8] -; CHECK-NEXT: umov w9, v1.b[8] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v2.h[1], w9 -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: umov w8, v1.b[0] -; CHECK-NEXT: mov v2.h[3], w8 +; CHECK-NEXT: mov v2.b[0], v0.b[8] +; CHECK-NEXT: mov v2.b[2], v1.b[8] +; CHECK-NEXT: mov v2.b[4], v0.b[0] +; CHECK-NEXT: mov v2.b[6], v1.b[0] ; CHECK-NEXT: add v0.4h, v2.4h, v2.4h ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <4 x i32> @@ -97,14 +83,10 @@ define <4 x i8> @test_v16i8_v4i16_824(<16 x i8> %a, <16 x i8> %b) { define <4 x i8> @test_v16i8_v4i16_016(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_v16i8_v4i16_016: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: umov w9, v1.b[0] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: umov w8, v0.b[4] -; CHECK-NEXT: mov v2.h[1], w9 -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: umov w8, v1.b[4] -; CHECK-NEXT: mov v2.h[3], w8 +; CHECK-NEXT: mov v2.b[0], v0.b[0] +; CHECK-NEXT: mov v2.b[2], v1.b[0] +; CHECK-NEXT: mov v2.b[4], v0.b[4] +; CHECK-NEXT: mov v2.b[6], v1.b[4] ; CHECK-NEXT: add v0.4h, v2.4h, v2.4h ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <4 x i32> @@ -116,15 +98,11 @@ define <4 x i8> @test_v8i8_v4i16_08(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_v8i8_v4i16_08: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.b[0] +; 
CHECK-NEXT: mov v2.b[0], v0.b[0] ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: umov w9, v1.b[0] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: umov w8, v0.b[4] -; CHECK-NEXT: mov v2.h[1], w9 -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: umov w8, v1.b[4] -; CHECK-NEXT: mov v2.h[3], w8 +; CHECK-NEXT: mov v2.b[2], v1.b[0] +; CHECK-NEXT: mov v2.b[4], v0.b[4] +; CHECK-NEXT: mov v2.b[6], v1.b[4] ; CHECK-NEXT: add v0.4h, v2.4h, v2.4h ; CHECK-NEXT: ret %c = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> @@ -215,23 +193,19 @@ define i1 @test2(ptr %add.ptr, ptr %result, <2 x i64> %hi, <2 x i64> %lo) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: movi v3.16b, #1 +; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: cmgt v0.2d, v2.2d, v0.2d ; CHECK-NEXT: cmgt v4.2d, v1.2d, v2.2d ; CHECK-NEXT: sub v1.2d, v2.2d, v1.2d +; CHECK-NEXT: dup v2.2d, x9 ; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: and v3.16b, v4.16b, v3.16b -; CHECK-NEXT: umov w8, v0.b[8] -; CHECK-NEXT: umov w9, v3.b[8] -; CHECK-NEXT: umov w10, v0.b[0] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: umov w8, v3.b[0] -; CHECK-NEXT: fmov s3, w10 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: mov v3.s[1], w8 -; CHECK-NEXT: dup v2.2d, x9 -; CHECK-NEXT: add v0.2s, v0.2s, v0.2s -; CHECK-NEXT: orr v0.8b, v0.8b, v3.8b +; CHECK-NEXT: mov v5.b[0], v0.b[8] +; CHECK-NEXT: mov v0.b[0], v0.b[0] +; CHECK-NEXT: mov v5.b[4], v3.b[8] +; CHECK-NEXT: mov v0.b[4], v3.b[0] +; CHECK-NEXT: add v3.2s, v5.2s, v5.2s +; CHECK-NEXT: orr v0.8b, v3.8b, v0.8b ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: add v0.2d, v1.2d, v2.2d diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll index a6b43d514594e..d31659c30f21d 100644 --- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll @@ -243,26 +243,18 @@ define <8 x double> @sitofp_v8i8_double(<8 x i8> %a) { ; CHECK-LABEL: 
sitofp_v8i8_double: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: umov w9, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[4] -; CHECK-NEXT: umov w12, v0.b[6] -; CHECK-NEXT: umov w10, v0.b[1] -; CHECK-NEXT: umov w13, v0.b[3] -; CHECK-NEXT: umov w14, v0.b[5] -; CHECK-NEXT: umov w15, v0.b[7] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: fmov s3, w12 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w13 -; CHECK-NEXT: mov v2.s[1], w14 -; CHECK-NEXT: mov v3.s[1], w15 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v2.2s, v2.2s, #24 -; CHECK-NEXT: shl v3.2s, v3.2s, #24 +; CHECK-NEXT: mov v1.b[0], v0.b[0] +; CHECK-NEXT: mov v2.b[0], v0.b[2] +; CHECK-NEXT: mov v3.b[0], v0.b[4] +; CHECK-NEXT: mov v4.b[0], v0.b[6] +; CHECK-NEXT: mov v1.b[4], v0.b[1] +; CHECK-NEXT: mov v2.b[4], v0.b[3] +; CHECK-NEXT: mov v3.b[4], v0.b[5] +; CHECK-NEXT: mov v4.b[4], v0.b[7] +; CHECK-NEXT: shl v0.2s, v1.2s, #24 +; CHECK-NEXT: shl v1.2s, v2.2s, #24 +; CHECK-NEXT: shl v2.2s, v3.2s, #24 +; CHECK-NEXT: shl v3.2s, v4.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v1.2s, v1.2s, #24 ; CHECK-NEXT: sshr v2.2s, v2.2s, #24 @@ -283,68 +275,52 @@ define <8 x double> @sitofp_v8i8_double(<8 x i8> %a) { define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) { ; CHECK-LABEL: sitofp_v16i8_double: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.b[0] ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: umov w9, v0.b[1] -; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w14, v0.b[6] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w13, v0.b[5] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: umov w15, v1.b[0] -; CHECK-NEXT: umov w17, v1.b[2] -; CHECK-NEXT: umov w0, v1.b[4] -; CHECK-NEXT: umov w16, v1.b[1] -; CHECK-NEXT: umov w18, v1.b[3] -; CHECK-NEXT: umov w8, v0.b[7] -; CHECK-NEXT: fmov s0, w10 
-; CHECK-NEXT: umov w10, v1.b[5] -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: umov w9, v1.b[6] -; CHECK-NEXT: fmov s3, w12 -; CHECK-NEXT: umov w12, v1.b[7] -; CHECK-NEXT: fmov s1, w14 -; CHECK-NEXT: fmov s4, w15 -; CHECK-NEXT: fmov s5, w17 -; CHECK-NEXT: fmov s6, w0 -; CHECK-NEXT: mov v0.s[1], w11 -; CHECK-NEXT: mov v3.s[1], w13 -; CHECK-NEXT: fmov s7, w9 -; CHECK-NEXT: mov v1.s[1], w8 -; CHECK-NEXT: mov v4.s[1], w16 -; CHECK-NEXT: mov v5.s[1], w18 -; CHECK-NEXT: mov v6.s[1], w10 -; CHECK-NEXT: shl v2.2s, v2.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: mov v7.s[1], w12 -; CHECK-NEXT: shl v3.2s, v3.2s, #24 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v4.2s, v4.2s, #24 -; CHECK-NEXT: sshr v2.2s, v2.2s, #24 -; CHECK-NEXT: shl v5.2s, v5.2s, #24 -; CHECK-NEXT: shl v6.2s, v6.2s, #24 +; CHECK-NEXT: mov v2.b[0], v0.b[0] +; CHECK-NEXT: mov v3.b[0], v0.b[2] +; CHECK-NEXT: mov v4.b[0], v0.b[4] +; CHECK-NEXT: mov v5.b[0], v0.b[6] +; CHECK-NEXT: mov v6.b[0], v1.b[0] +; CHECK-NEXT: mov v7.b[0], v1.b[2] +; CHECK-NEXT: mov v16.b[0], v1.b[4] +; CHECK-NEXT: mov v17.b[0], v1.b[6] +; CHECK-NEXT: mov v2.b[4], v0.b[1] +; CHECK-NEXT: mov v3.b[4], v0.b[3] +; CHECK-NEXT: mov v4.b[4], v0.b[5] +; CHECK-NEXT: mov v5.b[4], v0.b[7] +; CHECK-NEXT: mov v6.b[4], v1.b[1] +; CHECK-NEXT: mov v7.b[4], v1.b[3] +; CHECK-NEXT: mov v16.b[4], v1.b[5] +; CHECK-NEXT: mov v17.b[4], v1.b[7] +; CHECK-NEXT: shl v0.2s, v2.2s, #24 +; CHECK-NEXT: shl v1.2s, v3.2s, #24 +; CHECK-NEXT: shl v2.2s, v4.2s, #24 +; CHECK-NEXT: shl v3.2s, v5.2s, #24 +; CHECK-NEXT: shl v4.2s, v6.2s, #24 +; CHECK-NEXT: shl v5.2s, v7.2s, #24 +; CHECK-NEXT: shl v6.2s, v16.2s, #24 +; CHECK-NEXT: shl v7.2s, v17.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: sshr v1.2s, v1.2s, #24 +; CHECK-NEXT: sshr v2.2s, v2.2s, #24 ; CHECK-NEXT: sshr v3.2s, v3.2s, #24 -; CHECK-NEXT: shl v7.2s, v7.2s, #24 ; CHECK-NEXT: sshr v4.2s, v4.2s, #24 -; CHECK-NEXT: sshr v1.2s, v1.2s, #24 ; CHECK-NEXT: sshr v5.2s, v5.2s, #24 ; 
CHECK-NEXT: sshr v6.2s, v6.2s, #24 +; CHECK-NEXT: sshr v7.2s, v7.2s, #24 +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: sshll v1.2d, v1.2s, #0 ; CHECK-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-NEXT: sshll v16.2d, v0.2s, #0 ; CHECK-NEXT: sshll v3.2d, v3.2s, #0 -; CHECK-NEXT: sshr v7.2s, v7.2s, #24 ; CHECK-NEXT: sshll v4.2d, v4.2s, #0 -; CHECK-NEXT: sshll v17.2d, v1.2s, #0 ; CHECK-NEXT: sshll v5.2d, v5.2s, #0 ; CHECK-NEXT: sshll v6.2d, v6.2s, #0 -; CHECK-NEXT: scvtf v0.2d, v2.2d -; CHECK-NEXT: scvtf v1.2d, v16.2d -; CHECK-NEXT: scvtf v2.2d, v3.2d ; CHECK-NEXT: sshll v7.2d, v7.2s, #0 +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: scvtf v1.2d, v1.2d +; CHECK-NEXT: scvtf v2.2d, v2.2d +; CHECK-NEXT: scvtf v3.2d, v3.2d ; CHECK-NEXT: scvtf v4.2d, v4.2d -; CHECK-NEXT: scvtf v3.2d, v17.2d ; CHECK-NEXT: scvtf v5.2d, v5.2d ; CHECK-NEXT: scvtf v6.2d, v6.2d ; CHECK-NEXT: scvtf v7.2d, v7.2d @@ -420,27 +396,19 @@ define <8 x double> @uitofp_v8i8_double(<8 x i8> %a) { ; CHECK-LABEL: uitofp_v8i8_double: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: umov w9, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[4] -; CHECK-NEXT: umov w12, v0.b[6] -; CHECK-NEXT: umov w10, v0.b[1] -; CHECK-NEXT: umov w13, v0.b[3] -; CHECK-NEXT: umov w14, v0.b[5] -; CHECK-NEXT: umov w15, v0.b[7] +; CHECK-NEXT: mov v2.b[0], v0.b[0] +; CHECK-NEXT: mov v3.b[0], v0.b[2] +; CHECK-NEXT: mov v4.b[0], v0.b[4] +; CHECK-NEXT: mov v5.b[0], v0.b[6] ; CHECK-NEXT: movi d1, #0x0000ff000000ff -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: fmov s3, w11 -; CHECK-NEXT: fmov s4, w12 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v2.s[1], w13 -; CHECK-NEXT: mov v3.s[1], w14 -; CHECK-NEXT: mov v4.s[1], w15 -; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v2.8b, v2.8b, v1.8b -; CHECK-NEXT: and v3.8b, v3.8b, v1.8b -; CHECK-NEXT: and v1.8b, v4.8b, v1.8b +; CHECK-NEXT: mov v2.b[4], v0.b[1] +; CHECK-NEXT: mov v3.b[4], v0.b[3] +; 
CHECK-NEXT: mov v4.b[4], v0.b[5] +; CHECK-NEXT: mov v5.b[4], v0.b[7] +; CHECK-NEXT: and v0.8b, v2.8b, v1.8b +; CHECK-NEXT: and v2.8b, v3.8b, v1.8b +; CHECK-NEXT: and v3.8b, v4.8b, v1.8b +; CHECK-NEXT: and v1.8b, v5.8b, v1.8b ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ushll v2.2d, v2.2s, #0 ; CHECK-NEXT: ushll v3.2d, v3.2s, #0 @@ -458,63 +426,47 @@ define <16 x double> @uitofp_v16i8_double(<16 x i8> %a) { ; CHECK-LABEL: uitofp_v16i8_double: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w9, v0.b[1] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w13, v0.b[5] -; CHECK-NEXT: umov w18, v0.b[6] +; CHECK-NEXT: mov v3.b[0], v0.b[0] +; CHECK-NEXT: mov v4.b[0], v0.b[2] +; CHECK-NEXT: mov v5.b[0], v0.b[4] +; CHECK-NEXT: mov v6.b[0], v0.b[6] ; CHECK-NEXT: movi d1, #0x0000ff000000ff -; CHECK-NEXT: umov w14, v2.b[0] -; CHECK-NEXT: umov w16, v2.b[2] -; CHECK-NEXT: umov w0, v2.b[4] -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: umov w8, v0.b[7] -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: umov w10, v2.b[6] -; CHECK-NEXT: umov w15, v2.b[1] -; CHECK-NEXT: umov w17, v2.b[3] -; CHECK-NEXT: fmov s4, w12 -; CHECK-NEXT: umov w12, v2.b[5] -; CHECK-NEXT: fmov s7, w18 -; CHECK-NEXT: mov v3.s[1], w9 -; CHECK-NEXT: umov w9, v2.b[7] -; CHECK-NEXT: fmov s2, w14 -; CHECK-NEXT: fmov s5, w16 -; CHECK-NEXT: fmov s6, w0 -; CHECK-NEXT: mov v0.s[1], w11 -; CHECK-NEXT: fmov s16, w10 -; CHECK-NEXT: mov v4.s[1], w13 -; CHECK-NEXT: mov v7.s[1], w8 -; CHECK-NEXT: mov v2.s[1], w15 -; CHECK-NEXT: mov v5.s[1], w17 -; CHECK-NEXT: mov v6.s[1], w12 -; CHECK-NEXT: and v3.8b, v3.8b, v1.8b -; CHECK-NEXT: mov v16.s[1], w9 -; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v4.8b, v4.8b, v1.8b -; CHECK-NEXT: and v7.8b, v7.8b, v1.8b -; CHECK-NEXT: and v2.8b, v2.8b, v1.8b -; CHECK-NEXT: ushll v3.2d, v3.2s, #0 -; CHECK-NEXT: and v5.8b, v5.8b, v1.8b -; CHECK-NEXT: and v6.8b, 
v6.8b, v1.8b -; CHECK-NEXT: and v1.8b, v16.8b, v1.8b -; CHECK-NEXT: ushll v16.2d, v0.2s, #0 -; CHECK-NEXT: ushll v17.2d, v4.2s, #0 +; CHECK-NEXT: mov v7.b[0], v2.b[0] +; CHECK-NEXT: mov v16.b[0], v2.b[2] +; CHECK-NEXT: mov v17.b[0], v2.b[4] +; CHECK-NEXT: mov v18.b[0], v2.b[6] +; CHECK-NEXT: mov v3.b[4], v0.b[1] +; CHECK-NEXT: mov v4.b[4], v0.b[3] +; CHECK-NEXT: mov v5.b[4], v0.b[5] +; CHECK-NEXT: mov v6.b[4], v0.b[7] +; CHECK-NEXT: mov v7.b[4], v2.b[1] +; CHECK-NEXT: mov v16.b[4], v2.b[3] +; CHECK-NEXT: mov v17.b[4], v2.b[5] +; CHECK-NEXT: mov v18.b[4], v2.b[7] +; CHECK-NEXT: and v0.8b, v3.8b, v1.8b +; CHECK-NEXT: and v2.8b, v4.8b, v1.8b +; CHECK-NEXT: and v3.8b, v5.8b, v1.8b +; CHECK-NEXT: and v4.8b, v6.8b, v1.8b +; CHECK-NEXT: and v5.8b, v7.8b, v1.8b +; CHECK-NEXT: and v6.8b, v16.8b, v1.8b +; CHECK-NEXT: and v7.8b, v17.8b, v1.8b +; CHECK-NEXT: and v1.8b, v18.8b, v1.8b +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-NEXT: ushll v7.2d, v7.2s, #0 -; CHECK-NEXT: ucvtf v0.2d, v3.2d +; CHECK-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-NEXT: ushll v4.2d, v4.2s, #0 ; CHECK-NEXT: ushll v5.2d, v5.2s, #0 ; CHECK-NEXT: ushll v6.2d, v6.2s, #0 -; CHECK-NEXT: ushll v18.2d, v1.2s, #0 -; CHECK-NEXT: ucvtf v1.2d, v16.2d -; CHECK-NEXT: ucvtf v4.2d, v2.2d -; CHECK-NEXT: ucvtf v2.2d, v17.2d -; CHECK-NEXT: ucvtf v3.2d, v7.2d -; CHECK-NEXT: ucvtf v5.2d, v5.2d -; CHECK-NEXT: ucvtf v6.2d, v6.2d -; CHECK-NEXT: ucvtf v7.2d, v18.2d +; CHECK-NEXT: ushll v7.2d, v7.2s, #0 +; CHECK-NEXT: ushll v16.2d, v1.2s, #0 +; CHECK-NEXT: ucvtf v0.2d, v0.2d +; CHECK-NEXT: ucvtf v1.2d, v2.2d +; CHECK-NEXT: ucvtf v2.2d, v3.2d +; CHECK-NEXT: ucvtf v3.2d, v4.2d +; CHECK-NEXT: ucvtf v4.2d, v5.2d +; CHECK-NEXT: ucvtf v5.2d, v6.2d +; CHECK-NEXT: ucvtf v6.2d, v7.2d +; CHECK-NEXT: ucvtf v7.2d, v16.2d ; CHECK-NEXT: ret %1 = uitofp <16 x i8> %a to <16 x double> ret <16 x double> %1