diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index eb007c25ac89e..5b941d173ec2b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20945,8 +20945,9 @@ static SDValue performBuildVectorCombine(SDNode *N,
   return SDValue();
 }
 
-static SDValue performTruncateCombine(SDNode *N,
-                                      SelectionDAG &DAG) {
+static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
+                                      TargetLowering::DAGCombinerInfo &DCI) {
+  SDLoc DL(N);
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   if (VT.isFixedLengthVector() && VT.is64BitVector() && N0.hasOneUse() &&
@@ -20954,8 +20955,37 @@ static SDValue performTruncateCombine(SDNode *N,
     SDValue Op = N0.getOperand(0);
     if (VT.getScalarType() == MVT::i32 &&
         N0.getOperand(0).getValueType().getScalarType() == MVT::i64)
-      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i32, Op);
-    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Op);
+      Op = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op);
+    return DAG.getNode(N0.getOpcode(), DL, VT, Op);
+  }
+
+  // Performing the following combine produces a preferable form for ISEL.
+  // i32 (trunc (extract Vi64, idx)) -> i32 (extract (nvcast Vi32), idx*2)
+  if (DCI.isAfterLegalizeDAG() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      N0.hasOneUse()) {
+    SDValue Op = N0.getOperand(0);
+    SDValue ExtractIndexNode = N0.getOperand(1);
+    if (!isa<ConstantSDNode>(ExtractIndexNode))
+      return SDValue();
+
+    // For a legal DAG, EXTRACT_VECTOR_ELT can only have produced an i32 or i64.
+    // So we can only expect: i32 (trunc (i64 (extract Vi64, idx))).
+    assert((VT == MVT::i32 && N0.getValueType() == MVT::i64) &&
+           "Unexpected legalisation result!");
+
+    EVT SrcVectorType = Op.getValueType();
+    // We also assume that SrcVectorType cannot be a V64 (see
+    // LowerEXTRACT_VECTOR_ELT).
+    assert((SrcVectorType == MVT::v2i64 || SrcVectorType == MVT::nxv2i64) &&
+           "Unexpected legalisation result!");
+
+    unsigned ExtractIndex =
+        cast<ConstantSDNode>(ExtractIndexNode)->getZExtValue();
+    MVT CastVT = SrcVectorType.isScalableVector() ? MVT::nxv4i32 : MVT::v4i32;
+
+    Op = DAG.getNode(AArch64ISD::NVCAST, DL, CastVT, Op);
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+                       DAG.getVectorIdxConstant(ExtractIndex * 2, DL));
   }
 
   return SDValue();
@@ -26258,7 +26288,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::BUILD_VECTOR:
     return performBuildVectorCombine(N, DCI, DAG);
   case ISD::TRUNCATE:
-    return performTruncateCombine(N, DAG);
+    return performTruncateCombine(N, DAG, DCI);
   case AArch64ISD::ANDS:
     return performFlagSettingCombine(N, DCI, ISD::AND);
   case AArch64ISD::ADC:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d015cc15581ad..b37f4a08755c5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6977,6 +6977,12 @@ def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
 def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
           (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
 
+// Also covers DUP (truncate i64 to i32)
+def : Pat<(v2i32 (AArch64dup (i32 (extractelt (v4i32 V128:$Rn), imm:$idx)))),
+          (DUPv2i32lane V128:$Rn, imm:$idx)>;
+def : Pat<(v4i32 (AArch64dup (i32 (extractelt (v4i32 V128:$Rn), imm:$idx)))),
+          (DUPv4i32lane V128:$Rn, imm:$idx)>;
+
 // If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
 // instruction even if the types don't match: we just have to remap the lane
 // carefully. N.b. this trick only applies to truncations.
@@ -6990,44 +6996,20 @@ def VecIndex_x8 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
 }]>;
 
-multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT, ValueType Src128VT,
-                            ValueType ScalVT, Instruction DUP, SDNodeXForm IdxXFORM> {
-  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), imm:$idx)))),
-            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
-
-  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), imm:$idx)))),
-            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
-}
-
-defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
-defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
-defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
-
-defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
-defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
-defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
-
-multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
-                               SDNodeXForm IdxXFORM> {
-  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn), imm:$idx))))),
-            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
-
-  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn), imm:$idx))))),
-            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
-}
-
-defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
-defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
-defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;
-
-defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
-defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
-defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
+class DUPWithTruncPat<ValueType ResVT, ValueType SrcVT, ValueType ScalVT,
+                      Instruction DUP, SDNodeXForm IdxXFORM>
+  : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (SrcVT V128:$Rn), imm:$idx)))),
+        (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+// DUP (truncate i16 to i8)
+def : DUPWithTruncPat<v8i8,  v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
+def : DUPWithTruncPat<v16i8, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
+// DUP (truncate i32/64 to i8)
+def : DUPWithTruncPat<v8i8,  v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
+def : DUPWithTruncPat<v16i8, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
+// DUP (truncate i32/i64 to i16)
+def : DUPWithTruncPat<v4i16, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
+def : DUPWithTruncPat<v8i16, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
 
 // SMOV and UMOV definitions, with some extra patterns for convenience
 defm SMOV : SMov;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 2e16517938182..1b7bc128d6332 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -384,9 +384,9 @@ define void @insert_vec_v4i16_uaddlv_from_v4i32(ptr %0) {
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    ushll.4s v0, v1, #0
-; CHECK-NEXT:    ucvtf.4s v0, v0
-; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    ucvtf.4s v1, v1
+; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -403,13 +403,13 @@ define void @insert_vec_v16i16_uaddlv_from_v4i32(ptr %0) {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    movi.2d v2, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d0, v0
+; CHECK-NEXT:    stp q2, q2, [x0, #32]
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    stp q0, q0, [x0, #32]
 ; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    stp q1, q0, [x0]
+; CHECK-NEXT:    stp q1, q2, [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -430,9 +430,9 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) {
 ; CHECK-NEXT:    uaddlv.4s d0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
 ; CHECK-NEXT:    bic.4h v1, #255, lsl #8
-; CHECK-NEXT:    ushll.4s v0, v1, #0
-; CHECK-NEXT:    ucvtf.4s v0, v0
-; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    ucvtf.4s v1, v1
+; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -449,14 +449,14 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    movi.2d v2, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d0, v0
+; CHECK-NEXT:    stp q2, q2, [x0, #32]
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    bic.4h v1, #255, lsl #8
-; CHECK-NEXT:    stp q0, q0, [x0, #32]
 ; CHECK-NEXT:    ushll.4s v1, v1, #0
 ; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    stp q1, q0, [x0]
+; CHECK-NEXT:    stp q1, q2, [x0]
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/neon-ins-trunc-elt.ll b/llvm/test/CodeGen/AArch64/neon-ins-trunc-elt.ll
new file mode 100644
index 0000000000000..0d58fc59c2c31
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-ins-trunc-elt.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+neon < %s | FileCheck %s
+
+; Inserting a truncated (i64 to i32) element from the bottom 128-bits of any vector type into a NEON vector should use INS (element) of the
+; truncated size to avoid pointless GPR trips.
+
+
+define <2 x i32> @test_s_trunc_d_lane0(<2 x i32> %a, <1 x i64> %b) {
+; CHECK-LABEL: test_s_trunc_d_lane0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %c = extractelement <1 x i64> %b, i32 0
+  %d = trunc i64 %c to i32
+  %e = insertelement <2 x i32> %a, i32 %d, i64 0
+  ret <2 x i32> %e
+}
+
+define <2 x i32> @test_s_trunc_d_qlane1(<2 x i32> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_s_trunc_d_qlane1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.s[0], v1.s[2]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %c = extractelement <2 x i64> %b, i32 1
+  %d = trunc i64 %c to i32
+  %e = insertelement <2 x i32> %a, i32 %d, i64 0
+  ret <2 x i32> %e
+}
+
+define <4 x i32> @test_qs_trunc_d_lane0(<4 x i32> %a, <1 x i64> %b) {
+; CHECK-LABEL: test_qs_trunc_d_lane0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-NEXT:    ret
+  %c = extractelement <1 x i64> %b, i32 0
+  %d = trunc i64 %c to i32
+  %e = insertelement <4 x i32> %a, i32 %d, i64 0
+  ret <4 x i32> %e
+}
+
+define <4 x i32> @test_qs_trunc_d_qlane1(<4 x i32> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_qs_trunc_d_qlane1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov v0.s[3], v1.s[2]
+; CHECK-NEXT:    ret
+  %c = extractelement <2 x i64> %b, i32 1
+  %d = trunc i64 %c to i32
+  %e = insertelement <4 x i32> %a, i32 %d, i64 3
+  ret <4 x i32> %e
+}
+
+; ---- From the bottom 128b of an SVE vector
+
+define <2 x i32> @test_s_trunc_dsve_lane0(<2 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_s_trunc_dsve_lane0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %c = extractelement <vscale x 2 x i64> %b, i32 0
+  %d = trunc i64 %c to i32
+  %e = insertelement <2 x i32> %a, i32 %d, i64 0
+  ret <2 x i32> %e
+}
+
+define <2 x i32> @test_s_trunc_dsve_lane1(<2 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_s_trunc_dsve_lane1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.s[1], v1.s[2]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %c = extractelement <vscale x 2 x i64> %b, i32 1
+  %d = trunc i64 %c to i32
+  %e = insertelement <2 x i32> %a, i32 %d, i64 1
+  ret <2 x i32> %e
+}
+
+; (negative test) Extracted element is not within V-register.
+define <2 x i32> @test_s_trunc_dsve_lane2(<2 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_s_trunc_dsve_lane2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, z1.s[4]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %c = extractelement <vscale x 2 x i64> %b, i32 2
+  %d = trunc i64 %c to i32
+  %e = insertelement <2 x i32> %a, i32 %d, i64 1
+  ret <2 x i32> %e
+}
+
+define <4 x i32> @test_qs_trunc_dsve_lane0(<4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_qs_trunc_dsve_lane0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-NEXT:    ret
+  %c = extractelement <vscale x 2 x i64> %b, i32 0
+  %d = trunc i64 %c to i32
+  %e = insertelement <4 x i32> %a, i32 %d, i64 0
+  ret <4 x i32> %e
+}
+
+define <4 x i32> @test_qs_trunc_dsve_lane1(<4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_qs_trunc_dsve_lane1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov v0.s[3], v1.s[2]
+; CHECK-NEXT:    ret
+  %c = extractelement <vscale x 2 x i64> %b, i32 1
+  %d = trunc i64 %c to i32
+  %e = insertelement <4 x i32> %a, i32 %d, i64 3
+  ret <4 x i32> %e
+}
+
+; (negative test) Extracted element is not within V-register.
+define <4 x i32> @test_qs_trunc_dsve_lane2(<4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_qs_trunc_dsve_lane2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, z1.s[4]
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    mov v0.s[3], w8
+; CHECK-NEXT:    ret
+  %c = extractelement <vscale x 2 x i64> %b, i32 2
+  %d = trunc i64 %c to i32
+  %e = insertelement <4 x i32> %a, i32 %d, i64 3
+  ret <4 x i32> %e
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
index 7bc31d44bb654..b813b8f84ba16 100644
--- a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
+++ b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
@@ -91,8 +91,7 @@ define i32 @add_i32(<vscale x 8 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    add z0.s, z0.s, z2.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %r1 = call i32 @llvm.vector.reduce.add.i32.nxv8i32(<vscale x 8 x i32> %a)
   %r2 = call i32 @llvm.vector.reduce.add.i32.nxv4i32(<vscale x 4 x i32> %b)
@@ -112,8 +111,7 @@ define i16 @add_ext_i16( %a, %b) {
 ; CHECK-NEXT:    add z1.h, z1.h, z3.h
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %ae = zext %a to
   %be = zext %b to
@@ -139,8 +137,7 @@ define i16 @add_ext_v32i16( %a, %b) {
 ; CHECK-NEXT:    add z1.h, z2.h, z5.h
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %ae = zext %a to
   %be = zext %b to
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index 6d4f5963881e5..939c7e4310018 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -644,8 +644,8 @@ define i1 @test_lane4_2xi1(<vscale x 2 x i1> %a) #0 {
 ; CHECK-LABEL: test_lane4_2xi1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
-; CHECK-NEXT:    mov z0.d, z0.d[4]
-; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    mov z0.s, z0.s[8]
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x i1> %a, i32 4
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 518e3573b5edd..965af2a745afd 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -238,11 +238,8 @@ define <2 x i1> @extract_v2i1_nxv2i1(<vscale x 2 x i1> %inmask) {
 ; CHECK-LABEL: extract_v2i1_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    mov x8, v0.d[1]
-; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    mov v0.s[1], w8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    mov v0.s[1], v0.s[2]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %mask = call <2 x i1> @llvm.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %inmask, i64 0)
   ret <2 x i1> %mask
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
index 752c2cd34bfe4..be19e9ef5e86f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
@@ -37,8 +37,7 @@ define i8 @uaddv_v32i8(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.b, vl32
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <32 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op)
@@ -54,8 +53,7 @@ define i8 @uaddv_v64i8(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    add z0.b, z1.b, z0.b
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.b
-; VBITS_GE_256-NEXT:    fmov x0, d0
-; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; VBITS_GE_256-NEXT:    fmov w0, s0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v64i8:
@@ -63,8 +61,7 @@ define i8 @uaddv_v64i8(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
 ; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.b
-; VBITS_GE_512-NEXT:    fmov x0, d0
-; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; VBITS_GE_512-NEXT:    fmov w0, s0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <64 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %op)
@@ -77,8 +74,7 @@ define i8 @uaddv_v128i8(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.b, vl128
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <128 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %op)
@@ -91,8 +87,7 @@ define i8 @uaddv_v256i8(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.b, vl256
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <256 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> %op)
@@ -127,8 +122,7 @@ define i16 @uaddv_v16i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <16 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op)
@@ -144,8 +138,7 @@ define i16 @uaddv_v32i16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    add z0.h, z1.h, z0.h
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.h
-; VBITS_GE_256-NEXT:    fmov x0, d0
-; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; VBITS_GE_256-NEXT:    fmov w0, s0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v32i16:
@@ -153,8 +146,7 @@ define i16 @uaddv_v32i16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.h
-; VBITS_GE_512-NEXT:    fmov x0, d0
-; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; VBITS_GE_512-NEXT:    fmov w0, s0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %op)
@@ -167,8 +159,7 @@ define i16 @uaddv_v64i16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <64 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %op)
@@ -181,8 +172,7 @@ define i16 @uaddv_v128i16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <128 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> %op)
@@ -217,8 +207,7 @@ define i32 @uaddv_v8i32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <8 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op)
@@ -234,8 +223,7 @@ define i32 @uaddv_v16i32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    add z0.s, z1.s, z0.s
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.s
-; VBITS_GE_256-NEXT:    fmov x0, d0
-; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; VBITS_GE_256-NEXT:    fmov w0, s0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v16i32:
@@ -243,8 +231,7 @@ define i32 @uaddv_v16i32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.s
-; VBITS_GE_512-NEXT:    fmov x0, d0
-; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; VBITS_GE_512-NEXT:    fmov w0, s0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %op)
@@ -257,8 +244,7 @@ define i32 @uaddv_v32i32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <32 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %op)
@@ -271,8 +257,7 @@ define i32 @uaddv_v64i32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %op = load <64 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %op)
diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
index 8c1b5225b7f25..6ec18477fe1a0 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
@@ -146,8 +146,7 @@ define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
   ret i8 %res
@@ -158,8 +157,7 @@ define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
   ret i16 %res
@@ -170,8 +168,7 @@ define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
   ret i32 %res
@@ -422,8 +419,7 @@ define i8 @uaddv_nxv12i8(<vscale x 12 x i8> %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8> %a)
   ret i8 %res
diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
index dd7b15ef5ee6f..90383b43d5812 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
@@ -33,8 +33,7 @@ define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    orv d0, p0, z0.d
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
   ret i32 %res
@@ -61,8 +60,7 @@ define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    eorv d0, p0, z0.d
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
   ret i16 %res
@@ -87,8 +85,7 @@ define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
   ret i16 %res
@@ -100,8 +97,7 @@ define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
   ret i16 %res
@@ -115,8 +111,7 @@ define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
   ret i32 %res
@@ -130,8 +125,7 @@ define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
 ; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uminv d0, p0, z0.d
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
   ret i32 %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 92a67cba55f7a..244dcc734bd7c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -15,8 +15,7 @@ define i8 @uaddv_v8i8(<8 x i8> %a) {
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i8:
@@ -51,8 +50,7 @@ define i8 @uaddv_v16i8(<16 x i8> %a) {
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v16i8:
@@ -103,8 +101,7 @@ define i8 @uaddv_v32i8(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    add z0.b, z1.b, z0.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v32i8:
@@ -188,8 +185,7 @@ define i16 @uaddv_v4i16(<4 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v4i16:
@@ -216,8 +212,7 @@ define i16 @uaddv_v8i16(<8 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i16:
@@ -252,8 +247,7 @@ define i16 @uaddv_v16i16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    add z0.h, z1.h, z0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v16i16:
@@ -305,8 +299,7 @@ define i32 @uaddv_v2i32(<2 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v2i32:
@@ -328,8 +321,7 @@ define i32 @uaddv_v4i32(<4 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v4i32:
@@ -353,8 +345,7 @@ define i32 @uaddv_v8i32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    add z0.s, z1.s, z0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
index 00a15f4bcd639..688537704a6f7 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
@@ -66,8 +66,7 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    add z0.s, z2.s, z0.s
 ; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
 ; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
-; STREAMING-SVE-NEXT:    fmov x0, d0
-; STREAMING-SVE-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-SVE-NEXT:    fmov w0, s0
 ; STREAMING-SVE-NEXT:    ret
   %1 = zext <32 x i8> %a to <32 x i32>
   %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
@@ -134,8 +133,7 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    add z0.s, z2.s, z0.s
 ; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
 ; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
-; STREAMING-SVE-NEXT:    fmov x0, d0
-; STREAMING-SVE-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-SVE-NEXT:    fmov w0, s0
 ; STREAMING-SVE-NEXT:    ret
   %1 = sext <32 x i8> %a to <32 x i32>
   %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
diff --git a/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll b/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
index 91f8f5c2c90d8..6af26067cd6d6 100644
--- a/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
@@ -42,8 +42,7 @@ define i32 @test( %bin.rdx, %bin.rdx2) {
 ; CHECK-NEXT:    add z1.s, z3.s, z1.s
 ; CHECK-NEXT:    add z0.s, z1.s, z0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %a = sext %bin.rdx to
   %b = sext %bin.rdx2 to
diff --git a/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll b/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
index f0856c43daf1d..e6905f687ad9a 100644
--- a/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
@@ -5,8 +5,7 @@ define i32 @uaddlv_uaddlp_v8i16(<8 x i16> %0) {
 ; CHECK-LABEL: uaddlv_uaddlp_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uaddlv s0, v0.8h
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %2 = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %0)
   %3 = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %2)
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index 557aa010b3a7d..7f2eefe5ed72f 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -81,7 +81,7 @@ define i8 @convert_to_bitmask2(<2 x i64> %vec) {
 ; CHECK-NEXT:    ldr q1, [x8, lCPI3_0@PAGEOFF]
 ; CHECK-NEXT:    bic.16b v0, v1, v0
 ; CHECK-NEXT:    addp.2d d0, v0
-; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x3
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:    .loh AdrpLdr Lloh6, Lloh7
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index 625e8ae6a98dc..1bdf7bbb7f813 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -833,7 +833,7 @@ define i32 @reduce_xor_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT:    addp d0, v0.2d
-; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    csel w0, w0, w1, ne
 ; CHECK-NEXT:    ret