diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 076a6235eef0a..639ddcba28468 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -69,7 +69,6 @@ def push_mul_through_sext : push_opcode_through_ext; def AArch64PreLegalizerCombiner: GICombiner< "AArch64PreLegalizerCombinerImpl", [all_combines, - fconstant_to_constant, icmp_redundant_trunc, fold_global_offset, shuffle_to_extract, @@ -341,7 +340,7 @@ def AArch64PostLegalizerLowering : GICombiner<"AArch64PostLegalizerLoweringImpl", [shuffle_vector_lowering, vashr_vlshr_imm, icmp_lowering, build_vector_lowering, - lower_vector_fcmp, form_truncstore, + lower_vector_fcmp, form_truncstore, fconstant_to_constant, vector_sext_inreg_to_shift, unmerge_ext_to_unmerge, lower_mulv2s64, vector_unmerge_lowering, insertelt_nonconst, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 7ee54c5932b15..1593f32d1fc6c 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0) .clampScalar(0, s8, s64); getActionDefinitionsBuilder(G_FCONSTANT) - .legalFor({s32, s64, s128}) - .legalFor(HasFP16, {s16}) + // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT + .legalFor({s16, s32, s64, s128}) .clampScalar(0, MinFPScalar, s128); // FIXME: fix moreElementsToNextPow2 diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 6025f1c9f5f4e..e6dbc7a20088a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -75,6 +75,31 @@ struct ShuffleVectorPseudo { ShuffleVectorPseudo() = 
default; }; +/// Return true if \p MI, a 16/32/64-bit G_FCONSTANT, is better represented as +/// a G_CONSTANT, i.e. when every non-debug user of its result may store. +bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) { + assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); + Register DstReg = MI.getOperand(0).getReg(); + const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); + if (DstSize != 16 && DstSize != 32 && DstSize != 64) + return false; + + // When we're storing a value, it doesn't matter what register bank it's on. + // Since not all floating point constants can be materialized using a fmov, + // it makes more sense to just use a GPR. + return all_of(MRI.use_nodbg_instructions(DstReg), + [](const MachineInstr &Use) { return Use.mayStore(); }); +} + +/// Replace G_FCONSTANT \p MI with a G_CONSTANT of the same bit pattern. +void applyFConstantToConstant(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); + MachineIRBuilder MIB(MI); + const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF(); + MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt()); + MI.eraseFromParent(); +} + /// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector /// sources of the shuffle are different. std::optional> getExtMask(ArrayRef M, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index 8c10673c5e7b9..896eab521bfdb 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -44,31 +44,6 @@ namespace { #include "AArch64GenPreLegalizeGICombiner.inc" #undef GET_GICOMBINER_TYPES -/// Return true if a G_FCONSTANT instruction is known to be better-represented -/// as a G_CONSTANT. 
-bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) { - assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); - Register DstReg = MI.getOperand(0).getReg(); - const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); - if (DstSize != 32 && DstSize != 64) - return false; - - // When we're storing a value, it doesn't matter what register bank it's on. - // Since not all floating point constants can be materialized using a fmov, - // it makes more sense to just use a GPR. - return all_of(MRI.use_nodbg_instructions(DstReg), - [](const MachineInstr &Use) { return Use.mayStore(); }); -} - -/// Change a G_FCONSTANT into a G_CONSTANT. -void applyFConstantToConstant(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); - MachineIRBuilder MIB(MI); - const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF(); - MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt()); - MI.eraseFromParent(); -} - /// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits /// are sign bits. In this case, we can transform the G_ICMP to directly compare /// the wide value with a zero. 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir index 6362ed65d09e3..9381f0f41bbbc 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir @@ -1,11 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s -# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s ... --- name: fconstant_to_constant_s32 alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -24,16 +25,17 @@ body: | ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 - %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000 - %1:_(s64) = G_CONSTANT i64 524 - %2:_(p0) = G_PTR_ADD %0, %1(s64) - G_STORE %3(s32), %2(p0) :: (store (s32)) + %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000 + %2:_(s64) = G_CONSTANT i64 524 + %3:_(p0) = G_PTR_ADD %0, %2(s64) + G_STORE %1(s32), %3(p0) :: (store (s32)) RET_ReallyLR ... 
--- name: fconstant_to_constant_s64 alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -48,7 +50,7 @@ body: | ; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64)) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 - %c:_(s64) = G_FCONSTANT double 0.0 + %c:_(s64) = G_FCONSTANT double 0.000000e+00 G_STORE %c(s64), %ptr(p0) :: (store (s64)) RET_ReallyLR ... @@ -56,6 +58,7 @@ body: | name: no_store_means_no_combine alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -71,7 +74,7 @@ body: | ; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c ; CHECK-NEXT: RET_ReallyLR implicit %add(s64) %v:_(s64) = COPY $x0 - %c:_(s64) = G_FCONSTANT double 0.0 + %c:_(s64) = G_FCONSTANT double 0.000000e+00 %add:_(s64) = G_FADD %v, %c - RET_ReallyLR implicit %add + RET_ReallyLR implicit %add(s64) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir index c301e76852b54..c00ce2242a888 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir @@ -48,8 +48,9 @@ body: | ; CHECK-NEXT: $w0 = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: $x0 = COPY [[C1]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $w0 = COPY [[C2]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) %0:_(s32) = G_FCONSTANT float 1.0 $w0 = COPY %0 %1:_(s64) = G_FCONSTANT double 2.0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir index ddf219dc4927e..c6df3456a8445 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir @@ -8,7 +8,7 @@ tracksRegLiveness: true body: | bb.0: ; NO-FP16-LABEL: name: fp16 - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000 ; NO-FP16-NEXT: $h0 = COPY %cst(s16) ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 ; @@ -26,7 +26,7 @@ tracksRegLiveness: true body: | bb.0: ; NO-FP16-LABEL: name: fp16_non_zero - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000 ; NO-FP16-NEXT: $h0 = COPY %cst(s16) ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 ; @@ -44,7 +44,7 @@ tracksRegLiveness: true body: | bb.1.entry: ; NO-FP16-LABEL: name: nan - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01 ; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16) ; NO-FP16-NEXT: $w0 = COPY %ext(s32) ; NO-FP16-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index cb5df07c7ede4..322a96aca5db2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) { ; ; GISEL-LABEL: postidx32_shalf: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #0 ; =0x0 -; GISEL-NEXT: ldr h1, [x0], #4 -; GISEL-NEXT: fmov s2, w8 +; GISEL-NEXT: movi d1, #0000000000000000 +; GISEL-NEXT: ldr h2, [x0], #4 ; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0 ; GISEL-NEXT: fmov w9, s0 -; GISEL-NEXT: fcvt s3, h1 -; GISEL-NEXT: fmov w8, s1 -; GISEL-NEXT: fcvt s2, h2 -; GISEL-NEXT: fcmp s3, s2 +; GISEL-NEXT: fcvt s3, h2 +; GISEL-NEXT: fmov w8, s2 +; GISEL-NEXT: fcvt s1, h1 +; GISEL-NEXT: fcmp s3, s1 ; GISEL-NEXT: csel w8, w8, w9, mi ; GISEL-NEXT: strh w8, [x1] ; GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index adc536da26f26..b234ef7a5ff8b 
100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) { ; ; CHECK-CVT-GI-LABEL: test_fccmp: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500 -; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800 +; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0 ; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0 ; CHECK-CVT-GI-NEXT: fcvt s2, h0 -; CHECK-CVT-GI-NEXT: fmov s1, w8 -; CHECK-CVT-GI-NEXT: fmov s3, w9 -; CHECK-CVT-GI-NEXT: fmov w9, s0 -; CHECK-CVT-GI-NEXT: fcvt s1, h1 -; CHECK-CVT-GI-NEXT: fcvt s3, h3 -; CHECK-CVT-GI-NEXT: fcmp s2, s1 -; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi -; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt +; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0] +; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1 +; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1] +; CHECK-CVT-GI-NEXT: fmov w8, s0 +; CHECK-CVT-GI-NEXT: fcvt s3, h1 +; CHECK-CVT-GI-NEXT: fmov w9, s1 +; CHECK-CVT-GI-NEXT: fcvt s4, h4 +; CHECK-CVT-GI-NEXT: fcmp s2, s3 +; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi +; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt ; CHECK-CVT-GI-NEXT: strh w8, [x0] ; CHECK-CVT-GI-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll index 51aad4fe25d3b..7409bfb91454c 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: 
fcvtzs_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: 
fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, 
:lo12:.LCPI33_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: ; 
CHECK-GI-NO16-NEXT: ucvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI45_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI46_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI47_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI55_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 
@@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI56_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI57_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI58_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI66_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; 
CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI67_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI68_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI69_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll index 594a3ab79d73b..be07978cd8516 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll @@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_v2HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0] ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1] ; CHECK-GI-NOFP16-NEXT: fcvt 
s1, h1 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 @@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_v3HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI3_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI3_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI4_0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI4_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_2H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI7_0 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: fmov s2, w8 +; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI7_0] ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3 ; 
CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll index 18f463cfcf7c9..40925da0557ec 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) { ; ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop: ; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000 ; CHECK-GI-NOFP16-NEXT: mov x8, xzr -; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0 ; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop ; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8] -; CHECK-GI-NOFP16-NEXT: fmov s1, w9 +; CHECK-GI-NOFP16-NEXT: ldr d1, [x0, x8] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 ; CHECK-GI-NOFP16-NEXT: add x8, x8, #8 ; CHECK-GI-NOFP16-NEXT: cmp w8, #56 -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fmov w9, s0 ; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1 ; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit -; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop: @@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) { ; ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop: ; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000 ; CHECK-GI-NOFP16-NEXT: mov x8, xzr 
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0 ; CHECK-GI-NOFP16-NEXT: .LBB14_1: // %loop ; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8] +; CHECK-GI-NOFP16-NEXT: ldr q1, [x0, x8] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 ; CHECK-GI-NOFP16-NEXT: add x8, x8, #8 ; CHECK-GI-NOFP16-NEXT: cmp w8, #56 -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fmov s1, w9 +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fmov w9, s0 ; CHECK-GI-NOFP16-NEXT: b.ne .LBB14_1 ; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit -; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop: diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll index e1b21705c95f3..c10d6e94226f2 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll @@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov 
s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_2H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI5_0 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: fmov s2, w8 +; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0] ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]