From c0e510ce1f6a398c7f39f7b2e8b55cb54a7059aa Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 8 Aug 2025 03:15:39 -0500 Subject: [PATCH 1/8] [Test] Add and update tests for lrint A number of backends are missing either all tests for lrint, or specifically those for f16 which currently crashes for `softPromoteHalf` targets. For a number of popular backends, do the following: * Ensure f16, f32, f64, and f128 are all covered * Ensure both a 32- and 64-bit target are tested, if relevant * Add `nounwind` to clean up CFI output * Add a test covering the above if one did not exist --- llvm/test/CodeGen/ARM/llrint-conv.ll | 21 ++++ llvm/test/CodeGen/ARM/lrint-conv.ll | 18 +++ llvm/test/CodeGen/AVR/llrint.ll | 18 +++ llvm/test/CodeGen/AVR/lrint.ll | 18 +++ llvm/test/CodeGen/LoongArch/lrint-conv.ll | 96 +++++++++++++++ llvm/test/CodeGen/MSP430/lrint-conv.ll | 60 +++++++++ llvm/test/CodeGen/Mips/llrint-conv.ll | 15 +++ llvm/test/CodeGen/Mips/lrint-conv.ll | 15 +++ llvm/test/CodeGen/PowerPC/llrint-conv.ll | 32 +++++ llvm/test/CodeGen/PowerPC/lrint-conv.ll | 32 +++++ llvm/test/CodeGen/RISCV/lrint-conv.ll | 76 ++++++++++++ llvm/test/CodeGen/SPARC/lrint-conv.ll | 68 +++++++++++ llvm/test/CodeGen/WebAssembly/lrint-conv.ll | 62 ++++++++++ llvm/test/CodeGen/X86/llrint-conv.ll | 128 +++++++++++++++----- llvm/test/CodeGen/X86/lrint-conv-i32.ll | 74 +++++++++-- llvm/test/CodeGen/X86/lrint-conv-i64.ll | 34 +++++- 16 files changed, 723 insertions(+), 44 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/lrint-conv.ll create mode 100644 llvm/test/CodeGen/MSP430/lrint-conv.ll create mode 100644 llvm/test/CodeGen/RISCV/lrint-conv.ll create mode 100644 llvm/test/CodeGen/SPARC/lrint-conv.ll create mode 100644 llvm/test/CodeGen/WebAssembly/lrint-conv.ll diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll index 017955bb43afb..f0fb2e7543be6 100644 --- a/llvm/test/CodeGen/ARM/llrint-conv.ll +++ b/llvm/test/CodeGen/ARM/llrint-conv.ll @@ -1,6 
+1,16 @@ ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP ; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP +; SOFTFP-LABEL: testmsxh_builtin: +; SOFTFP: bl llrintf +; HARDFP-LABEL: testmsxh_builtin: +; HARDFP: bl llrintf +define i64 @testmsxh_builtin(half %x) { +entry: + %0 = tail call i64 @llvm.llrint.f16(half %x) + ret i64 %0 +} + ; SOFTFP-LABEL: testmsxs_builtin: ; SOFTFP: bl llrintf ; HARDFP-LABEL: testmsxs_builtin: @@ -21,5 +31,16 @@ entry: ret i64 %0 } +; FIXME(#44744): incorrect libcall +; SOFTFP-LABEL: testmsxq_builtin: +; SOFTFP: bl llrintl +; HARDFP-LABEL: testmsxq_builtin: +; HARDFP: bl llrintl +define i64 @testmsxq_builtin(fp128 %x) { +entry: + %0 = tail call i64 @llvm.llrint.f128(fp128 %x) + ret i64 %0 +} + declare i64 @llvm.llrint.f32(float) nounwind readnone declare i64 @llvm.llrint.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll index 192da565c12fd..9aa95112af533 100644 --- a/llvm/test/CodeGen/ARM/lrint-conv.ll +++ b/llvm/test/CodeGen/ARM/lrint-conv.ll @@ -1,6 +1,13 @@ ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP ; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP +; FIXME: crash +; define i32 @testmswh_builtin(half %x) { +; entry: +; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) +; ret i32 %0 +; } + ; SOFTFP-LABEL: testmsws_builtin: ; SOFTFP: bl lrintf ; HARDFP-LABEL: testmsws_builtin: @@ -21,5 +28,16 @@ entry: ret i32 %0 } +; FIXME(#44744): incorrect libcall +; SOFTFP-LABEL: testmswq_builtin: +; SOFTFP: bl lrintl +; HARDFP-LABEL: testmswq_builtin: +; HARDFP: bl lrintl +define i32 @testmswq_builtin(fp128 %x) { +entry: + %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x) + ret i32 %0 +} + declare i32 @llvm.lrint.i32.f32(float) nounwind readnone declare i32 @llvm.lrint.i32.f64(double) nounwind readnone diff --git 
a/llvm/test/CodeGen/AVR/llrint.ll b/llvm/test/CodeGen/AVR/llrint.ll index 32b4c7ab12a4b..c55664f2d7353 100644 --- a/llvm/test/CodeGen/AVR/llrint.ll +++ b/llvm/test/CodeGen/AVR/llrint.ll @@ -1,6 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=avr -mcpu=atmega328p | FileCheck %s +; FIXME: crash "Input type needs to be promoted!" +; define i64 @testmsxh_builtin(half %x) { +; entry: +; %0 = tail call i64 @llvm.llrint.f16(half %x) +; ret i64 %0 +; } + define i64 @testmsxs_builtin(float %x) { ; CHECK-LABEL: testmsxs_builtin: ; CHECK: ; %bb.0: ; %entry @@ -21,5 +28,16 @@ entry: ret i64 %0 } +; FIXME(#44744): incorrect libcall +define i64 @testmsxq_builtin(fp128 %x) { +; CHECK-LABEL: testmsxq_builtin: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: call llrintl +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.llrint.fp128(fp128 %x) + ret i64 %0 +} + declare i64 @llvm.llrint.f32(float) nounwind readnone declare i64 @llvm.llrint.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/AVR/lrint.ll b/llvm/test/CodeGen/AVR/lrint.ll index d7568305f7b51..4ef656060bd10 100644 --- a/llvm/test/CodeGen/AVR/lrint.ll +++ b/llvm/test/CodeGen/AVR/lrint.ll @@ -1,6 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=avr -mcpu=atmega328p | FileCheck %s +; FIXME: crash "Input type needs to be promoted!" 
+; define i32 @testmswh_builtin(half %x) { +; entry: +; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) +; ret i32 %0 +; } + define i32 @testmsws_builtin(float %x) { ; CHECK-LABEL: testmsws_builtin: ; CHECK: ; %bb.0: ; %entry @@ -21,5 +28,16 @@ entry: ret i32 %0 } +; FIXME(#44744): incorrect libcall +define i32 @testmswq_builtin(fp128 %x) { +; CHECK-LABEL: testmswq_builtin: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: call lrint +; CHECK-NEXT: ret +entry: + %0 = tail call i32 @llvm.lrint.i32.fp128(fp128 %x) + ret i32 %0 +} + declare i32 @llvm.lrint.i32.f32(float) nounwind readnone declare i32 @llvm.lrint.i32.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll b/llvm/test/CodeGen/LoongArch/lrint-conv.ll new file mode 100644 index 0000000000000..85de820025614 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll @@ -0,0 +1,96 @@ +; Tests for lrint and llrint, with both i32 and i64 checked. + +; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch32 | FileCheck %s --check-prefixes=LA32 +; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch32 | FileCheck %s --check-prefixes=LA32 +; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32 +; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64 + +; FIXME: crash +; define ITy @test_lrint_ixx_f16(half %x) nounwind { +; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) +; ret ITy %res +; } + +; define ITy @test_llrint_ixx_f16(half %x) nounwind { +; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) +; ret ITy %res +; } + +define ITy @test_lrint_ixx_f32(float %x) nounwind { +; LA32-LABEL: test_lrint_ixx_f32: +; LA32: bl lrintf +; +; LA64-I32-LABEL: test_lrint_ixx_f32: +; LA64-I32: pcaddu18i $ra, %call36(lrintf) +; +; LA64-I64-LABEL: test_lrint_ixx_f32: +; LA64-I64: pcaddu18i $t8, %call36(lrintf) + %res = tail call ITy @llvm.lrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f32(float %x) 
nounwind { +; LA32-LABEL: test_llrint_ixx_f32: +; LA32: bl llrintf +; +; LA64-I32-LABEL: test_llrint_ixx_f32: +; LA64-I32: pcaddu18i $ra, %call36(llrintf) +; +; LA64-I64-LABEL: test_llrint_ixx_f32: +; LA64-I64: pcaddu18i $t8, %call36(llrintf) + %res = tail call ITy @llvm.llrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_lrint_ixx_f64(double %x) nounwind { +; LA32-LABEL: test_lrint_ixx_f64: +; LA32: bl lrint +; +; LA64-I32-LABEL: test_lrint_ixx_f64: +; LA64-I32: pcaddu18i $ra, %call36(lrint) +; +; LA64-I64-LABEL: test_lrint_ixx_f64: +; LA64-I64: pcaddu18i $t8, %call36(lrint) + %res = tail call ITy @llvm.lrint.ITy.f64(double %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f64(double %x) nounwind { +; LA32-LABEL: test_llrint_ixx_f64: +; LA32: bl llrint +; +; LA64-I32-LABEL: test_llrint_ixx_f64: +; LA64-I32: pcaddu18i $ra, %call36(llrint) +; +; LA64-I64-LABEL: test_llrint_ixx_f64: +; LA64-I64: pcaddu18i $t8, %call36(llrint) + %res = tail call ITy @llvm.llrint.ITy.f64(double %x) + ret ITy %res +} + +; FIXME(#44744): incorrect libcall on loongarch32 +define ITy @test_lrint_ixx_f128(fp128 %x) nounwind { +; LA32-LABEL: test_lrint_ixx_f128: +; LA32: bl lrintl +; +; LA64-I32-LABEL: test_lrint_ixx_f128: +; LA64-I32: pcaddu18i $ra, %call36(lrintl) +; +; LA64-I64-LABEL: test_lrint_ixx_f128: +; LA64-I64: pcaddu18i $ra, %call36(lrintl) + %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f128(fp128 %x) nounwind { +; LA32-LABEL: test_llrint_ixx_f128: +; LA32: bl llrintl +; +; LA64-I32-LABEL: test_llrint_ixx_f128: +; LA64-I32: pcaddu18i $ra, %call36(llrintl) +; +; LA64-I64-LABEL: test_llrint_ixx_f128: +; LA64-I64: pcaddu18i $ra, %call36(llrintl) + %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x) + ret ITy %res +} diff --git a/llvm/test/CodeGen/MSP430/lrint-conv.ll b/llvm/test/CodeGen/MSP430/lrint-conv.ll new file mode 100644 index 0000000000000..04ab2af6102a0 --- /dev/null +++ 
b/llvm/test/CodeGen/MSP430/lrint-conv.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; Tests for lrint and llrint, with both i32 and i64 checked. + +; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=msp430-unknown-unknown | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=msp430-unknown-unknown | FileCheck %s --check-prefixes=CHECK + +; FIXME: crash "Input type needs to be promoted!" +; define ITy @test_lrint_ixx_f16(half %x) nounwind { +; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) +; ret ITy %res +; } + +; define ITy @test_llrint_ixx_f16(half %x) nounwind { +; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) +; ret ITy %res +; } + +define ITy @test_lrint_ixx_f32(float %x) nounwind { +; CHECK-LABEL: test_lrint_ixx_f32: +; CHECK: call #lrintf + %res = tail call ITy @llvm.lrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f32(float %x) nounwind { +; CHECK-LABEL: test_llrint_ixx_f32: +; CHECK: call #llrintf + %res = tail call ITy @llvm.llrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_lrint_ixx_f64(double %x) nounwind { +; CHECK-LABEL: test_lrint_ixx_f64: +; CHECK: call #lrint + %res = tail call ITy @llvm.lrint.ITy.f64(double %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f64(double %x) nounwind { +; CHECK-LABEL: test_llrint_ixx_f64: +; CHECK: call #llrint + %res = tail call ITy @llvm.llrint.ITy.f64(double %x) + ret ITy %res +} + +; FIXME(#44744): incorrect libcall +define ITy @test_lrint_ixx_f128(fp128 %x) nounwind { +; CHECK-LABEL: test_lrint_ixx_f128: +; CHECK: call #lrintl + %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f128(fp128 %x) nounwind { +; CHECK-LABEL: test_llrint_ixx_f128: +; CHECK: call #llrintl + %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x) + ret ITy %res +} diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll index 
dcb4e5657e80b..ee3c0d99253a6 100644 --- a/llvm/test/CodeGen/Mips/llrint-conv.ll +++ b/llvm/test/CodeGen/Mips/llrint-conv.ll @@ -1,4 +1,19 @@ ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s +; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s + +; FIXME: crash +; define signext i32 @testmswh(half %x) { +; entry: +; %0 = tail call i64 @llvm.llrint.f16(half %x) +; %conv = trunc i64 %0 to i32 +; ret i32 %conv +; } + +; define i64 @testmsxh(half %x) { +; entry: +; %0 = tail call i64 @llvm.llrint.f16(half %x) +; ret i64 %0 +; } define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll index bd3f7b3babe10..6d2e392675f1c 100644 --- a/llvm/test/CodeGen/Mips/lrint-conv.ll +++ b/llvm/test/CodeGen/Mips/lrint-conv.ll @@ -1,4 +1,19 @@ ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s +; RUN: llc < %s -mtriple=mips -mattr=+soft-float | FileCheck %s + +; FIXME: crash +; define signext i32 @testmswh(half %x) { +; entry: +; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) +; %conv = trunc i64 %0 to i32 +; ret i32 %conv +; } + +; define i64 @testmsxh(half %x) { +; entry: +; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) +; ret i64 %0 +; } define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/PowerPC/llrint-conv.ll b/llvm/test/CodeGen/PowerPC/llrint-conv.ll index daadf85b4085a..dcd3bd25a83c5 100644 --- a/llvm/test/CodeGen/PowerPC/llrint-conv.ll +++ b/llvm/test/CodeGen/PowerPC/llrint-conv.ll @@ -1,4 +1,19 @@ ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s +; RUN: llc < %s -mtriple=powerpc | FileCheck %s + +; FIXME: crash "Input type needs to be promoted!" 
+; define signext i32 @testmswh(half %x) { +; entry: +; %0 = tail call i64 @llvm.llrint.f16(half %x) +; %conv = trunc i64 %0 to i32 +; ret i32 %conv +; } + +; define i64 @testmsxh(half %x) { +; entry: +; %0 = tail call i64 @llvm.llrint.f16(half %x) +; ret i64 %0 +; } ; CHECK-LABEL: testmsws: ; CHECK: bl llrintf @@ -51,6 +66,23 @@ entry: ret i64 %0 } +; CHECK-LABEL: testmswq: +; CHECK: bl llrintf128 +define signext i32 @testmswq(fp128 %x) { +entry: + %0 = tail call i64 @llvm.llrint.f128(fp128 %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: testmslq: +; CHECK: bl llrintf128 +define i64 @testmslq(fp128 %x) { +entry: + %0 = tail call i64 @llvm.llrint.f128(fp128 %x) + ret i64 %0 +} + declare i64 @llvm.llrint.f32(float) nounwind readnone declare i64 @llvm.llrint.f64(double) nounwind readnone declare i64 @llvm.llrint.ppcf128(ppc_fp128) nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/lrint-conv.ll b/llvm/test/CodeGen/PowerPC/lrint-conv.ll index adfc994497323..bc77a200757f4 100644 --- a/llvm/test/CodeGen/PowerPC/lrint-conv.ll +++ b/llvm/test/CodeGen/PowerPC/lrint-conv.ll @@ -1,4 +1,19 @@ ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s +; RUN: llc < %s -mtriple=powerpc | FileCheck %s + +; FIXME: crash "Input type needs to be promoted!" 
+; define signext i32 @testmswh(half %x) { +; entry: +; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) +; %conv = trunc i64 %0 to i32 +; ret i32 %conv +; } + +; define i64 @testmsxh(half %x) { +; entry: +; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) +; ret i64 %0 +; } ; CHECK-LABEL: testmsws: ; CHECK: bl lrintf @@ -51,6 +66,23 @@ entry: ret i64 %0 } +; CHECK-LABEL: testmswq: +; CHECK: bl lrintf128 +define signext i32 @testmswq(fp128 %x) { +entry: + %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: testmslq: +; CHECK: bl lrintf128 +define i64 @testmslq(fp128 %x) { +entry: + %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x) + ret i64 %0 +} + declare i64 @llvm.lrint.i64.f32(float) nounwind readnone declare i64 @llvm.lrint.i64.f64(double) nounwind readnone declare i64 @llvm.lrint.i64.ppcf128(ppc_fp128) nounwind readnone diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll new file mode 100644 index 0000000000000..d3af2153588a1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll @@ -0,0 +1,76 @@ +; Tests for lrint and llrint, with both i32 and i64 checked. 
+ +; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32 +; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32 +; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 +; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64 + +; FIXME: crash +; define ITy @test_lrint_ixx_f16(half %x) nounwind { +; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) +; } + +; define ITy @test_llrint_ixx_f16(half %x) nounwind { +; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) +; } + +define ITy @test_lrint_ixx_f32(float %x) nounwind { +; RV32-LABEL: test_lrint_ixx_f32: +; RV32: call lrintf +; +; RV64-LABEL: test_lrint_ixx_f32: +; RV64: call lrintf + %res = tail call ITy @llvm.lrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f32(float %x) nounwind { +; RV32-LABEL: test_llrint_ixx_f32: +; RV32: call llrintf +; +; RV64-LABEL: test_llrint_ixx_f32: +; RV64: call llrintf + %res = tail call ITy @llvm.llrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_lrint_ixx_f64(double %x) nounwind { +; RV32-LABEL: test_lrint_ixx_f64: +; RV32: call lrint +; +; RV64-LABEL: test_lrint_ixx_f64: +; RV64: call lrint + %res = tail call ITy @llvm.lrint.ITy.f64(double %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f64(double %x) nounwind { +; RV32-LABEL: test_llrint_ixx_f64: +; RV32: call llrint +; +; RV64-LABEL: test_llrint_ixx_f64: +; RV64: call llrint + %res = tail call ITy @llvm.llrint.ITy.f64(double %x) + ret ITy %res +} + +; FIXME(#44744): incorrect libcall on riscv32 +define ITy @test_lrint_ixx_f128(fp128 %x) nounwind { +; RV32-LABEL: test_lrint_ixx_f128: +; RV32: call lrintl +; +; RV64-LABEL: test_lrint_ixx_f128: +; RV64: call lrintl + %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f128(fp128 %x) nounwind { +; RV32-LABEL: test_llrint_ixx_f128: +; RV32: call llrintl +; +; 
RV64-LABEL: test_llrint_ixx_f128: +; RV64: call llrintl + %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x) + ret ITy %res +} diff --git a/llvm/test/CodeGen/SPARC/lrint-conv.ll b/llvm/test/CodeGen/SPARC/lrint-conv.ll new file mode 100644 index 0000000000000..81934114f548f --- /dev/null +++ b/llvm/test/CodeGen/SPARC/lrint-conv.ll @@ -0,0 +1,68 @@ +; Tests for lrint and llrint, with both i32 and i64 checked. + +; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=sparc | FileCheck %s --check-prefixes=SPARC32 +; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=sparc | FileCheck %s --check-prefixes=SPARC32 +; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=sparc64 | FileCheck %s --check-prefixes=SPARC64 +; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=sparc64 | FileCheck %s --check-prefixes=SPARC64 + +; FIXME: crash "Input type needs to be promoted!" +; define ITy @test_lrint_ixx_f16(half %x) nounwind { +; %res = tail call ITy @llvm.lrint.ITy.f16(half %x) +; ret ITy %res +; } + +; define ITy @test_llrint_ixx_f16(half %x) nounwind { +; %res = tail call ITy @llvm.llrint.ITy.f16(half %x) +; ret ITy %res +; } + +define ITy @test_lrint_ixx_f32(float %x) nounwind { +; SPARC32-LABEL: test_lrint_ixx_f32: +; SPARC32: call lrintf +; +; SPARC64-LABEL: test_lrint_ixx_f32: +; SPARC64: call lrintf + %res = tail call ITy @llvm.lrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f32(float %x) nounwind { +; SPARC32-LABEL: test_llrint_ixx_f32: +; SPARC32: call llrintf +; +; SPARC64-LABEL: test_llrint_ixx_f32: +; SPARC64: call llrintf + %res = tail call ITy @llvm.llrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_lrint_ixx_f64(double %x) nounwind { +; SPARC32-LABEL: test_lrint_ixx_f64: +; SPARC32: call lrint +; +; SPARC64-LABEL: test_lrint_ixx_f64: +; SPARC64: call lrint + %res = tail call ITy @llvm.lrint.ITy.f64(double %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f64(double %x) nounwind { +; SPARC32-LABEL: test_llrint_ixx_f64: +; SPARC32: call llrint +; +; SPARC64-LABEL: 
test_llrint_ixx_f64: +; SPARC64: call llrint + %res = tail call ITy @llvm.llrint.ITy.f64(double %x) + ret ITy %res +} + +; FIXME(#41838): unsupported type +; define ITy @test_lrint_ixx_f128(fp128 %x) nounwind { +; %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x) +; ret ITy %res +; } + +; define ITy @test_llrint_ixx_f128(fp128 %x) nounwind { +; %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x) +; ret ITy %res +; } diff --git a/llvm/test/CodeGen/WebAssembly/lrint-conv.ll b/llvm/test/CodeGen/WebAssembly/lrint-conv.ll new file mode 100644 index 0000000000000..0571150cb3505 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/lrint-conv.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; Tests for lrint and llrint, with both i32 and i64 checked. + +; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=wasm32-unknown-unknown | FileCheck %s +; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=wasm32-unknown-unknown | FileCheck %s + +define ITy @test_lrint_ixx_f16(half %x) nounwind { +; CHECK-LABEL: test_lrint_ixx_f16: +; CHECK: call lrintf + %res = tail call ITy @llvm.lrint.ITy.f16(half %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f16(half %x) nounwind { +; CHECK-LABEL: test_llrint_ixx_f16: +; CHECK: call llrintf + %res = tail call ITy @llvm.llrint.ITy.f16(half %x) + ret ITy %res +} + +define ITy @test_lrint_ixx_f32(float %x) nounwind { +; CHECK-LABEL: test_lrint_ixx_f32: +; CHECK: call lrintf + %res = tail call ITy @llvm.lrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f32(float %x) nounwind { +; CHECK-LABEL: test_llrint_ixx_f32: +; CHECK: call llrintf + %res = tail call ITy @llvm.llrint.ITy.f32(float %x) + ret ITy %res +} + +define ITy @test_lrint_ixx_f64(double %x) nounwind { +; CHECK-LABEL: test_lrint_ixx_f64: +; CHECK: call lrint + %res = tail call ITy @llvm.lrint.ITy.f64(double %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f64(double %x) nounwind { +; CHECK-LABEL: test_llrint_ixx_f64: +; 
CHECK: call llrint + %res = tail call ITy @llvm.llrint.ITy.f64(double %x) + ret ITy %res +} + +define ITy @test_lrint_ixx_f128(fp128 %x) nounwind { +; CHECK-LABEL: test_lrint_ixx_f128: +; CHECK: call lrintl + %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x) + ret ITy %res +} + +define ITy @test_llrint_ixx_f128(fp128 %x) nounwind { +; CHECK-LABEL: test_llrint_ixx_f128: +; CHECK: call llrintl + %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x) + ret ITy %res +} diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll index 402daf80a15e8..d3eca5197a94b 100644 --- a/llvm/test/CodeGen/X86/llrint-conv.ll +++ b/llvm/test/CodeGen/X86/llrint-conv.ll @@ -7,14 +7,50 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX -define i64 @testmsxs(float %x) { +define i64 @testmsxh(half %x) nounwind { +; X86-NOSSE-LABEL: testmsxh: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll __extendhfsf2 +; X86-NOSSE-NEXT: fstps (%esp) +; X86-NOSSE-NEXT: calll llrintf +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: testmsxh: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 +; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax +; X86-SSE2-NEXT: movw %ax, (%esp) +; X86-SSE2-NEXT: calll __extendhfsf2 +; X86-SSE2-NEXT: fstps (%esp) +; X86-SSE2-NEXT: calll llrintf +; X86-SSE2-NEXT: popl %ecx +; X86-SSE2-NEXT: retl +; +; X64-SSE-LABEL: testmsxh: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: callq rintf@PLT +; X64-SSE-NEXT: callq __truncsfhf2@PLT +; X64-SSE-NEXT: callq __extendhfsf2@PLT +; X64-SSE-NEXT: cvttss2si %xmm0, %rax +; X64-SSE-NEXT: popq %rcx +; 
X64-SSE-NEXT: retq +entry: + %0 = tail call i64 @llvm.llrint.f16(half %x) + ret i64 %0 +} + +define i64 @testmsxs(float %x) nounwind { ; X86-NOSSE-LABEL: testmsxs: ; X86-NOSSE: # %bb.0: # %entry ; X86-NOSSE-NEXT: pushl %ebp -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 -; X86-NOSSE-NEXT: .cfi_offset %ebp, -8 ; X86-NOSSE-NEXT: movl %esp, %ebp -; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp ; X86-NOSSE-NEXT: andl $-8, %esp ; X86-NOSSE-NEXT: subl $8, %esp ; X86-NOSSE-NEXT: flds 8(%ebp) @@ -23,16 +59,12 @@ define i64 @testmsxs(float %x) { ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp -; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4 ; X86-NOSSE-NEXT: retl ; ; X86-SSE2-LABEL: testmsxs: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 ; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp ; X86-SSE2-NEXT: andl $-8, %esp ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -43,16 +75,12 @@ define i64 @testmsxs(float %x) { ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: testmsxs: ; X86-AVX: # %bb.0: # %entry ; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %ebp, -8 ; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: .cfi_def_cfa_register %ebp ; X86-AVX-NEXT: andl $-8, %esp ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -63,7 +91,6 @@ define i64 @testmsxs(float %x) { ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp -; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: testmsxs: @@ -80,14 +107,11 @@ entry: ret i64 %0 } -define i64 @testmsxd(double %x) { +define i64 @testmsxd(double 
%x) nounwind { ; X86-NOSSE-LABEL: testmsxd: ; X86-NOSSE: # %bb.0: # %entry ; X86-NOSSE-NEXT: pushl %ebp -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 -; X86-NOSSE-NEXT: .cfi_offset %ebp, -8 ; X86-NOSSE-NEXT: movl %esp, %ebp -; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp ; X86-NOSSE-NEXT: andl $-8, %esp ; X86-NOSSE-NEXT: subl $8, %esp ; X86-NOSSE-NEXT: fldl 8(%ebp) @@ -96,16 +120,12 @@ define i64 @testmsxd(double %x) { ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOSSE-NEXT: movl %ebp, %esp ; X86-NOSSE-NEXT: popl %ebp -; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4 ; X86-NOSSE-NEXT: retl ; ; X86-SSE2-LABEL: testmsxd: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 ; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp ; X86-SSE2-NEXT: andl $-8, %esp ; X86-SSE2-NEXT: subl $8, %esp ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero @@ -116,16 +136,12 @@ define i64 @testmsxd(double %x) { ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: testmsxd: ; X86-AVX: # %bb.0: # %entry ; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %ebp, -8 ; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: .cfi_def_cfa_register %ebp ; X86-AVX-NEXT: andl $-8, %esp ; X86-AVX-NEXT: subl $8, %esp ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero @@ -136,7 +152,6 @@ define i64 @testmsxd(double %x) { ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp -; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: testmsxd: @@ -153,14 +168,11 @@ entry: ret i64 %0 } -define i64 @testmsll(x86_fp80 %x) { +define i64 @testmsll(x86_fp80 %x) nounwind { ; X86-LABEL: testmsll: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; 
X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: fldt 8(%ebp) @@ -169,7 +181,6 @@ define i64 @testmsll(x86_fp80 %x) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; ; X64-LABEL: testmsll: @@ -183,6 +194,61 @@ entry: ret i64 %0 } +; FIXME(#44744): incorrect libcall +define i64 @testmslq(fp128 %x) nounwind { +; X86-NOSSE-LABEL: testmslq: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-16, %esp +; X86-NOSSE-NEXT: subl $16, %esp +; X86-NOSSE-NEXT: pushl 20(%ebp) +; X86-NOSSE-NEXT: pushl 16(%ebp) +; X86-NOSSE-NEXT: pushl 12(%ebp) +; X86-NOSSE-NEXT: pushl 8(%ebp) +; X86-NOSSE-NEXT: calll llrintl +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: testmslq: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: pushl 20(%ebp) +; X86-SSE2-NEXT: pushl 16(%ebp) +; X86-SSE2-NEXT: pushl 12(%ebp) +; X86-SSE2-NEXT: pushl 8(%ebp) +; X86-SSE2-NEXT: calll llrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: testmslq: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-16, %esp +; X86-AVX-NEXT: subl $32, %esp +; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll llrintl +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-LABEL: testmslq: +; X64: # %bb.0: # %entry +; X64-NEXT: jmp llrintl@PLT # TAILCALL +entry: + %0 = tail call i64 
@llvm.llrint.fp128(fp128 %x) + ret i64 %0 +} + declare i64 @llvm.llrint.f32(float) nounwind readnone declare i64 @llvm.llrint.f64(double) nounwind readnone declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll index 21580f53ec9b3..3c50aea1095f4 100644 --- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll @@ -7,16 +7,21 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX -define i32 @testmsws(float %x) { +; FIXME: crash +; define i32 @testmswh(half %x) nounwind { +; entry: +; %0 = tail call i32 @llvm.lrint.i32.f16(half %x) +; ret i32 %0 +; } + +define i32 @testmsws(float %x) nounwind { ; X86-NOSSE-LABEL: testmsws: ; X86-NOSSE: # %bb.0: # %entry ; X86-NOSSE-NEXT: pushl %eax -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 ; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpl (%esp) ; X86-NOSSE-NEXT: movl (%esp), %eax ; X86-NOSSE-NEXT: popl %ecx -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 ; X86-NOSSE-NEXT: retl ; ; X86-SSE2-LABEL: testmsws: @@ -43,16 +48,14 @@ entry: ret i32 %0 } -define i32 @testmswd(double %x) { +define i32 @testmswd(double %x) nounwind { ; X86-NOSSE-LABEL: testmswd: ; X86-NOSSE: # %bb.0: # %entry ; X86-NOSSE-NEXT: pushl %eax -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 ; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fistpl (%esp) ; X86-NOSSE-NEXT: movl (%esp), %eax ; X86-NOSSE-NEXT: popl %ecx -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 ; X86-NOSSE-NEXT: retl ; ; X86-SSE2-LABEL: testmswd: @@ -79,16 +82,14 @@ entry: ret i32 %0 } -define i32 @testmsll(x86_fp80 %x) { +define i32 @testmsll(x86_fp80 %x) nounwind { ; X86-LABEL: testmsll: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %eax -; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fistpl 
(%esp) ; X86-NEXT: movl (%esp), %eax ; X86-NEXT: popl %ecx -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: testmsll: @@ -102,6 +103,61 @@ entry: ret i32 %0 } +; FIXME(#44744): incorrect libcall +define i32 @testmswq(fp128 %x) nounwind { +; X86-NOSSE-LABEL: testmswq: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: andl $-16, %esp +; X86-NOSSE-NEXT: subl $16, %esp +; X86-NOSSE-NEXT: pushl 20(%ebp) +; X86-NOSSE-NEXT: pushl 16(%ebp) +; X86-NOSSE-NEXT: pushl 12(%ebp) +; X86-NOSSE-NEXT: pushl 8(%ebp) +; X86-NOSSE-NEXT: calll lrintl +; X86-NOSSE-NEXT: addl $16, %esp +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: testmswq: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: pushl 20(%ebp) +; X86-SSE2-NEXT: pushl 16(%ebp) +; X86-SSE2-NEXT: pushl 12(%ebp) +; X86-SSE2-NEXT: pushl 8(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: testmswq: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: andl $-16, %esp +; X86-AVX-NEXT: subl $32, %esp +; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: retl +; +; X64-LABEL: testmswq: +; X64: # %bb.0: # %entry +; X64-NEXT: jmp lrintl@PLT # TAILCALL +entry: + %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x) + ret i32 %0 +} + declare i32 @llvm.lrint.i32.f32(float) nounwind readnone declare i32 @llvm.lrint.i32.f64(double) nounwind readnone declare i32 @llvm.lrint.i32.f80(x86_fp80) nounwind readnone diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll 
b/llvm/test/CodeGen/X86/lrint-conv-i64.ll index 38fa09085e189..2ba1500df0b6e 100644 --- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll @@ -3,7 +3,23 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX -define i64 @testmsxs(float %x) { +define i64 @testmsxh(half %x) nounwind { +; SSE-LABEL: testmsxh: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: callq rintf@PLT +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: popq %rcx +; SSE-NEXT: retq +entry: + %0 = tail call i64 @llvm.lrint.i64.f16(half %x) + ret i64 %0 +} + +define i64 @testmsxs(float %x) nounwind { ; SSE-LABEL: testmsxs: ; SSE: # %bb.0: # %entry ; SSE-NEXT: cvtss2si %xmm0, %rax @@ -18,7 +34,7 @@ entry: ret i64 %0 } -define i64 @testmsxd(double %x) { +define i64 @testmsxd(double %x) nounwind { ; SSE-LABEL: testmsxd: ; SSE: # %bb.0: # %entry ; SSE-NEXT: cvtsd2si %xmm0, %rax @@ -33,7 +49,7 @@ entry: ret i64 %0 } -define i64 @testmsll(x86_fp80 %x) { +define i64 @testmsll(x86_fp80 %x) nounwind { ; CHECK-LABEL: testmsll: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) @@ -45,7 +61,17 @@ entry: ret i64 %0 } -define i32 @PR125324(float %x) { +; FIXME(#44744): incorrect libcall +define i64 @testmsxq(fp128 %x) nounwind { +; CHECK-LABEL: testmsxq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: jmp lrintl@PLT # TAILCALL +entry: + %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x) + ret i64 %0 +} + +define i32 @PR125324(float %x) nounwind { ; SSE-LABEL: PR125324: ; SSE: # %bb.0: # %entry ; SSE-NEXT: cvtss2si %xmm0, %rax From 6ab0fe9c09e317f6c0c8cf80c37c7ca93c3196fb Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 8 Aug 2025 06:26:56 -0500 Subject: [PATCH 2/8] Update existing vector tests --- 
llvm/test/CodeGen/PowerPC/vector-llrint.ll | 4 + llvm/test/CodeGen/PowerPC/vector-lrint.ll | 8 + llvm/test/CodeGen/X86/vector-llrint-f16.ll | 3 + llvm/test/CodeGen/X86/vector-llrint.ll | 573 ++++++++++++++++++ llvm/test/CodeGen/X86/vector-lrint.ll | 650 +++++++++++++++++++++ 5 files changed, 1238 insertions(+) diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll index 9229fefced67e..7085cf51916da 100644 --- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll @@ -1,4 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: crash "Input type needs to be promoted!" +; SKIP: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; SKIP: -mtriple=powerpc-unknown-unknown -verify-machineinstrs < %s | \ +; SKIP: FileCheck %s --check-prefix=PPC32 ; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64-unknown-unknown -verify-machineinstrs < %s | \ ; RUN: FileCheck %s --check-prefix=BE diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll index c2576d4631db8..b2ade5300dbc3 100644 --- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll @@ -1,4 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: crash "Input type needs to be promoted!" 
+; SKIP: sed 's/iXLen/i32/g' %s | llc -ppc-asm-full-reg-names \ +; SKIP: -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \ +; SKIP: -verify-machineinstrs | FileCheck %s --check-prefixes=PPC32 ; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=BE @@ -9,6 +13,10 @@ ; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ ; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \ ; RUN: FileCheck %s --check-prefixes=FAST +; FIXME: crash "Input type needs to be promoted!" +; SKIP: sed 's/iXLen/i64/g' %s | llc -ppc-asm-full-reg-names \ +; SKIP: -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \ +; SKIP: -verify-machineinstrs | FileCheck %s --check-prefixes=PPC32 ; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=BE diff --git a/llvm/test/CodeGen/X86/vector-llrint-f16.ll b/llvm/test/CodeGen/X86/vector-llrint-f16.ll index 5e5c5849fc22e..d6a21e1c00502 100644 --- a/llvm/test/CodeGen/X86/vector-llrint-f16.ll +++ b/llvm/test/CodeGen/X86/vector-llrint-f16.ll @@ -1,4 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: crash "Do not know how to split the result of this operator!" 
+; SKIP: sed 's/XRINT/lrint/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 +; SKIP: sed 's/XRINT/llrint/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 ; RUN: sed 's/XRINT/lrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefix=AVX ; RUN: sed 's/XRINT/llrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefix=AVX ; RUN: sed 's/XRINT/lrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16 diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll index 7017eb60df41d..08ee748497650 100644 --- a/llvm/test/CodeGen/X86/vector-llrint.ll +++ b/llvm/test/CodeGen/X86/vector-llrint.ll @@ -1,10 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX512DQ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +; X86-LABEL: llrint_v1i64_v1f32: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fistpll (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; ; SSE-LABEL: llrint_v1i64_v1f32: ; SSE: # %bb.0: ; SSE-NEXT: cvtss2si %xmm0, %rax @@ -25,6 +44,39 @@ define <1 x i64> 
@llrint_v1i64_v1f32(<1 x float> %x) { declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +; X86-LABEL: llrint_v2i64_v2f32: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fistpll (%esp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: movl (%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %esi, 8(%eax) +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -8(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; ; SSE-LABEL: llrint_v2i64_v2f32: ; SSE: # %bb.0: ; SSE-NEXT: cvtss2si %xmm0, %rax @@ -56,6 +108,60 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +; X86-LABEL: llrint_v4i64_v4f32: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $56, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: flds 24(%ebp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: 
fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, 28(%eax) +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl %edx, 20(%eax) +; X86-NEXT: movl %ebx, 16(%eax) +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; ; SSE-LABEL: llrint_v4i64_v4f32: ; SSE: # %bb.0: ; SSE-NEXT: cvtss2si %xmm0, %rax @@ -122,6 +228,100 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +; X86-LABEL: llrint_v8i64_v8f32: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $120, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: 
.cfi_offset %ebx, -12 +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 24(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 28(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 32(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 36(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 40(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 60(%eax) +; X86-NEXT: movl %ecx, 56(%eax) 
+; X86-NEXT: movl %edx, 52(%eax) +; X86-NEXT: movl %esi, 48(%eax) +; X86-NEXT: movl %edi, 44(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 40(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 36(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 32(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 28(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 20(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; ; SSE-LABEL: llrint_v8i64_v8f32: ; SSE: # %bb.0: ; SSE-NEXT: movaps %xmm0, %xmm2 @@ -236,6 +436,180 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +; X86-LABEL: llrint_v16i64_v16f32: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $248, %esp +; X86-NEXT: .cfi_offset 
%esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 24(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 28(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 32(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 36(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 40(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 44(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 48(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 52(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 56(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 60(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 64(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 68(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: flds 72(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 124(%eax) +; X86-NEXT: movl %ecx, 120(%eax) +; X86-NEXT: movl %edx, 116(%eax) +; X86-NEXT: movl %esi, 112(%eax) +; X86-NEXT: movl %edi, 108(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 104(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 100(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 96(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 92(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 88(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 84(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 80(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 76(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 72(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 68(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 64(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 60(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 56(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 52(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 48(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 44(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte 
Reload +; X86-NEXT: movl %ecx, 40(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 36(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 32(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 28(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 20(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; ; SSE-LABEL: llrint_v16i64_v16f32: ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax @@ -452,6 +826,24 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +; X86-LABEL: llrint_v1i64_v1f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: fldl 8(%ebp) +; X86-NEXT: fistpll (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; ; SSE-LABEL: 
llrint_v1i64_v1f64: ; SSE: # %bb.0: ; SSE-NEXT: cvtsd2si %xmm0, %rax @@ -472,6 +864,39 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +; X86-LABEL: llrint_v2i64_v2f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: fldl 20(%ebp) +; X86-NEXT: fldl 12(%ebp) +; X86-NEXT: fistpll (%esp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: movl (%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %esi, 8(%eax) +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -8(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; ; SSE-LABEL: llrint_v2i64_v2f64: ; SSE: # %bb.0: ; SSE-NEXT: cvtsd2si %xmm0, %rax @@ -503,6 +928,60 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +; X86-LABEL: llrint_v4i64_v4f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $56, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 8(%ebp), %eax +; 
X86-NEXT: fldl 36(%ebp) +; X86-NEXT: fldl 28(%ebp) +; X86-NEXT: fldl 20(%ebp) +; X86-NEXT: fldl 12(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, 28(%eax) +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl %edx, 20(%eax) +; X86-NEXT: movl %ebx, 16(%eax) +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; ; SSE-LABEL: llrint_v4i64_v4f64: ; SSE: # %bb.0: ; SSE-NEXT: cvtsd2si %xmm0, %rax @@ -567,6 +1046,100 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +; X86-LABEL: llrint_v8i64_v8f64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, 
%esp +; X86-NEXT: subl $120, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: fldl 12(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fldl 20(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fldl 28(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fldl 36(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fldl 44(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fldl 52(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fldl 60(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: fldl 68(%ebp) +; X86-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 60(%eax) +; X86-NEXT: movl %ecx, 56(%eax) +; X86-NEXT: movl %edx, 52(%eax) +; X86-NEXT: movl %esi, 48(%eax) +; X86-NEXT: movl %edi, 44(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 40(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 36(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 32(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 28(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 20(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; ; SSE-LABEL: llrint_v8i64_v8f64: ; SSE: # %bb.0: ; SSE-NEXT: cvtsd2si %xmm0, %rax diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll index b1c8d46f497f3..a4c50e539d661 100644 --- a/llvm/test/CodeGen/X86/vector-lrint.ll +++ b/llvm/test/CodeGen/X86/vector-lrint.ll @@ -1,4 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86-I32 +; RUN: sed 
's/iXLen/i64/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86-I64 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32 @@ -11,6 +13,35 @@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512DQ-i64 define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { +; X86-I32-LABEL: lrint_v1f32: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %eax +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl (%esp) +; X86-I32-NEXT: movl (%esp), %eax +; X86-I32-NEXT: popl %ecx +; X86-I32-NEXT: .cfi_def_cfa_offset 4 +; X86-I32-NEXT: retl +; +; X86-I64-LABEL: lrint_v1f32: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: andl $-8, %esp +; X86-I64-NEXT: subl $8, %esp +; X86-I64-NEXT: flds 8(%ebp) +; X86-I64-NEXT: fistpll (%esp) +; X86-I64-NEXT: movl (%esp), %eax +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I64-NEXT: movl %ebp, %esp +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl +; ; X86-SSE2-LABEL: lrint_v1f32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax @@ -36,6 +67,53 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { +; X86-I32-LABEL: lrint_v2f32: +; X86-I32: # %bb.0: +; X86-I32-NEXT: subl $8, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds 
{{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl (%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: movl (%esp), %eax +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I32-NEXT: addl $8, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 4 +; X86-I32-NEXT: retl +; +; X86-I64-LABEL: lrint_v2f32: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; X86-I64-NEXT: andl $-8, %esp +; X86-I64-NEXT: subl $16, %esp +; X86-I64-NEXT: .cfi_offset %esi, -16 +; X86-I64-NEXT: .cfi_offset %edi, -12 +; X86-I64-NEXT: movl 8(%ebp), %eax +; X86-I64-NEXT: flds 16(%ebp) +; X86-I64-NEXT: flds 12(%ebp) +; X86-I64-NEXT: fistpll (%esp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: movl (%esp), %ecx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I64-NEXT: movl %edi, 12(%eax) +; X86-I64-NEXT: movl %esi, 8(%eax) +; X86-I64-NEXT: movl %edx, 4(%eax) +; X86-I64-NEXT: movl %ecx, (%eax) +; X86-I64-NEXT: leal -8(%ebp), %esp +; X86-I64-NEXT: popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; ; X86-SSE2-LABEL: lrint_v2f32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtps2dq %xmm0, %xmm0 @@ -81,6 +159,95 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { +; X86-I32-LABEL: lrint_v4f32: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: pushl %esi +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: subl $16, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 28 +; X86-I32-NEXT: .cfi_offset %esi, -12 +; X86-I32-NEXT: .cfi_offset %edi, -8 +; X86-I32-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl (%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: movl (%esp), %ecx +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I32-NEXT: movl %edi, 12(%eax) +; X86-I32-NEXT: movl %esi, 8(%eax) +; X86-I32-NEXT: movl %edx, 4(%eax) +; X86-I32-NEXT: movl %ecx, (%eax) +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: popl %esi +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: popl %edi +; X86-I32-NEXT: .cfi_def_cfa_offset 4 +; X86-I32-NEXT: retl $4 +; +; X86-I64-LABEL: lrint_v4f32: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; X86-I64-NEXT: andl $-8, %esp +; X86-I64-NEXT: subl $56, %esp +; X86-I64-NEXT: .cfi_offset %esi, -20 +; X86-I64-NEXT: .cfi_offset %edi, -16 +; X86-I64-NEXT: .cfi_offset %ebx, -12 +; X86-I64-NEXT: movl 8(%ebp), %eax +; X86-I64-NEXT: flds 24(%ebp) +; X86-I64-NEXT: flds 20(%ebp) +; X86-I64-NEXT: flds 16(%ebp) +; X86-I64-NEXT: flds 12(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl 
%ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I64-NEXT: movl %esi, 28(%eax) +; X86-I64-NEXT: movl %ecx, 24(%eax) +; X86-I64-NEXT: movl %edx, 20(%eax) +; X86-I64-NEXT: movl %ebx, 16(%eax) +; X86-I64-NEXT: movl %edi, 12(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 8(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 4(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, (%eax) +; X86-I64-NEXT: leal -12(%ebp), %esp +; X86-I64-NEXT: popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebx +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; ; X86-SSE2-LABEL: lrint_v4f32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtps2dq %xmm0, %xmm0 @@ -142,6 +309,165 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { +; X86-I32-LABEL: lrint_v8f32: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %ebp +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: pushl %ebx +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: .cfi_def_cfa_offset 16 +; X86-I32-NEXT: pushl %esi +; X86-I32-NEXT: .cfi_def_cfa_offset 20 +; X86-I32-NEXT: subl $40, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 60 +; X86-I32-NEXT: .cfi_offset %esi, -20 +; X86-I32-NEXT: .cfi_offset %edi, -16 +; X86-I32-NEXT: .cfi_offset %ebx, -12 +; X86-I32-NEXT: .cfi_offset %ebp, -8 +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds 
{{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: flds {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I32-NEXT: movl %edx, 28(%eax) +; X86-I32-NEXT: movl %ecx, 24(%eax) +; X86-I32-NEXT: movl %ebp, 20(%eax) +; X86-I32-NEXT: movl %ebx, 16(%eax) +; X86-I32-NEXT: movl %edi, 12(%eax) +; X86-I32-NEXT: movl %esi, 8(%eax) +; X86-I32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-I32-NEXT: movl %ecx, 4(%eax) +; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I32-NEXT: movl %ecx, (%eax) +; X86-I32-NEXT: addl $40, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 20 +; X86-I32-NEXT: popl %esi +; X86-I32-NEXT: .cfi_def_cfa_offset 16 +; X86-I32-NEXT: popl %edi +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: popl %ebx +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: popl %ebp +; X86-I32-NEXT: .cfi_def_cfa_offset 4 +; X86-I32-NEXT: retl $4 +; +; X86-I64-LABEL: lrint_v8f32: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; 
X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; X86-I64-NEXT: andl $-8, %esp +; X86-I64-NEXT: subl $120, %esp +; X86-I64-NEXT: .cfi_offset %esi, -20 +; X86-I64-NEXT: .cfi_offset %edi, -16 +; X86-I64-NEXT: .cfi_offset %ebx, -12 +; X86-I64-NEXT: flds 12(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: flds 16(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: flds 20(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: flds 24(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: flds 28(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: flds 32(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: flds 36(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: flds 40(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: movl 8(%ebp), %eax +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-I64-NEXT: movl %ebx, 60(%eax) +; X86-I64-NEXT: movl %ecx, 56(%eax) +; X86-I64-NEXT: movl %edx, 52(%eax) +; X86-I64-NEXT: movl %esi, 48(%eax) +; X86-I64-NEXT: movl %edi, 44(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 40(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 36(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 32(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 28(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 24(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 20(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 16(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 12(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 8(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 4(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, (%eax) +; X86-I64-NEXT: leal -12(%ebp), %esp +; X86-I64-NEXT: popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebx +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; ; X86-SSE2-LABEL: lrint_v8f32: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtps2dq %xmm0, 
%xmm0 @@ -242,6 +568,35 @@ define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) { declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { +; X86-I32-LABEL: lrint_v1f64: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %eax +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl (%esp) +; X86-I32-NEXT: movl (%esp), %eax +; X86-I32-NEXT: popl %ecx +; X86-I32-NEXT: .cfi_def_cfa_offset 4 +; X86-I32-NEXT: retl +; +; X86-I64-LABEL: lrint_v1f64: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: andl $-8, %esp +; X86-I64-NEXT: subl $8, %esp +; X86-I64-NEXT: fldl 8(%ebp) +; X86-I64-NEXT: fistpll (%esp) +; X86-I64-NEXT: movl (%esp), %eax +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I64-NEXT: movl %ebp, %esp +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl +; ; X86-SSE2-LABEL: lrint_v1f64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax @@ -267,6 +622,53 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { +; X86-I32-LABEL: lrint_v2f64: +; X86-I32: # %bb.0: +; X86-I32-NEXT: subl $8, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl (%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: movl (%esp), %eax +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I32-NEXT: addl $8, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 4 +; X86-I32-NEXT: retl +; +; X86-I64-LABEL: lrint_v2f64: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, 
%ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; X86-I64-NEXT: andl $-8, %esp +; X86-I64-NEXT: subl $16, %esp +; X86-I64-NEXT: .cfi_offset %esi, -16 +; X86-I64-NEXT: .cfi_offset %edi, -12 +; X86-I64-NEXT: movl 8(%ebp), %eax +; X86-I64-NEXT: fldl 20(%ebp) +; X86-I64-NEXT: fldl 12(%ebp) +; X86-I64-NEXT: fistpll (%esp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: movl (%esp), %ecx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I64-NEXT: movl %edi, 12(%eax) +; X86-I64-NEXT: movl %esi, 8(%eax) +; X86-I64-NEXT: movl %edx, 4(%eax) +; X86-I64-NEXT: movl %ecx, (%eax) +; X86-I64-NEXT: leal -8(%ebp), %esp +; X86-I64-NEXT: popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; ; X86-SSE2-LABEL: lrint_v2f64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0 @@ -312,6 +714,95 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { +; X86-I32-LABEL: lrint_v4f64: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: pushl %esi +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: subl $16, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 28 +; X86-I32-NEXT: .cfi_offset %esi, -12 +; X86-I32-NEXT: .cfi_offset %edi, -8 +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl (%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: movl (%esp), %ecx +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I32-NEXT: movl 
{{[0-9]+}}(%esp), %esi +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I32-NEXT: movl %edi, 12(%eax) +; X86-I32-NEXT: movl %esi, 8(%eax) +; X86-I32-NEXT: movl %edx, 4(%eax) +; X86-I32-NEXT: movl %ecx, (%eax) +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: popl %esi +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: popl %edi +; X86-I32-NEXT: .cfi_def_cfa_offset 4 +; X86-I32-NEXT: retl $4 +; +; X86-I64-LABEL: lrint_v4f64: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; X86-I64-NEXT: andl $-8, %esp +; X86-I64-NEXT: subl $56, %esp +; X86-I64-NEXT: .cfi_offset %esi, -20 +; X86-I64-NEXT: .cfi_offset %edi, -16 +; X86-I64-NEXT: .cfi_offset %ebx, -12 +; X86-I64-NEXT: movl 8(%ebp), %eax +; X86-I64-NEXT: fldl 36(%ebp) +; X86-I64-NEXT: fldl 28(%ebp) +; X86-I64-NEXT: fldl 20(%ebp) +; X86-I64-NEXT: fldl 12(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I64-NEXT: movl %esi, 28(%eax) +; X86-I64-NEXT: movl %ecx, 24(%eax) +; X86-I64-NEXT: movl %edx, 20(%eax) +; X86-I64-NEXT: movl %ebx, 16(%eax) +; X86-I64-NEXT: 
movl %edi, 12(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 8(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 4(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, (%eax) +; X86-I64-NEXT: leal -12(%ebp), %esp +; X86-I64-NEXT: popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebx +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; ; X86-SSE2-LABEL: lrint_v4f64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1 @@ -377,6 +868,165 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { +; X86-I32-LABEL: lrint_v8f64: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %ebp +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: pushl %ebx +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: .cfi_def_cfa_offset 16 +; X86-I32-NEXT: pushl %esi +; X86-I32-NEXT: .cfi_def_cfa_offset 20 +; X86-I32-NEXT: subl $40, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 60 +; X86-I32-NEXT: .cfi_offset %esi, -20 +; X86-I32-NEXT: .cfi_offset %edi, -16 +; X86-I32-NEXT: .cfi_offset %ebx, -12 +; X86-I32-NEXT: .cfi_offset %ebp, -8 +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) +; X86-I32-NEXT: fistpl 
{{[0-9]+}}(%esp) +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I32-NEXT: movl %edx, 28(%eax) +; X86-I32-NEXT: movl %ecx, 24(%eax) +; X86-I32-NEXT: movl %ebp, 20(%eax) +; X86-I32-NEXT: movl %ebx, 16(%eax) +; X86-I32-NEXT: movl %edi, 12(%eax) +; X86-I32-NEXT: movl %esi, 8(%eax) +; X86-I32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-I32-NEXT: movl %ecx, 4(%eax) +; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I32-NEXT: movl %ecx, (%eax) +; X86-I32-NEXT: addl $40, %esp +; X86-I32-NEXT: .cfi_def_cfa_offset 20 +; X86-I32-NEXT: popl %esi +; X86-I32-NEXT: .cfi_def_cfa_offset 16 +; X86-I32-NEXT: popl %edi +; X86-I32-NEXT: .cfi_def_cfa_offset 12 +; X86-I32-NEXT: popl %ebx +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: popl %ebp +; X86-I32-NEXT: .cfi_def_cfa_offset 4 +; X86-I32-NEXT: retl $4 +; +; X86-I64-LABEL: lrint_v8f64: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; X86-I64-NEXT: andl $-8, %esp +; X86-I64-NEXT: subl $120, %esp +; X86-I64-NEXT: .cfi_offset %esi, -20 +; X86-I64-NEXT: .cfi_offset %edi, -16 +; X86-I64-NEXT: .cfi_offset %ebx, -12 +; X86-I64-NEXT: fldl 12(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fldl 20(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fldl 28(%ebp) +; 
X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fldl 36(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fldl 44(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fldl 52(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fldl 60(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: fldl 68(%ebp) +; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-I64-NEXT: movl 8(%ebp), %eax +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-I64-NEXT: movl %ebx, 60(%eax) +; X86-I64-NEXT: movl %ecx, 56(%eax) +; X86-I64-NEXT: 
movl %edx, 52(%eax) +; X86-I64-NEXT: movl %esi, 48(%eax) +; X86-I64-NEXT: movl %edi, 44(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 40(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 36(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 32(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 28(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 24(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 20(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 16(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 12(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 8(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, 4(%eax) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-I64-NEXT: movl %ecx, (%eax) +; X86-I64-NEXT: leal -12(%ebp), %esp +; X86-I64-NEXT: popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebx +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; ; X86-SSE2-LABEL: lrint_v8f64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp From f834954c1a12aa1c0af63e32d9b02ba11000027a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 8 Aug 2025 06:48:37 -0500 Subject: [PATCH 3/8] Add f128 to vector tests --- .../AArch64/sve-fixed-vector-llrint.ll | 654 +++++ .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 1115 ++++++++ llvm/test/CodeGen/AArch64/vector-llrint.ll | 516 ++++ llvm/test/CodeGen/AArch64/vector-lrint.ll | 948 +++++++ llvm/test/CodeGen/PowerPC/vector-llrint.ll | 596 
+++++ llvm/test/CodeGen/PowerPC/vector-lrint.ll | 2324 +++++++++++++++++ llvm/test/CodeGen/X86/vector-llrint.ll | 705 +++++ llvm/test/CodeGen/X86/vector-lrint.ll | 1271 +++++++++ 8 files changed, 8129 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll index 7f144df499be0..838aac0edcb73 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll @@ -861,3 +861,657 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) { ret <32 x i64> %a } declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>) + +define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) { +; CHECK-LABEL: llrint_v1i64_v1fp128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128>) + +define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) { +; CHECK-LABEL: llrint_v2i64_v2fp128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret + %a = call <2 x i64> 
@llvm.llrint.v2i64.v2fp128(<2 x fp128> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128>) + +define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) { +; CHECK-LABEL: llrint_v4i64_v4fp128: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v3.16b +; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #64 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #64 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret + %a = call <4 x i64> 
@llvm.llrint.v4i64.v4fp128(<4 x fp128> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128>) + +define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) { +; CHECK-LABEL: llrint_v8i64_v8fp128: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v7.16b +; CHECK-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill +; CHECK-NEXT: stp q4, q3, [sp, #48] // 32-byte Folded Spill +; CHECK-NEXT: stp q2, q1, [sp, #80] // 32-byte Folded Spill +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #128 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #128 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, 
[sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #128 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #128 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128>) + +define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) { +; CHECK-LABEL: llrint_v16fp128: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #256 +; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl x8, sp, #4 +; CHECK-NEXT: str q1, [sp, #240] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, #272] +; CHECK-NEXT: addvl x8, sp, #4 +; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: stp q7, q6, [sp, #128] // 32-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, #288] +; CHECK-NEXT: addvl x8, sp, #4 +; CHECK-NEXT: stp q5, q4, [sp, #160] // 32-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, #304] +; CHECK-NEXT: addvl x8, sp, #4 +; CHECK-NEXT: stp q3, q2, [sp, #192] // 32-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, #320] +; CHECK-NEXT: addvl x8, sp, #4 +; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, #336] +; CHECK-NEXT: addvl x8, sp, #4 +; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, #352] +; CHECK-NEXT: addvl x8, sp, #4 +; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, #368] +; CHECK-NEXT: addvl x8, sp, #4 +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, #384] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 
16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; 
CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #256 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z4, [x8, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z6, [x8, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z6.d +; CHECK-NEXT: 
splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 +; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 +; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 +; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: add sp, sp, #256 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128>) + +define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) { +; CHECK-LABEL: llrint_v32fp128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #512 +; CHECK-NEXT: addvl sp, sp, #-8 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill +; CHECK-NEXT: mov x19, x8 +; CHECK-NEXT: stp q0, q7, [sp, #48] // 32-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #864] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q6, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #496] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #880] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: stp q5, q4, [sp, #128] // 32-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #896] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #912] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #800] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #432] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #816] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #832] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #848] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #736] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #368] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #752] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #768] +; CHECK-NEXT: addvl 
x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #784] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #672] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #688] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #704] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #720] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #608] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #624] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #640] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #656] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #544] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #560] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #576] +; CHECK-NEXT: addvl x9, sp, #8 +; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [x9, #592] +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v3.16b +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: 
mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded 
Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #224] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #256] // 16-byte Folded 
Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #320] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte 
Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #416] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #448] // 16-byte Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #480] // 16-byte 
Folded Reload +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: add x9, sp, #512 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload +; CHECK-NEXT: mov x8, #28 // =0x1c +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-NEXT: mov x8, #24 // =0x18 +; CHECK-NEXT: ldr z0, [x9, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-NEXT: mov x8, #20 // =0x14 +; CHECK-NEXT: ldr z0, [x9, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-NEXT: mov x8, #16 // =0x10 +; CHECK-NEXT: ldr z0, [x9, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-NEXT: mov x8, #12 // =0xc +; CHECK-NEXT: ldr z0, [x9, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-NEXT: mov x8, #8 // =0x8 +; CHECK-NEXT: ldr z0, [x9, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-NEXT: mov x8, #4 // =0x4 +; CHECK-NEXT: ldr z0, [x9, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ldr z0, [x8, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: st1d { z0.d }, p0, [x19] +; CHECK-NEXT: addvl sp, sp, #8 +; CHECK-NEXT: add sp, sp, #512 +; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.llrint.v32i64.v32fp128(<32 x fp128> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32fp128(<32 x fp128>) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll index 9fe8d92a182ac..0b5e27f9fe15d 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll +++
b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll @@ -1611,3 +1611,1118 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { ret <32 x iXLen> %a } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) + +define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v1fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-i32-NEXT: .cfi_offset w30, -16 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v1fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-i64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-i64-NEXT: .cfi_offset w30, -16 +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>) + +define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v2fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: sub sp, sp, #48 +; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 48 +; CHECK-i32-NEXT: .cfi_offset w30, -16 +; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-i32-NEXT: add sp, sp, #48 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v2fp128: +; 
CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: sub sp, sp, #48 +; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-i64-NEXT: .cfi_def_cfa_offset 48 +; CHECK-i64-NEXT: .cfi_offset w30, -16 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: add sp, sp, #48 +; CHECK-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x) + ret <2 x iXLen> %a +} +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>) + +define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v4fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: sub sp, sp, #80 +; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 80 +; CHECK-i32-NEXT: .cfi_offset w30, -16 +; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; 
CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: add sp, sp, #80 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v4fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-i64-NEXT: sub sp, sp, #64 +; CHECK-i64-NEXT: addvl sp, sp, #-1 +; CHECK-i64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG +; CHECK-i64-NEXT: .cfi_offset w30, -8 +; CHECK-i64-NEXT: .cfi_offset w29, -16 +; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v3.16b +; CHECK-i64-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #64 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #64 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: mov z1.d, z0.d +; CHECK-i64-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-i64-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-i64-NEXT: // 
kill: def $q1 killed $q1 killed $z1 +; CHECK-i64-NEXT: addvl sp, sp, #1 +; CHECK-i64-NEXT: add sp, sp, #64 +; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x) + ret <4 x iXLen> %a +} +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) + +define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v8fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: sub sp, sp, #176 +; CHECK-i32-NEXT: stp x30, x25, [sp, #112] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 176 +; CHECK-i32-NEXT: .cfi_offset w19, -8 +; CHECK-i32-NEXT: .cfi_offset w20, -16 +; CHECK-i32-NEXT: .cfi_offset w21, -24 +; CHECK-i32-NEXT: .cfi_offset w22, -32 +; CHECK-i32-NEXT: .cfi_offset w23, -40 +; CHECK-i32-NEXT: .cfi_offset w24, -48 +; CHECK-i32-NEXT: .cfi_offset w25, -56 +; CHECK-i32-NEXT: .cfi_offset w30, -64 +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: mov v0.16b, v7.16b +; CHECK-i32-NEXT: stp q6, q5, [sp] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp q4, q3, [sp, #32] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp q2, q1, [sp, #64] // 32-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w19, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w20, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w21, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w22, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload 
+; CHECK-i32-NEXT: mov w23, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w24, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w25, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s1, w22 +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: mov v0.s[1], w25 +; CHECK-i32-NEXT: mov v1.s[1], w21 +; CHECK-i32-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x30, x25, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w24 +; CHECK-i32-NEXT: mov v1.s[2], w20 +; CHECK-i32-NEXT: mov v0.s[3], w23 +; CHECK-i32-NEXT: mov v1.s[3], w19 +; CHECK-i32-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload +; CHECK-i32-NEXT: add sp, sp, #176 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v8fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-i64-NEXT: sub sp, sp, #128 +; CHECK-i64-NEXT: addvl sp, sp, #-2 +; CHECK-i64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG +; CHECK-i64-NEXT: .cfi_offset w30, -8 +; CHECK-i64-NEXT: .cfi_offset w29, -16 +; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v7.16b +; CHECK-i64-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill +; CHECK-i64-NEXT: stp q4, q3, [sp, #48] // 32-byte Folded Spill +; CHECK-i64-NEXT: stp q2, q1, [sp, #80] // 32-byte Folded Spill +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #128 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #128 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; 
CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #128 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #128 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov z3.d, z2.d +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-i64-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-i64-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-i64-NEXT: mov z1.d, z0.d +; CHECK-i64-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-i64-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-i64-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-i64-NEXT: addvl sp, sp, #2 +; CHECK-i64-NEXT: add sp, sp, #128 +; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x) + ret <8 x iXLen> %a +} +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) + +define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v16fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: sub sp, sp, #368 +; CHECK-i32-NEXT: stp x29, x30, [sp, #272] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x28, x27, [sp, #288] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x26, x25, [sp, #304] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x24, x23, [sp, #320] // 16-byte 
Folded Spill +; CHECK-i32-NEXT: stp x22, x21, [sp, #336] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x20, x19, [sp, #352] // 16-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 368 +; CHECK-i32-NEXT: .cfi_offset w19, -8 +; CHECK-i32-NEXT: .cfi_offset w20, -16 +; CHECK-i32-NEXT: .cfi_offset w21, -24 +; CHECK-i32-NEXT: .cfi_offset w22, -32 +; CHECK-i32-NEXT: .cfi_offset w23, -40 +; CHECK-i32-NEXT: .cfi_offset w24, -48 +; CHECK-i32-NEXT: .cfi_offset w25, -56 +; CHECK-i32-NEXT: .cfi_offset w26, -64 +; CHECK-i32-NEXT: .cfi_offset w27, -72 +; CHECK-i32-NEXT: .cfi_offset w28, -80 +; CHECK-i32-NEXT: .cfi_offset w30, -88 +; CHECK-i32-NEXT: .cfi_offset w29, -96 +; CHECK-i32-NEXT: stp q7, q6, [sp, #80] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp q5, q4, [sp, #112] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp q3, q0, [sp, #144] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp q2, q1, [sp, #176] // 32-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #368] +; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #384] +; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #400] +; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #416] +; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #432] +; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #448] +; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #464] +; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #480] +; CHECK-i32-NEXT: mov v0.16b, v1.16b +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #268] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #240] // 4-byte 
Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #224] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w23, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #208] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w24, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w25, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w27, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w26, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w28, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w29, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w19, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w20, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w21, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w22, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s1, w19 +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: ldr w8, [sp, #224] // 4-byte Folded Reload +; CHECK-i32-NEXT: fmov s2, w27 +; CHECK-i32-NEXT: fmov s3, w23 +; CHECK-i32-NEXT: mov v0.s[1], w22 +; CHECK-i32-NEXT: mov v1.s[1], w29 +; CHECK-i32-NEXT: mov v2.s[1], w25 +; 
CHECK-i32-NEXT: mov v3.s[1], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #240] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldp x29, x30, [sp, #272] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w21 +; CHECK-i32-NEXT: mov v1.s[2], w28 +; CHECK-i32-NEXT: mov v2.s[2], w24 +; CHECK-i32-NEXT: mov v3.s[2], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #208] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldp x22, x21, [sp, #336] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x24, x23, [sp, #320] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w20 +; CHECK-i32-NEXT: mov v1.s[3], w26 +; CHECK-i32-NEXT: mov v2.s[3], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #268] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldp x20, x19, [sp, #352] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x26, x25, [sp, #304] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v3.s[3], w8 +; CHECK-i32-NEXT: ldp x28, x27, [sp, #288] // 16-byte Folded Reload +; CHECK-i32-NEXT: add sp, sp, #368 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v16fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-i64-NEXT: sub sp, sp, #256 +; CHECK-i64-NEXT: addvl sp, sp, #-4 +; CHECK-i64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG +; CHECK-i64-NEXT: .cfi_offset w30, -8 +; CHECK-i64-NEXT: .cfi_offset w29, -16 +; CHECK-i64-NEXT: addvl x8, sp, #4 +; CHECK-i64-NEXT: str q1, [sp, #240] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q1, [x8, #272] +; CHECK-i64-NEXT: addvl x8, sp, #4 +; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: stp q7, q6, [sp, #128] // 32-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q1, [x8, #288] +; CHECK-i64-NEXT: addvl x8, sp, #4 +; CHECK-i64-NEXT: stp q5, q4, [sp, #160] // 32-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q1, [x8, #304] +; CHECK-i64-NEXT: addvl x8, sp, #4 +; CHECK-i64-NEXT: stp q3, q2, [sp, #192] // 32-byte Folded Spill +; CHECK-i64-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q1, [x8, #320] +; CHECK-i64-NEXT: addvl x8, sp, #4 +; CHECK-i64-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q1, [x8, #336] +; CHECK-i64-NEXT: addvl x8, sp, #4 +; CHECK-i64-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q1, [x8, #352] +; CHECK-i64-NEXT: addvl x8, sp, #4 +; CHECK-i64-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q1, [x8, #368] +; CHECK-i64-NEXT: addvl x8, sp, #4 +; CHECK-i64-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q1, [x8, #384] +; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; 
CHECK-i64-NEXT: add x8, sp, #256 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #256 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #256 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #256 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr 
q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #256 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #256 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #256 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, 
[sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #256 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr z4, [x8, #2, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr z6, [x8, #3, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov z3.d, z2.d +; CHECK-i64-NEXT: mov z5.d, z4.d +; CHECK-i64-NEXT: mov z7.d, z6.d +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-i64-NEXT: ext z5.b, z5.b, z4.b, #16 +; CHECK-i64-NEXT: ext z7.b, z7.b, z6.b, #16 +; CHECK-i64-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-i64-NEXT: // kill: def $q4 killed $q4 killed $z4 +; CHECK-i64-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-i64-NEXT: // kill: def $q5 killed $q5 killed $z5 +; CHECK-i64-NEXT: // kill: def $q6 killed $q6 killed $z6 +; CHECK-i64-NEXT: // kill: def $q7 killed $q7 killed $z7 +; CHECK-i64-NEXT: mov z1.d, z0.d +; CHECK-i64-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-i64-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-i64-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-i64-NEXT: addvl sp, sp, #4 +; CHECK-i64-NEXT: add sp, sp, #256 +; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-i64-NEXT: ret + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x) + ret <16 x iXLen> %a +} +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) + +define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v32fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: stp x29, x30, [sp, #-96]! 
// 16-byte Folded Spill +; CHECK-i32-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-i32-NEXT: sub sp, sp, #528 +; CHECK-i32-NEXT: .cfi_def_cfa_offset 624 +; CHECK-i32-NEXT: .cfi_offset w19, -8 +; CHECK-i32-NEXT: .cfi_offset w20, -16 +; CHECK-i32-NEXT: .cfi_offset w21, -24 +; CHECK-i32-NEXT: .cfi_offset w22, -32 +; CHECK-i32-NEXT: .cfi_offset w23, -40 +; CHECK-i32-NEXT: .cfi_offset w24, -48 +; CHECK-i32-NEXT: .cfi_offset w25, -56 +; CHECK-i32-NEXT: .cfi_offset w26, -64 +; CHECK-i32-NEXT: .cfi_offset w27, -72 +; CHECK-i32-NEXT: .cfi_offset w28, -80 +; CHECK-i32-NEXT: .cfi_offset w30, -88 +; CHECK-i32-NEXT: .cfi_offset w29, -96 +; CHECK-i32-NEXT: stp q2, q1, [sp, #368] // 32-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #624] +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #640] +; CHECK-i32-NEXT: str q7, [sp, #208] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #656] +; CHECK-i32-NEXT: str q6, [sp, #240] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #672] +; CHECK-i32-NEXT: str q5, [sp, #272] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #688] +; CHECK-i32-NEXT: str q4, [sp, #304] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #704] +; CHECK-i32-NEXT: str q3, [sp, #336] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #720] +; 
CHECK-i32-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #736] +; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #752] +; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #768] +; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #784] +; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #800] +; CHECK-i32-NEXT: str q1, [sp, #288] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #816] +; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #832] +; CHECK-i32-NEXT: str q1, [sp, #256] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #848] +; CHECK-i32-NEXT: str q1, [sp, #352] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #864] +; CHECK-i32-NEXT: str q1, [sp, #416] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #880] +; CHECK-i32-NEXT: str q1, [sp, #320] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #896] +; CHECK-i32-NEXT: str q1, [sp, #400] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #912] +; CHECK-i32-NEXT: str q1, [sp, #448] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #928] +; CHECK-i32-NEXT: str q1, [sp, #480] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #944] +; CHECK-i32-NEXT: str q1, [sp, #432] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #960] +; CHECK-i32-NEXT: str q1, [sp, #464] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #976] +; CHECK-i32-NEXT: str q1, [sp, #496] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #992] +; CHECK-i32-NEXT: mov v0.16b, v1.16b +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #524] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-i32-NEXT: 
str w0, [sp, #496] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #464] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #432] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #480] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #448] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #400] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #320] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #416] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #352] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #256] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #144] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #288] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #192] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, 
#64] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #16] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #176] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #112] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w29, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w21, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #80] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i32-NEXT: str w0, [sp, #96] // 4-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w23, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w24, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w28, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w19, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w25, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w26, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w20, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload +; 
CHECK-i32-NEXT: mov w22, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov w27, w0 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr s4, [sp, #16] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload +; CHECK-i32-NEXT: fmov s2, w24 +; CHECK-i32-NEXT: ldr s5, [sp, #144] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldr s6, [sp, #320] // 4-byte Folded Reload +; CHECK-i32-NEXT: fmov s3, w21 +; CHECK-i32-NEXT: mov v4.s[1], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #256] // 4-byte Folded Reload +; CHECK-i32-NEXT: ldr s7, [sp, #432] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v2.s[1], w23 +; CHECK-i32-NEXT: fmov s1, w26 +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: mov v5.s[1], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #400] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v3.s[1], w29 +; CHECK-i32-NEXT: mov v6.s[1], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #464] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v1.s[1], w25 +; CHECK-i32-NEXT: mov v0.s[1], w27 +; CHECK-i32-NEXT: mov v7.s[1], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v2.s[2], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v1.s[2], w19 +; CHECK-i32-NEXT: mov v0.s[2], w22 +; CHECK-i32-NEXT: mov v3.s[2], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #192] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v4.s[2], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #352] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v1.s[3], w28 +; CHECK-i32-NEXT: mov v0.s[3], w20 +; CHECK-i32-NEXT: mov v5.s[2], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #448] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v6.s[2], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #496] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v7.s[2], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v2.s[3], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #176] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v3.s[3], 
w8 +; CHECK-i32-NEXT: ldr w8, [sp, #288] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v4.s[3], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #416] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v5.s[3], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #480] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v6.s[3], w8 +; CHECK-i32-NEXT: ldr w8, [sp, #524] // 4-byte Folded Reload +; CHECK-i32-NEXT: mov v7.s[3], w8 +; CHECK-i32-NEXT: add sp, sp, #528 +; CHECK-i32-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v32fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill +; CHECK-i64-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: sub sp, sp, #512 +; CHECK-i64-NEXT: addvl sp, sp, #-8 +; CHECK-i64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG +; CHECK-i64-NEXT: .cfi_offset w19, -8 +; CHECK-i64-NEXT: .cfi_offset w30, -16 +; CHECK-i64-NEXT: .cfi_offset w29, -32 +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill +; CHECK-i64-NEXT: mov x19, x8 +; CHECK-i64-NEXT: stp q0, q7, [sp, #48] // 32-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #864] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q6, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp, #496] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #880] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: stp q5, q4, [sp, #128] // 32-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; 
CHECK-i64-NEXT: ldr q0, [x9, #896] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #912] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #800] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #432] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #816] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #832] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #848] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #736] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #368] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #752] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #768] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #784] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #672] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #688] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #704] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #720] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #608] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str 
q0, [sp, #240] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #624] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #640] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #656] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #544] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #560] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #576] +; CHECK-i64-NEXT: addvl x9, sp, #8 +; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [x9, #592] +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v3.16b +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #7, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: 
splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #6, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #144] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, 
[sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #5, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #224] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #4, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #256] // 16-byte Folded 
Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #320] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill 
+; CHECK-i64-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #416] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #448] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; 
CHECK-i64-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: add x9, sp, #512 +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov x8, #28 // =0x1c +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: ptrue p0.d, vl4 +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-i64-NEXT: mov x8, #24 // =0x18 +; CHECK-i64-NEXT: ldr z0, [x9, #1, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-i64-NEXT: mov x8, #20 // =0x14 +; CHECK-i64-NEXT: ldr z0, [x9, #2, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-i64-NEXT: mov x8, #16 // =0x10 +; CHECK-i64-NEXT: ldr z0, [x9, #3, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-i64-NEXT: mov x8, #12 // =0xc +; CHECK-i64-NEXT: ldr z0, [x9, #4, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-i64-NEXT: mov x8, #8 // =0x8 +; CHECK-i64-NEXT: ldr z0, [x9, #5, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-i64-NEXT: mov x8, #4 // =0x4 +; CHECK-i64-NEXT: ldr z0, [x9, #6, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] +; CHECK-i64-NEXT: add x8, sp, #512 +; CHECK-i64-NEXT: ldr z0, [x8, #7, mul vl] // 16-byte Folded Reload +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19] +; CHECK-i64-NEXT: addvl sp, sp, #8 +; CHECK-i64-NEXT: add sp, sp, #512 +; CHECK-i64-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload +; CHECK-i64-NEXT: ret + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16fp128(<32 x fp128> %x) + ret <32 x iXLen> %a +} +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>) diff --git 
a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll index 5503de2b4c5db..9e6f46df05fec 100644 --- a/llvm/test/CodeGen/AArch64/vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll @@ -674,3 +674,519 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) { ret <32 x i64> %a } declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>) + +define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { +; CHECK-LABEL: llrint_v1i64_v1f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>) + +define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { +; CHECK-LABEL: llrint_v2i64_v2f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) + +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { +; CHECK-LABEL: llrint_v4i64_v4f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: 
str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d1, x0 +; CHECK-NEXT: ldp q0, q4, [sp, #16] // 32-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: mov v1.d[1], v4.d[0] +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) + +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { +; CHECK-LABEL: llrint_v8i64_v8f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #144 +; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: stp q3, q2, [sp, #16] // 32-byte Folded Spill +; CHECK-NEXT: stp q5, q4, [sp, #48] // 32-byte Folded Spill +; CHECK-NEXT: stp q7, q6, [sp, #96] // 32-byte Folded Spill +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl 
+; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d3, x0 +; CHECK-NEXT: ldp q0, q1, [sp, #80] // 32-byte Folded Reload +; CHECK-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-NEXT: mov v3.d[1], v1.d[0] +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #144 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>) + +define <16 x i64> @llrint_v16f128(<16 x fp128> %x) { +; CHECK-LABEL: llrint_v16f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #272 +; CHECK-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill +; 
CHECK-NEXT: .cfi_def_cfa_offset 272 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: str q2, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: ldr q2, [sp, #368] +; CHECK-NEXT: stp q0, q3, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: str q2, [sp, #240] // 16-byte Folded Spill +; CHECK-NEXT: ldr q2, [sp, #384] +; CHECK-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill +; CHECK-NEXT: str q2, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: ldr q2, [sp, #336] +; CHECK-NEXT: str q2, [sp, #192] // 16-byte Folded Spill +; CHECK-NEXT: ldr q2, [sp, #352] +; CHECK-NEXT: str q2, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: ldr q2, [sp, #304] +; CHECK-NEXT: str q2, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: ldr q2, [sp, #320] +; CHECK-NEXT: stp q4, q2, [sp, #112] // 32-byte Folded Spill +; CHECK-NEXT: ldr q2, [sp, #272] +; CHECK-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill +; CHECK-NEXT: ldr q2, [sp, #288] +; CHECK-NEXT: str q2, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #208] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, 
#32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #192] 
// 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d7, x0 +; CHECK-NEXT: ldp q0, q1, [sp, #208] // 32-byte Folded Reload +; CHECK-NEXT: ldp q4, q2, [sp, #96] // 32-byte Folded Reload +; CHECK-NEXT: ldr q3, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload +; CHECK-NEXT: ldr q6, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: mov v7.d[1], v1.d[0] +; CHECK-NEXT: ldp q5, q1, [sp, #144] // 32-byte Folded Reload +; CHECK-NEXT: add sp, sp, #272 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>) + +define <32 x i64> @llrint_v32f128(<32 x fp128> %x) { +; CHECK-LABEL: llrint_v32f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #512 +; CHECK-NEXT: .cfi_def_cfa_offset 544 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #896] +; CHECK-NEXT: mov x19, x8 +; CHECK-NEXT: str q7, [sp, #272] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #496] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #912] +; CHECK-NEXT: str q6, [sp, #320] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #864] +; CHECK-NEXT: stp q3, q5, [sp, #16] // 32-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #880] +; CHECK-NEXT: stp q2, q0, [sp, #416] // 32-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #832] +; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #848] +; CHECK-NEXT: stp q4, q0, [sp, #368] // 32-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #800] +; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #816] +; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #768] +; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #784] +; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #736] +; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #752] +; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #704] +; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #720] +; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #672] +; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #688] +; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded 
Spill +; CHECK-NEXT: ldr q0, [sp, #640] +; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #656] +; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #608] +; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #624] +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #576] +; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #592] +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #544] +; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #560] +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #368] // 16-byte Folded Spill 
+; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; 
CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload +; CHECK-NEXT: bl 
llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #336] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #432] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #432] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #208] +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload +; CHECK-NEXT: str q1, [x19, #192] +; CHECK-NEXT: ldr q1, [sp, #304] // 16-byte Folded Reload +; CHECK-NEXT: str q1, [x19, #176] +; CHECK-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #240] +; CHECK-NEXT: str 
q1, [x19, #160] +; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #224] +; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #144] +; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #128] +; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #112] +; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #96] +; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #80] +; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #64] +; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #48] +; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #32] +; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19, #16] +; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-NEXT: str q0, [x19] +; CHECK-NEXT: add sp, sp, #512 +; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.llrint.v32i64.v16f128(<32 x fp128> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>) diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index 602643264e7be..cb7fe14273a42 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -1335,3 +1335,951 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { ret <32 x iXLen> %a } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) + +define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v1fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-i32-NEXT: .cfi_offset w30, -16 +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v1fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-i64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-i64-NEXT: .cfi_offset w30, -16 +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>) + +define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v2fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: sub sp, sp, #48 +; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 48 +; CHECK-i32-NEXT: .cfi_offset w30, -16 +; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-i32-NEXT: add sp, sp, #48 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v2fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: sub sp, sp, #48 +; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-i64-NEXT: .cfi_def_cfa_offset 48 +; CHECK-i64-NEXT: .cfi_offset w30, -16 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: 
str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: add sp, sp, #48 +; CHECK-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x) + ret <2 x iXLen> %a +} +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>) + +define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v4fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: sub sp, sp, #80 +; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 80 +; CHECK-i32-NEXT: .cfi_offset w30, -16 +; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: add sp, sp, #80 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v4fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: sub sp, sp, #80 +; CHECK-i64-NEXT: str x30, [sp, 
#64] // 8-byte Folded Spill +; CHECK-i64-NEXT: .cfi_def_cfa_offset 80 +; CHECK-i64-NEXT: .cfi_offset w30, -16 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d1, x0 +; CHECK-i64-NEXT: ldp q0, q4, [sp, #16] // 32-byte Folded Reload +; CHECK-i64-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-i64-NEXT: mov v1.d[1], v4.d[0] +; CHECK-i64-NEXT: add sp, sp, #80 +; CHECK-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x) + ret <4 x iXLen> %a +} +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) + +define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v8fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: sub sp, sp, #144 +; CHECK-i32-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 144 +; CHECK-i32-NEXT: .cfi_offset w30, -16 +; CHECK-i32-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill +; CHECK-i32-NEXT: str q4, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #112] 
// 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldp q1, q0, [sp, #96] // 32-byte Folded Reload +; CHECK-i32-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-i32-NEXT: mov v1.s[3], w0 +; CHECK-i32-NEXT: add sp, sp, #144 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v8fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: sub sp, sp, #144 +; CHECK-i64-NEXT: str x30, [sp, #128] // 8-byte Folded Spill +; CHECK-i64-NEXT: .cfi_def_cfa_offset 144 +; CHECK-i64-NEXT: 
.cfi_offset w30, -16 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: stp q3, q2, [sp, #16] // 32-byte Folded Spill +; CHECK-i64-NEXT: stp q5, q4, [sp, #48] // 32-byte Folded Spill +; CHECK-i64-NEXT: stp q7, q6, [sp, #96] // 32-byte Folded Spill +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d3, x0 +; CHECK-i64-NEXT: ldp q0, q1, [sp, #80] // 
32-byte Folded Reload +; CHECK-i64-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-i64-NEXT: mov v3.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: add sp, sp, #144 +; CHECK-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x) + ret <8 x iXLen> %a +} +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) + +define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v16fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: sub sp, sp, #272 +; CHECK-i32-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 272 +; CHECK-i32-NEXT: .cfi_offset w30, -8 +; CHECK-i32-NEXT: .cfi_offset w29, -16 +; CHECK-i32-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #384] +; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #368] +; CHECK-i32-NEXT: stp q7, q4, [sp, #208] // 32-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #352] +; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #336] +; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #320] +; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #304] +; CHECK-i32-NEXT: str q1, [sp, #112] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #288] +; CHECK-i32-NEXT: stp q6, q1, [sp, #80] // 32-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #272] +; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl 
lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: 
str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldp q3, q2, [sp, #192] // 32-byte Folded Reload +; CHECK-i32-NEXT: ldp q1, q0, [sp, #224] // 32-byte Folded Reload +; CHECK-i32-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v3.s[3], w0 +; CHECK-i32-NEXT: add sp, sp, #272 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v16fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: sub sp, sp, #272 +; CHECK-i64-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill +; CHECK-i64-NEXT: .cfi_def_cfa_offset 272 +; CHECK-i64-NEXT: .cfi_offset w30, -8 +; CHECK-i64-NEXT: .cfi_offset w29, -16 +; CHECK-i64-NEXT: str q2, [sp, #160] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr 
q2, [sp, #368] +; CHECK-i64-NEXT: stp q0, q3, [sp] // 32-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: str q2, [sp, #240] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q2, [sp, #384] +; CHECK-i64-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill +; CHECK-i64-NEXT: str q2, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q2, [sp, #336] +; CHECK-i64-NEXT: str q2, [sp, #192] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q2, [sp, #352] +; CHECK-i64-NEXT: str q2, [sp, #176] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q2, [sp, #304] +; CHECK-i64-NEXT: str q2, [sp, #144] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q2, [sp, #320] +; CHECK-i64-NEXT: stp q4, q2, [sp, #112] // 32-byte Folded Spill +; CHECK-i64-NEXT: ldr q2, [sp, #272] +; CHECK-i64-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill +; CHECK-i64-NEXT: ldr q2, [sp, #288] +; CHECK-i64-NEXT: str q2, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #208] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded 
Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: 
fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d7, x0 +; CHECK-i64-NEXT: ldp q0, q1, [sp, #208] // 32-byte Folded Reload +; CHECK-i64-NEXT: ldp q4, q2, [sp, #96] // 32-byte Folded Reload +; CHECK-i64-NEXT: ldr q3, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr q6, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v7.d[1], v1.d[0] +; CHECK-i64-NEXT: ldp q5, q1, [sp, #144] // 32-byte Folded Reload +; CHECK-i64-NEXT: add sp, sp, #272 +; CHECK-i64-NEXT: ret + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x) + ret <16 x iXLen> %a +} +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) + +define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { +; CHECK-i32-LABEL: lrint_v32fp128: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-i32-NEXT: sub sp, sp, #512 +; CHECK-i32-NEXT: .cfi_def_cfa_offset 528 +; CHECK-i32-NEXT: .cfi_offset w30, -8 +; CHECK-i32-NEXT: .cfi_offset w29, -16 +; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #896] +; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #368] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #880] +; CHECK-i32-NEXT: stp q7, q4, [sp, #464] // 32-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #352] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #864] +; CHECK-i32-NEXT: str q6, [sp, #112] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #336] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #848] +; CHECK-i32-NEXT: str q5, [sp, #80] // 16-byte Folded Spill +; CHECK-i32-NEXT: str q1, [sp, #384] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #832] +; CHECK-i32-NEXT: str q1, [sp, #320] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #816] +; CHECK-i32-NEXT: str q1, [sp, #304] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #800] +; CHECK-i32-NEXT: str q1, [sp, #288] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #784] +; CHECK-i32-NEXT: str q1, [sp, #400] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #768] +; CHECK-i32-NEXT: str q1, [sp, #272] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #752] +; CHECK-i32-NEXT: str q1, [sp, #256] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #736] +; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #720] +; CHECK-i32-NEXT: str q1, [sp, #416] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #704] +; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #688] +; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #672] +; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill +; 
CHECK-i32-NEXT: ldr q1, [sp, #656] +; CHECK-i32-NEXT: str q1, [sp, #432] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #640] +; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #624] +; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #608] +; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #592] +; CHECK-i32-NEXT: str q1, [sp, #448] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #576] +; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #560] +; CHECK-i32-NEXT: str q1, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #544] +; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q1, [sp, #528] +; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte 
Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; 
CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr 
q0, [sp, #256] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: fmov s0, w0 +; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v0.s[1], w0 +; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte 
Folded Reload +; CHECK-i32-NEXT: mov v0.s[2], w0 +; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill +; CHECK-i32-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload +; CHECK-i32-NEXT: bl lrintl +; CHECK-i32-NEXT: ldp q7, q6, [sp, #384] // 32-byte Folded Reload +; CHECK-i32-NEXT: ldp q1, q0, [sp, #480] // 32-byte Folded Reload +; CHECK-i32-NEXT: ldp q3, q2, [sp, #448] // 32-byte Folded Reload +; CHECK-i32-NEXT: ldp q5, q4, [sp, #416] // 32-byte Folded Reload +; CHECK-i32-NEXT: mov v7.s[3], w0 +; CHECK-i32-NEXT: add sp, sp, #512 +; CHECK-i32-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v32fp128: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill +; CHECK-i64-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: sub sp, sp, #512 +; CHECK-i64-NEXT: .cfi_def_cfa_offset 544 +; CHECK-i64-NEXT: .cfi_offset w19, -8 +; CHECK-i64-NEXT: .cfi_offset w30, -16 +; CHECK-i64-NEXT: .cfi_offset w29, -32 +; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #896] +; CHECK-i64-NEXT: mov x19, x8 +; CHECK-i64-NEXT: str q7, [sp, #272] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp, #496] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #912] +; CHECK-i64-NEXT: str q6, [sp, #320] // 16-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #864] +; CHECK-i64-NEXT: stp q3, q5, [sp, #16] // 32-byte Folded Spill +; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #880] +; CHECK-i64-NEXT: stp q2, q0, [sp, #416] // 32-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #832] +; CHECK-i64-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #848] +; CHECK-i64-NEXT: stp q4, q0, [sp, #368] // 32-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #800] +; CHECK-i64-NEXT: str q0, [sp, #352] // 
16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #816] +; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #768] +; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #784] +; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #736] +; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #752] +; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #704] +; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #720] +; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #672] +; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #688] +; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #640] +; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #656] +; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #608] +; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #624] +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #576] +; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #592] +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #544] +; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #560] +; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; 
CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #368] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: 
str q0, [sp, #272] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload +; 
CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #336] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte 
Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #400] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #432] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #432] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill +; CHECK-i64-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload +; CHECK-i64-NEXT: bl lrintl +; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #208] +; CHECK-i64-NEXT: fmov d0, x0 +; CHECK-i64-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload +; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] +; CHECK-i64-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q1, [x19, #192] +; CHECK-i64-NEXT: ldr q1, [sp, #304] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q1, [x19, #176] +; CHECK-i64-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #240] +; CHECK-i64-NEXT: str q1, [x19, #160] +; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #224] +; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #144] +; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #128] +; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded 
Reload +; CHECK-i64-NEXT: str q0, [x19, #112] +; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #96] +; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #80] +; CHECK-i64-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #64] +; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #48] +; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #32] +; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19, #16] +; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload +; CHECK-i64-NEXT: str q0, [x19] +; CHECK-i64-NEXT: add sp, sp, #512 +; CHECK-i64-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-i64-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload +; CHECK-i64-NEXT: ret + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16fp128(<32 x fp128> %x) + ret <32 x iXLen> %a +} +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-i32-GI: {{.*}} diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll index 7085cf51916da..d57bf6b2e706c 100644 --- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll @@ -4836,3 +4836,599 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ret <8 x i64> %a } declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) + +define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { +; BE-LABEL: llrint_v1i64_v1f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v1i64_v1f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v1i64_v1f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -32(r1) +; FAST-NEXT: std r0, 48(r1) +; FAST-NEXT: .cfi_def_cfa_offset 32 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: addi r1, r1, 32 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>) + +define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { +; BE-LABEL: llrint_v2i64_v2f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: .cfi_def_cfa_offset 160 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v31, -16 +; 
BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v2i64_v2f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v2, vs0, v30 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v2i64_v2f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -80(r1) +; FAST-NEXT: std r0, 96(r1) +; FAST-NEXT: .cfi_def_cfa_offset 80 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset v30, -32 +; FAST-NEXT: .cfi_offset v31, -16 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, 
v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v2, vs0, v30 +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 80 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) + +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { +; BE-LABEL: llrint_v4i64_v4f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -208(r1) +; BE-NEXT: std r0, 224(r1) +; BE-NEXT: .cfi_def_cfa_offset 208 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v29, -48 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 176 +; BE-NEXT: vmr v29, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 192 +; BE-NEXT: vmr v30, v4 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v5 +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 176 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 
+; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v4i64_v4f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: vmr v29, v3 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v30, v4 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v28 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxmrghd v3, vs0, v30 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v4i64_v4f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -112(r1) +; FAST-NEXT: std r0, 128(r1) +; FAST-NEXT: .cfi_def_cfa_offset 112 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset v28, -64 +; FAST-NEXT: .cfi_offset v29, -48 +; FAST-NEXT: 
.cfi_offset v30, -32 +; FAST-NEXT: .cfi_offset v31, -16 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: vmr v29, v3 +; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v30, v4 +; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v5 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: xxmrghd v29, vs0, v28 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxmrghd v3, vs0, v30 +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 112 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) + +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { +; BE-LABEL: llrint_v8i64_v8f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -304(r1) +; BE-NEXT: std r0, 320(r1) +; BE-NEXT: .cfi_def_cfa_offset 304 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v25, -112 +; BE-NEXT: .cfi_offset v26, -96 +; BE-NEXT: .cfi_offset v27, -80 +; BE-NEXT: .cfi_offset v28, -64 +; BE-NEXT: .cfi_offset v29, -48 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 192 +; BE-NEXT: 
stxvd2x v25, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 208 +; BE-NEXT: vmr v25, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 224 +; BE-NEXT: vmr v26, v4 +; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 240 +; BE-NEXT: vmr v27, v5 +; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 256 +; BE-NEXT: vmr v28, v6 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 272 +; BE-NEXT: vmr v29, v7 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 288 +; BE-NEXT: vmr v30, v8 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v9 +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v25 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v27 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v26 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v28 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: li r3, 288 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 272 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 256 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 240 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 224 +; 
BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 208 +; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v8i64_v8f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -176(r1) +; CHECK-NEXT: std r0, 192(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 176 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v24, -128 +; CHECK-NEXT: .cfi_offset v25, -112 +; CHECK-NEXT: .cfi_offset v26, -96 +; CHECK-NEXT: .cfi_offset v27, -80 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: vmr v25, v3 +; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v26, v4 +; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: vmr v27, v5 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: vmr v28, v6 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v29, v7 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v30, v8 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v9 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v26 +; CHECK-NEXT: xxmrghd v25, vs0, v24 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: mtvsrd v26, r3 +; 
CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: xxmrghd v27, vs0, v26 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v28 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v4, v29 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v3, v27 +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: xxmrghd v5, vs0, v30 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 176 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v8i64_v8f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -176(r1) +; FAST-NEXT: std r0, 192(r1) +; FAST-NEXT: .cfi_def_cfa_offset 176 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset v24, -128 +; FAST-NEXT: .cfi_offset v25, -112 +; FAST-NEXT: .cfi_offset v26, -96 +; FAST-NEXT: .cfi_offset v27, -80 +; FAST-NEXT: .cfi_offset v28, -64 +; FAST-NEXT: .cfi_offset v29, -48 +; FAST-NEXT: .cfi_offset v30, -32 +; FAST-NEXT: .cfi_offset v31, -16 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stvx v24, r1, r3 # 16-byte Folded 
Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: vmr v25, v3 +; FAST-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v26, v4 +; FAST-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 112 +; FAST-NEXT: vmr v27, v5 +; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 128 +; FAST-NEXT: vmr v28, v6 +; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v29, v7 +; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v30, v8 +; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v9 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: mtvsrd v24, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v26 +; FAST-NEXT: xxmrghd v25, vs0, v24 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v28 +; FAST-NEXT: xxmrghd v27, vs0, v26 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: xxmrghd v29, vs0, v28 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v4, v29 +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v3, v27 +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: xxmrghd v5, vs0, v30 +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 112 
+; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 176 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>) diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll index b2ade5300dbc3..c64c2e15179cb 100644 --- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll @@ -4851,3 +4851,2327 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { ret <8 x i64> %a } declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) + +define <1 x i64> @lrint_v1f128(<1 x fp128> %x) { +; BE-LABEL: lrint_v1f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v1f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v1f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -32(r1) +; FAST-NEXT: std r0, 48(r1) +; FAST-NEXT: .cfi_def_cfa_offset 32 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: addi r1, r1, 
32 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128>) + +define <2 x i64> @lrint_v2f128(<2 x fp128> %x) { +; BE-LABEL: lrint_v2f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: .cfi_def_cfa_offset 160 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v2f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v2, vs0, v30 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v2f128: +; FAST: # %bb.0: +; 
FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -80(r1) +; FAST-NEXT: std r0, 96(r1) +; FAST-NEXT: .cfi_def_cfa_offset 80 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset v30, -32 +; FAST-NEXT: .cfi_offset v31, -16 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v2, vs0, v30 +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 80 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128>) + +define <4 x i64> @lrint_v4f128(<4 x fp128> %x) { +; BE-LABEL: lrint_v4f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -208(r1) +; BE-NEXT: std r0, 224(r1) +; BE-NEXT: .cfi_def_cfa_offset 208 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v29, -48 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 176 +; BE-NEXT: vmr v29, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 192 +; BE-NEXT: vmr v30, v4 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v5 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; 
BE-NEXT: std r3, 144(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 176 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v4f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: vmr v29, v3 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v30, v4 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v28 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxmrghd v3, vs0, v30 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v28, 
r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v4f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -112(r1) +; FAST-NEXT: std r0, 128(r1) +; FAST-NEXT: .cfi_def_cfa_offset 112 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset v28, -64 +; FAST-NEXT: .cfi_offset v29, -48 +; FAST-NEXT: .cfi_offset v30, -32 +; FAST-NEXT: .cfi_offset v31, -16 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: vmr v29, v3 +; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v30, v4 +; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v5 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: xxmrghd v29, vs0, v28 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxmrghd v3, vs0, v30 +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 112 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128>) + +define <8 x i64> @lrint_v8f128(<8 x fp128> %x) { +; BE-LABEL: lrint_v8f128: +; BE: # %bb.0: +; BE-NEXT: 
mflr r0 +; BE-NEXT: stdu r1, -304(r1) +; BE-NEXT: std r0, 320(r1) +; BE-NEXT: .cfi_def_cfa_offset 304 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v25, -112 +; BE-NEXT: .cfi_offset v26, -96 +; BE-NEXT: .cfi_offset v27, -80 +; BE-NEXT: .cfi_offset v28, -64 +; BE-NEXT: .cfi_offset v29, -48 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 192 +; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 208 +; BE-NEXT: vmr v25, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 224 +; BE-NEXT: vmr v26, v4 +; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 240 +; BE-NEXT: vmr v27, v5 +; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 256 +; BE-NEXT: vmr v28, v6 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 272 +; BE-NEXT: vmr v29, v7 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 288 +; BE-NEXT: vmr v30, v8 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v9 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v25 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v27 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v26 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v28 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v4, 0, r3 +; 
BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: li r3, 288 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 272 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 256 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 240 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 224 +; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 208 +; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v8f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -176(r1) +; CHECK-NEXT: std r0, 192(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 176 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v24, -128 +; CHECK-NEXT: .cfi_offset v25, -112 +; CHECK-NEXT: .cfi_offset v26, -96 +; CHECK-NEXT: .cfi_offset v27, -80 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: vmr v25, v3 +; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v26, v4 +; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: vmr v27, v5 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: vmr v28, v6 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v29, v7 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v30, v8 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill 
+; CHECK-NEXT: vmr v31, v9 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v26 +; CHECK-NEXT: xxmrghd v25, vs0, v24 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: xxmrghd v27, vs0, v26 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v28 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v4, v29 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v3, v27 +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: xxmrghd v5, vs0, v30 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 176 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v8f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -176(r1) +; FAST-NEXT: std r0, 192(r1) +; FAST-NEXT: .cfi_def_cfa_offset 176 +; FAST-NEXT: 
.cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset v24, -128 +; FAST-NEXT: .cfi_offset v25, -112 +; FAST-NEXT: .cfi_offset v26, -96 +; FAST-NEXT: .cfi_offset v27, -80 +; FAST-NEXT: .cfi_offset v28, -64 +; FAST-NEXT: .cfi_offset v29, -48 +; FAST-NEXT: .cfi_offset v30, -32 +; FAST-NEXT: .cfi_offset v31, -16 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: vmr v25, v3 +; FAST-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v26, v4 +; FAST-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 112 +; FAST-NEXT: vmr v27, v5 +; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 128 +; FAST-NEXT: vmr v28, v6 +; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v29, v7 +; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v30, v8 +; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v9 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: mtvsrd v24, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v26 +; FAST-NEXT: xxmrghd v25, vs0, v24 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v28 +; FAST-NEXT: xxmrghd v27, vs0, v26 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: xxmrghd v29, vs0, v28 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; 
FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v4, v29 +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v3, v27 +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: xxmrghd v5, vs0, v30 +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 112 +; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 176 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128>) + +define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) { +; BE-LABEL: lrint_v16i64_v16f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -496(r1) +; BE-NEXT: std r0, 512(r1) +; BE-NEXT: .cfi_def_cfa_offset 496 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset v20, -192 +; BE-NEXT: .cfi_offset v21, -176 +; BE-NEXT: .cfi_offset v22, -160 +; BE-NEXT: .cfi_offset v23, -144 +; BE-NEXT: .cfi_offset v24, -128 +; BE-NEXT: .cfi_offset v25, -112 +; BE-NEXT: .cfi_offset v26, -96 +; BE-NEXT: .cfi_offset v27, -80 +; BE-NEXT: .cfi_offset v28, -64 +; BE-NEXT: .cfi_offset v29, -48 +; BE-NEXT: .cfi_offset v30, -32 +; BE-NEXT: .cfi_offset v31, -16 +; BE-NEXT: li r3, 304 +; BE-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 320 +; BE-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 336 +; BE-NEXT: vmr v21, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 352 +; BE-NEXT: vmr v22, v4 +; BE-NEXT: stxvd2x v23, r1, r3 
# 16-byte Folded Spill +; BE-NEXT: li r3, 368 +; BE-NEXT: vmr v23, v5 +; BE-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 384 +; BE-NEXT: vmr v24, v6 +; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 400 +; BE-NEXT: vmr v25, v7 +; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 416 +; BE-NEXT: vmr v26, v8 +; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 432 +; BE-NEXT: vmr v27, v9 +; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 448 +; BE-NEXT: vmr v28, v11 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 464 +; BE-NEXT: vmr v29, v10 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 480 +; BE-NEXT: vmr v30, v13 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 128 +; BE-NEXT: stxvd2x v12, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 768 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 784 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 736 +; BE-NEXT: lxvw4x v20, 0, r3 +; BE-NEXT: addi r3, r1, 752 +; BE-NEXT: lxvw4x v31, 0, r3 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v21 +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v23 +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v22 +; BE-NEXT: std r3, 200(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v25 +; BE-NEXT: std r3, 192(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v24 +; BE-NEXT: std r3, 216(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v27 +; BE-NEXT: std r3, 208(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v26 +; BE-NEXT: std r3, 232(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: 
nop +; BE-NEXT: vmr v2, v28 +; BE-NEXT: std r3, 224(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 248(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 240(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 264(r1) +; BE-NEXT: li r3, 128 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 256(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v20 +; BE-NEXT: std r3, 280(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 272(r1) +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 296(r1) +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 288(r1) +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 192 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 208 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 224 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r3, r1, 240 +; BE-NEXT: lxvd2x v6, 0, r3 +; BE-NEXT: addi r3, r1, 256 +; BE-NEXT: lxvd2x v7, 0, r3 +; BE-NEXT: addi r3, r1, 272 +; BE-NEXT: lxvd2x v8, 0, r3 +; BE-NEXT: addi r3, r1, 288 +; BE-NEXT: lxvd2x v9, 0, r3 +; BE-NEXT: li r3, 480 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 464 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 448 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 432 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 416 +; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 400 +; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 384 +; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 368 +; BE-NEXT: lxvd2x v24, r1, r3 
# 16-byte Folded Reload +; BE-NEXT: li r3, 352 +; BE-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 336 +; BE-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 320 +; BE-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 304 +; BE-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 496 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v16i64_v16f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -304(r1) +; CHECK-NEXT: std r0, 320(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 304 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset v20, -192 +; CHECK-NEXT: .cfi_offset v21, -176 +; CHECK-NEXT: .cfi_offset v22, -160 +; CHECK-NEXT: .cfi_offset v23, -144 +; CHECK-NEXT: .cfi_offset v24, -128 +; CHECK-NEXT: .cfi_offset v25, -112 +; CHECK-NEXT: .cfi_offset v26, -96 +; CHECK-NEXT: .cfi_offset v27, -80 +; CHECK-NEXT: .cfi_offset v28, -64 +; CHECK-NEXT: .cfi_offset v29, -48 +; CHECK-NEXT: .cfi_offset v30, -32 +; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: stvx v20, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: stvx v21, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v21, v4 +; CHECK-NEXT: stvx v22, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v22, v6 +; CHECK-NEXT: stvx v23, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: vmr v23, v8 +; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: vmr v24, v9 +; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: vmr v25, v7 +; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: vmr v26, v10 +; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: vmr v27, v5 +; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded 
Spill +; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: vmr v28, v11 +; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: vmr v29, v12 +; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: vmr v30, v3 +; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: addi r3, r1, 576 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 560 +; CHECK-NEXT: lxvd2x vs1, 0, r3 +; CHECK-NEXT: addi r3, r1, 544 +; CHECK-NEXT: lxvd2x vs2, 0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxswapd vs0, vs1 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxswapd vs0, vs2 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: addi r3, r1, 528 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: xxswapd v31, vs0 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: mtvsrd v20, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v21 +; CHECK-NEXT: xxmrghd v30, vs0, v20 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: mtvsrd v21, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v22 +; CHECK-NEXT: xxmrghd v27, vs0, v21 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: mtvsrd v22, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v23 +; CHECK-NEXT: xxmrghd v25, vs0, v22 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v24 +; CHECK-NEXT: mtvsrd v23, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v26 +; 
CHECK-NEXT: xxmrghd v24, vs0, v23 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: xxmrghd v28, vs0, v26 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: vmr v8, v31 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: vmr v7, v29 +; CHECK-NEXT: vmr v6, v28 +; CHECK-NEXT: vmr v3, v27 +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: vmr v4, v25 +; CHECK-NEXT: vmr v5, v24 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: xxmrghd v9, vs0, v26 +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: 
lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 304 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v16i64_v16f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -304(r1) +; FAST-NEXT: std r0, 320(r1) +; FAST-NEXT: .cfi_def_cfa_offset 304 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset v20, -192 +; FAST-NEXT: .cfi_offset v21, -176 +; FAST-NEXT: .cfi_offset v22, -160 +; FAST-NEXT: .cfi_offset v23, -144 +; FAST-NEXT: .cfi_offset v24, -128 +; FAST-NEXT: .cfi_offset v25, -112 +; FAST-NEXT: .cfi_offset v26, -96 +; FAST-NEXT: .cfi_offset v27, -80 +; FAST-NEXT: .cfi_offset v28, -64 +; FAST-NEXT: .cfi_offset v29, -48 +; FAST-NEXT: .cfi_offset v30, -32 +; FAST-NEXT: .cfi_offset v31, -16 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: stvx v20, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 128 +; FAST-NEXT: stvx v21, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v21, v4 +; FAST-NEXT: stvx v22, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v22, v6 +; FAST-NEXT: stvx v23, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 176 +; FAST-NEXT: vmr v23, v8 +; FAST-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 192 +; FAST-NEXT: vmr v24, v9 +; FAST-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 208 +; FAST-NEXT: vmr v25, v7 +; FAST-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 224 +; FAST-NEXT: vmr v26, v10 +; FAST-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 240 +; FAST-NEXT: vmr v27, v5 +; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill 
+; FAST-NEXT: li r3, 256 +; FAST-NEXT: vmr v28, v11 +; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 272 +; FAST-NEXT: vmr v29, v12 +; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 288 +; FAST-NEXT: vmr v30, v3 +; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: addi r3, r1, 576 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 560 +; FAST-NEXT: lxvd2x vs1, 0, r3 +; FAST-NEXT: addi r3, r1, 544 +; FAST-NEXT: lxvd2x vs2, 0, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: xxswapd vs0, vs0 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxswapd vs0, vs1 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxswapd vs0, vs2 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: addi r3, r1, 528 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: xxswapd v31, vs0 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: mtvsrd v20, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v21 +; FAST-NEXT: xxmrghd v30, vs0, v20 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: mtvsrd v21, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v22 +; FAST-NEXT: xxmrghd v27, vs0, v21 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: mtvsrd v22, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v23 +; FAST-NEXT: xxmrghd v25, vs0, v22 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v24 +; FAST-NEXT: mtvsrd v23, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v26 +; FAST-NEXT: xxmrghd v24, vs0, v23 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: 
nop +; FAST-NEXT: vmr v2, v28 +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: xxmrghd v28, vs0, v26 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: xxmrghd v29, vs0, v29 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v31, vs0, v31 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 288 +; FAST-NEXT: vmr v8, v31 +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 272 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: vmr v7, v29 +; FAST-NEXT: vmr v6, v28 +; FAST-NEXT: vmr v3, v27 +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 256 +; FAST-NEXT: vmr v4, v25 +; FAST-NEXT: vmr v5, v24 +; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 240 +; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 224 +; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 208 +; FAST-NEXT: xxmrghd v9, vs0, v26 +; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 192 +; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 176 +; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 160 +; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li 
r3, 144 +; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 112 +; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 304 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128>) + +define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) { +; BE-LABEL: lrint_v32i64_v32f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -896(r1) +; BE-NEXT: std r0, 912(r1) +; BE-NEXT: .cfi_def_cfa_offset 896 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: .cfi_offset r30, -16 +; BE-NEXT: .cfi_offset v20, -208 +; BE-NEXT: .cfi_offset v21, -192 +; BE-NEXT: .cfi_offset v22, -176 +; BE-NEXT: .cfi_offset v23, -160 +; BE-NEXT: .cfi_offset v24, -144 +; BE-NEXT: .cfi_offset v25, -128 +; BE-NEXT: .cfi_offset v26, -112 +; BE-NEXT: .cfi_offset v27, -96 +; BE-NEXT: .cfi_offset v28, -80 +; BE-NEXT: .cfi_offset v29, -64 +; BE-NEXT: .cfi_offset v30, -48 +; BE-NEXT: .cfi_offset v31, -32 +; BE-NEXT: std r30, 880(r1) # 8-byte Folded Spill +; BE-NEXT: mr r30, r3 +; BE-NEXT: addi r3, r1, 1440 +; BE-NEXT: li r4, 688 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 256 +; BE-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 704 +; BE-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 720 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1456 +; BE-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 736 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 240 +; BE-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 752 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1408 +; BE-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 768 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: 
li r3, 224 +; BE-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 784 +; BE-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 800 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1424 +; BE-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 816 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 208 +; BE-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 832 +; BE-NEXT: vmr v28, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1376 +; BE-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 848 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 192 +; BE-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 864 +; BE-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 400 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1392 +; BE-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 416 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 176 +; BE-NEXT: stxvd2x v12, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 368 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1344 +; BE-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 384 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 336 +; BE-NEXT: stxvd2x v9, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 352 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1360 +; BE-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 304 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 320 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1312 +; BE-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 272 
+; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 128 +; BE-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill +; BE-NEXT: li r4, 288 +; BE-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 1328 +; BE-NEXT: lxvw4x v23, 0, r3 +; BE-NEXT: addi r3, r1, 1280 +; BE-NEXT: lxvw4x v22, 0, r3 +; BE-NEXT: addi r3, r1, 1296 +; BE-NEXT: lxvw4x v21, 0, r3 +; BE-NEXT: addi r3, r1, 1248 +; BE-NEXT: lxvw4x v20, 0, r3 +; BE-NEXT: addi r3, r1, 1264 +; BE-NEXT: lxvw4x v31, 0, r3 +; BE-NEXT: addi r3, r1, 1216 +; BE-NEXT: lxvw4x v30, 0, r3 +; BE-NEXT: addi r3, r1, 1232 +; BE-NEXT: lxvw4x v29, 0, r3 +; BE-NEXT: addi r3, r1, 1184 +; BE-NEXT: lxvw4x v27, 0, r3 +; BE-NEXT: addi r3, r1, 1200 +; BE-NEXT: lxvw4x v26, 0, r3 +; BE-NEXT: addi r3, r1, 1152 +; BE-NEXT: lxvw4x v25, 0, r3 +; BE-NEXT: addi r3, r1, 1168 +; BE-NEXT: lxvw4x v24, 0, r3 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v28 +; BE-NEXT: std r3, 440(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v24 +; BE-NEXT: std r3, 432(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v25 +; BE-NEXT: std r3, 536(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v26 +; BE-NEXT: std r3, 528(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v27 +; BE-NEXT: std r3, 552(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 544(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 568(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 560(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v20 +; BE-NEXT: std r3, 584(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v21 +; BE-NEXT: std r3, 576(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v22 +; BE-NEXT: std r3, 600(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v23 +; BE-NEXT: 
std r3, 592(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 616(r1) +; BE-NEXT: li r3, 128 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 608(r1) +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 632(r1) +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 624(r1) +; BE-NEXT: li r3, 176 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 648(r1) +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 640(r1) +; BE-NEXT: li r3, 208 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 664(r1) +; BE-NEXT: li r3, 224 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 656(r1) +; BE-NEXT: li r3, 240 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 680(r1) +; BE-NEXT: li r3, 256 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 672(r1) +; BE-NEXT: li r3, 272 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 456(r1) +; BE-NEXT: li r3, 288 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 448(r1) +; BE-NEXT: li r3, 304 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 472(r1) +; BE-NEXT: li r3, 320 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 464(r1) +; BE-NEXT: li 
r3, 336 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 488(r1) +; BE-NEXT: li r3, 352 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 480(r1) +; BE-NEXT: li r3, 368 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 504(r1) +; BE-NEXT: li r3, 384 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 496(r1) +; BE-NEXT: li r3, 400 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 520(r1) +; BE-NEXT: li r3, 416 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 512(r1) +; BE-NEXT: addi r3, r1, 432 +; BE-NEXT: lxvd2x vs0, 0, r3 +; BE-NEXT: addi r3, r1, 528 +; BE-NEXT: lxvd2x vs1, 0, r3 +; BE-NEXT: addi r3, r1, 544 +; BE-NEXT: lxvd2x vs2, 0, r3 +; BE-NEXT: addi r3, r1, 560 +; BE-NEXT: lxvd2x vs3, 0, r3 +; BE-NEXT: addi r3, r1, 576 +; BE-NEXT: lxvd2x vs4, 0, r3 +; BE-NEXT: addi r3, r1, 592 +; BE-NEXT: lxvd2x vs5, 0, r3 +; BE-NEXT: addi r3, r1, 608 +; BE-NEXT: lxvd2x vs6, 0, r3 +; BE-NEXT: addi r3, r1, 624 +; BE-NEXT: lxvd2x vs7, 0, r3 +; BE-NEXT: addi r3, r1, 640 +; BE-NEXT: lxvd2x vs8, 0, r3 +; BE-NEXT: addi r3, r1, 656 +; BE-NEXT: lxvd2x vs9, 0, r3 +; BE-NEXT: addi r3, r1, 672 +; BE-NEXT: lxvd2x vs10, 0, r3 +; BE-NEXT: addi r3, r1, 448 +; BE-NEXT: lxvd2x vs11, 0, r3 +; BE-NEXT: addi r3, r1, 464 +; BE-NEXT: lxvd2x vs12, 0, r3 +; BE-NEXT: addi r3, r1, 480 +; BE-NEXT: lxvd2x vs13, 0, r3 +; BE-NEXT: addi r3, r1, 496 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 512 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 80 +; BE-NEXT: stxvd2x v3, r30, r3 +; BE-NEXT: li r3, 64 +; BE-NEXT: stxvd2x v2, r30, r3 +; BE-NEXT: li r3, 48 +; BE-NEXT: stxvd2x vs13, r30, r3 +; BE-NEXT: li r3, 32 +; 
BE-NEXT: stxvd2x vs12, r30, r3 +; BE-NEXT: li r3, 16 +; BE-NEXT: stxvd2x vs11, r30, r3 +; BE-NEXT: li r3, 240 +; BE-NEXT: stxvd2x vs10, r30, r3 +; BE-NEXT: li r3, 224 +; BE-NEXT: stxvd2x vs9, r30, r3 +; BE-NEXT: li r3, 208 +; BE-NEXT: stxvd2x vs8, r30, r3 +; BE-NEXT: li r3, 192 +; BE-NEXT: stxvd2x vs7, r30, r3 +; BE-NEXT: li r3, 176 +; BE-NEXT: stxvd2x vs6, r30, r3 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x vs5, r30, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x vs4, r30, r3 +; BE-NEXT: li r3, 128 +; BE-NEXT: stxvd2x vs3, r30, r3 +; BE-NEXT: li r3, 112 +; BE-NEXT: stxvd2x vs2, r30, r3 +; BE-NEXT: li r3, 96 +; BE-NEXT: stxvd2x vs1, r30, r3 +; BE-NEXT: li r3, 864 +; BE-NEXT: stxvd2x vs0, 0, r30 +; BE-NEXT: ld r30, 880(r1) # 8-byte Folded Reload +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 848 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 832 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 816 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 800 +; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 784 +; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 768 +; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 752 +; BE-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 736 +; BE-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 720 +; BE-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 704 +; BE-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 688 +; BE-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 896 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v32i64_v32f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -640(r1) +; CHECK-NEXT: std r0, 656(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 640 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: 
.cfi_offset r30, -16 +; CHECK-NEXT: .cfi_offset v20, -208 +; CHECK-NEXT: .cfi_offset v21, -192 +; CHECK-NEXT: .cfi_offset v22, -176 +; CHECK-NEXT: .cfi_offset v23, -160 +; CHECK-NEXT: .cfi_offset v24, -144 +; CHECK-NEXT: .cfi_offset v25, -128 +; CHECK-NEXT: .cfi_offset v26, -112 +; CHECK-NEXT: .cfi_offset v27, -96 +; CHECK-NEXT: .cfi_offset v28, -80 +; CHECK-NEXT: .cfi_offset v29, -64 +; CHECK-NEXT: .cfi_offset v30, -48 +; CHECK-NEXT: .cfi_offset v31, -32 +; CHECK-NEXT: li r4, 432 +; CHECK-NEXT: std r30, 624(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: addi r3, r1, 1184 +; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 448 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 1168 +; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 464 +; CHECK-NEXT: lxvd2x vs1, 0, r3 +; CHECK-NEXT: addi r3, r1, 1152 +; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 480 +; CHECK-NEXT: lxvd2x vs2, 0, r3 +; CHECK-NEXT: addi r3, r1, 1136 +; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 496 +; CHECK-NEXT: lxvd2x vs3, 0, r3 +; CHECK-NEXT: addi r3, r1, 1120 +; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 512 +; CHECK-NEXT: lxvd2x vs4, 0, r3 +; CHECK-NEXT: addi r3, r1, 1104 +; CHECK-NEXT: vmr v24, v3 +; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 528 +; CHECK-NEXT: lxvd2x vs5, 0, r3 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: addi r3, r1, 1088 +; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 544 +; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 560 +; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 576 +; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 592 +; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 608 +; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill 
+; CHECK-NEXT: li r4, 416 +; CHECK-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 400 +; CHECK-NEXT: stxvd2x v12, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 384 +; CHECK-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 352 +; CHECK-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 336 +; CHECK-NEXT: stxvd2x v9, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 304 +; CHECK-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 288 +; CHECK-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 256 +; CHECK-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 240 +; CHECK-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 224 +; CHECK-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 192 +; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 176 +; CHECK-NEXT: xxswapd vs0, vs1 +; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 160 +; CHECK-NEXT: xxswapd vs0, vs2 +; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 144 +; CHECK-NEXT: xxswapd vs0, vs3 +; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 128 +; CHECK-NEXT: xxswapd vs0, vs4 +; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: li r4, 112 +; CHECK-NEXT: xxswapd vs0, vs5 +; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: addi r3, r1, 1072 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: addi r3, r1, 1056 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte 
Folded Spill +; CHECK-NEXT: addi r3, r1, 1040 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: addi r3, r1, 1024 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 1008 +; CHECK-NEXT: xxswapd v22, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 992 +; CHECK-NEXT: xxswapd v21, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 976 +; CHECK-NEXT: xxswapd v20, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 960 +; CHECK-NEXT: xxswapd v31, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 944 +; CHECK-NEXT: xxswapd v30, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 928 +; CHECK-NEXT: xxswapd v29, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 912 +; CHECK-NEXT: xxswapd v28, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 896 +; CHECK-NEXT: xxswapd v27, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 880 +; CHECK-NEXT: xxswapd v26, vs0 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: xxswapd v25, vs0 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v24 +; CHECK-NEXT: mtvsrd v23, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 368 +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: xxmrghd vs0, vs0, v23 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v26 +; CHECK-NEXT: mtvsrd v25, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 320 +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: xxmrghd vs0, vs0, v25 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: mtvsrd v27, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, 
r3 +; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: xxmrghd vs0, vs0, v27 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: xxmrghd vs0, vs0, v29 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v20 +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v21 +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v22 +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v27, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v27, vs0, v27 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v25, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v25, vs0, v25 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v23, r3 +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 144 +; 
CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v23, vs0, v23 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v22, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v22, vs0, v22 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v21, r3 +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v21, vs0, v21 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v20, r3 +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v20, vs0, v20 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 304 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v24, vs0, v24 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: li r3, 336 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 352 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v26, vs0, v26 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: li r3, 384 +; CHECK-NEXT: lxvd2x v2, 
r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 400 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v28, vs0, v28 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: li r3, 416 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxswapd vs1, v28 +; CHECK-NEXT: li r4, 208 +; CHECK-NEXT: xxswapd vs2, v26 +; CHECK-NEXT: xxswapd vs3, v27 +; CHECK-NEXT: xxmrghd v2, vs0, v30 +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 32 +; CHECK-NEXT: xxswapd vs0, v24 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 16 +; CHECK-NEXT: xxswapd vs1, v20 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: xxswapd vs2, v23 +; CHECK-NEXT: xxswapd vs0, v21 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: xxswapd vs1, v22 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: xxswapd vs0, v25 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: stxvd2x vs3, r30, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 272 +; CHECK-NEXT: xxswapd vs1, v29 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 320 +; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: li r4, 368 +; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload +; CHECK-NEXT: xxswapd vs0, v31 +; CHECK-NEXT: stxvd2x vs0, r30, r3 +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: 
xxswapd vs2, vs2 +; CHECK-NEXT: stxvd2x vs2, r30, r3 +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: xxswapd vs1, vs1 +; CHECK-NEXT: stxvd2x vs1, r30, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: xxswapd vs3, vs3 +; CHECK-NEXT: stxvd2x vs3, r30, r3 +; CHECK-NEXT: li r3, 608 +; CHECK-NEXT: xxswapd vs4, vs4 +; CHECK-NEXT: stxvd2x vs4, 0, r30 +; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 592 +; CHECK-NEXT: ld r30, 624(r1) # 8-byte Folded Reload +; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 576 +; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 560 +; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 544 +; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 528 +; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 512 +; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 496 +; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 480 +; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 464 +; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 448 +; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 432 +; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 640 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v32i64_v32f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -640(r1) +; FAST-NEXT: std r0, 656(r1) +; FAST-NEXT: .cfi_def_cfa_offset 640 +; FAST-NEXT: .cfi_offset lr, 16 +; FAST-NEXT: .cfi_offset r30, -16 +; FAST-NEXT: .cfi_offset v20, -208 +; FAST-NEXT: .cfi_offset v21, -192 +; FAST-NEXT: .cfi_offset v22, -176 +; FAST-NEXT: .cfi_offset v23, -160 +; FAST-NEXT: .cfi_offset v24, -144 +; FAST-NEXT: .cfi_offset v25, -128 +; FAST-NEXT: .cfi_offset v26, -112 +; FAST-NEXT: .cfi_offset v27, -96 +; FAST-NEXT: .cfi_offset 
v28, -80 +; FAST-NEXT: .cfi_offset v29, -64 +; FAST-NEXT: .cfi_offset v30, -48 +; FAST-NEXT: .cfi_offset v31, -32 +; FAST-NEXT: li r4, 432 +; FAST-NEXT: std r30, 624(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r3 +; FAST-NEXT: addi r3, r1, 1184 +; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 448 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 1168 +; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 464 +; FAST-NEXT: lxvd2x vs1, 0, r3 +; FAST-NEXT: addi r3, r1, 1152 +; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 480 +; FAST-NEXT: lxvd2x vs2, 0, r3 +; FAST-NEXT: addi r3, r1, 1136 +; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 496 +; FAST-NEXT: lxvd2x vs3, 0, r3 +; FAST-NEXT: addi r3, r1, 1120 +; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 512 +; FAST-NEXT: lxvd2x vs4, 0, r3 +; FAST-NEXT: addi r3, r1, 1104 +; FAST-NEXT: vmr v24, v3 +; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 528 +; FAST-NEXT: lxvd2x vs5, 0, r3 +; FAST-NEXT: xxswapd vs0, vs0 +; FAST-NEXT: addi r3, r1, 1088 +; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 544 +; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 560 +; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 576 +; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 592 +; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 608 +; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 416 +; FAST-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 400 +; FAST-NEXT: stxvd2x v12, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 384 +; FAST-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 352 +; FAST-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 336 +; FAST-NEXT: stxvd2x v9, r1, r4 
# 16-byte Folded Spill +; FAST-NEXT: li r4, 304 +; FAST-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 288 +; FAST-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 256 +; FAST-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 240 +; FAST-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 224 +; FAST-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 192 +; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 176 +; FAST-NEXT: xxswapd vs0, vs1 +; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 160 +; FAST-NEXT: xxswapd vs0, vs2 +; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 144 +; FAST-NEXT: xxswapd vs0, vs3 +; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 128 +; FAST-NEXT: xxswapd vs0, vs4 +; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: li r4, 112 +; FAST-NEXT: xxswapd vs0, vs5 +; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: xxswapd vs0, vs0 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: addi r3, r1, 1072 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxswapd vs0, vs0 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: addi r3, r1, 1056 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: xxswapd vs0, vs0 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: addi r3, r1, 1040 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxswapd vs0, vs0 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: addi r3, r1, 1024 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 1008 +; FAST-NEXT: xxswapd v22, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 992 +; FAST-NEXT: xxswapd v21, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 
+; FAST-NEXT: addi r3, r1, 976 +; FAST-NEXT: xxswapd v20, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 960 +; FAST-NEXT: xxswapd v31, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 944 +; FAST-NEXT: xxswapd v30, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 928 +; FAST-NEXT: xxswapd v29, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 912 +; FAST-NEXT: xxswapd v28, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 896 +; FAST-NEXT: xxswapd v27, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 880 +; FAST-NEXT: xxswapd v26, vs0 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: xxswapd v25, vs0 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v24 +; FAST-NEXT: mtvsrd v23, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 368 +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: xxmrghd vs0, vs0, v23 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v26 +; FAST-NEXT: mtvsrd v25, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 320 +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: xxmrghd vs0, vs0, v25 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v28 +; FAST-NEXT: mtvsrd v27, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 272 +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: xxmrghd vs0, vs0, v27 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 208 +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: xxmrghd vs0, vs0, v29 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: 
nop +; FAST-NEXT: vmr v2, v20 +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v21 +; FAST-NEXT: xxmrghd v31, vs0, v31 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v22 +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v29, vs0, v29 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v27, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v27, vs0, v27 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v25, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v25, vs0, v25 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v23, r3 +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 144 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v23, vs0, v23 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v22, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 176 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v22, vs0, v22 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v21, r3 +; FAST-NEXT: li r3, 192 +; FAST-NEXT: lxvd2x v2, r1, r3 # 
16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 224 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v21, vs0, v21 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v20, r3 +; FAST-NEXT: li r3, 240 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 256 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v20, vs0, v20 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v24, r3 +; FAST-NEXT: li r3, 288 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 304 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v24, vs0, v24 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: li r3, 336 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 352 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v26, vs0, v26 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: li r3, 384 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 400 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v28, vs0, v28 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: li r3, 416 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxswapd vs1, v28 +; FAST-NEXT: li r4, 208 +; FAST-NEXT: xxswapd vs2, v26 +; FAST-NEXT: xxswapd vs3, v27 +; FAST-NEXT: xxmrghd v2, vs0, v30 +; 
FAST-NEXT: xxswapd vs0, v2 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x vs1, r30, r3 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 32 +; FAST-NEXT: xxswapd vs0, v24 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 16 +; FAST-NEXT: xxswapd vs1, v20 +; FAST-NEXT: stxvd2x vs1, r30, r3 +; FAST-NEXT: li r3, 240 +; FAST-NEXT: xxswapd vs2, v23 +; FAST-NEXT: xxswapd vs0, v21 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 224 +; FAST-NEXT: xxswapd vs1, v22 +; FAST-NEXT: stxvd2x vs1, r30, r3 +; FAST-NEXT: li r3, 208 +; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 192 +; FAST-NEXT: xxswapd vs0, v25 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 176 +; FAST-NEXT: stxvd2x vs3, r30, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 272 +; FAST-NEXT: xxswapd vs1, v29 +; FAST-NEXT: stxvd2x vs1, r30, r3 +; FAST-NEXT: li r3, 144 +; FAST-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 320 +; FAST-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: li r4, 368 +; FAST-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload +; FAST-NEXT: xxswapd vs0, v31 +; FAST-NEXT: stxvd2x vs0, r30, r3 +; FAST-NEXT: li r3, 128 +; FAST-NEXT: xxswapd vs2, vs2 +; FAST-NEXT: stxvd2x vs2, r30, r3 +; FAST-NEXT: li r3, 112 +; FAST-NEXT: xxswapd vs1, vs1 +; FAST-NEXT: stxvd2x vs1, r30, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: xxswapd vs3, vs3 +; FAST-NEXT: stxvd2x vs3, r30, r3 +; FAST-NEXT: li r3, 608 +; FAST-NEXT: xxswapd vs4, vs4 +; FAST-NEXT: stxvd2x vs4, 0, r30 +; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 592 +; FAST-NEXT: ld r30, 624(r1) # 8-byte Folded Reload +; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 576 +; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 560 +; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 544 
+; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 528 +; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 512 +; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 496 +; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 480 +; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 464 +; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 448 +; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 432 +; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 640 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <32 x i64> @llvm.lrint.v32i64.v32f128(<32 x fp128> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.lrint.v32i64.v32f128(<32 x fp128>) diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll index 08ee748497650..f393ffd8a0441 100644 --- a/llvm/test/CodeGen/X86/vector-llrint.ll +++ b/llvm/test/CodeGen/X86/vector-llrint.ll @@ -1246,3 +1246,708 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ret <8 x i64> %a } declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) + +define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { +; X86-LABEL: llrint_v1i64_v1f128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: pushl 8(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; +; SSE-LABEL: llrint_v1i64_v1f128: +; SSE: # %bb.0: +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: 
popq %rcx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: llrint_v1i64_v1f128: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: callq llrintl@PLT +; AVX-NEXT: popq %rcx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +; +; AVX512DQ-LABEL: llrint_v1i64_v1f128: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: pushq %rax +; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: popq %rcx +; AVX512DQ-NEXT: .cfi_def_cfa_offset 8 +; AVX512DQ-NEXT: retq + %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>) + +define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { +; X86-LABEL: llrint_v2i64_v2f128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: pushl 40(%ebp) +; X86-NEXT: pushl 36(%ebp) +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl %ebx, 4(%esi) +; X86-NEXT: movl %edi, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; SSE-LABEL: llrint_v2i64_v2f128: 
+; SSE: # %bb.0: +; SSE-NEXT: subq $40, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 48 +; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: addq $40, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: llrint_v2i64_v2f128: +; AVX: # %bb.0: +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: callq llrintl@PLT +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX-NEXT: callq llrintl@PLT +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +; +; AVX512DQ-LABEL: llrint_v2i64_v2f128: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: subq $40, %rsp +; AVX512DQ-NEXT: .cfi_def_cfa_offset 48 +; AVX512DQ-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm1, %xmm0 +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-NEXT: addq $40, %rsp +; AVX512DQ-NEXT: .cfi_def_cfa_offset 8 +; AVX512DQ-NEXT: retq + 
%a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) + +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { +; X86-LABEL: llrint_v4i64_v4f128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $32, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %edi +; X86-NEXT: movl 40(%ebp), %ebx +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl 56(%ebp) +; X86-NEXT: pushl 52(%ebp) +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: pushl 72(%ebp) +; X86-NEXT: pushl 68(%ebp) +; X86-NEXT: pushl 64(%ebp) +; X86-NEXT: pushl 60(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %edx, 28(%esi) +; X86-NEXT: movl %eax, 24(%esi) +; X86-NEXT: movl %ebx, 20(%esi) +; X86-NEXT: movl %edi, 16(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax 
# 4-byte Reload +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; SSE-LABEL: llrint_v4i64_v4f128: +; SSE: # %bb.0: +; SSE-NEXT: subq $72, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 80 +; SSE-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm1 +; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; SSE-NEXT: # xmm1 = xmm1[0],mem[0] +; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: addq $72, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX1-LABEL: llrint_v4i64_v4f128: +; AVX1: # %bb.0: +; AVX1-NEXT: subq $72, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 80 +; AVX1-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm1, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm3, %xmm0 +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX1-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX1-NEXT: addq $72, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 8 +; AVX1-NEXT: retq +; +; AVX512-LABEL: llrint_v4i64_v4f128: +; AVX512: # %bb.0: +; AVX512-NEXT: subq $72, %rsp +; AVX512-NEXT: .cfi_def_cfa_offset 80 +; AVX512-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) 
# 16-byte Spill +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-NEXT: addq $72, %rsp +; AVX512-NEXT: .cfi_def_cfa_offset 8 +; AVX512-NEXT: retq +; +; AVX512DQ-LABEL: llrint_v4i64_v4f128: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: subq $72, %rsp +; AVX512DQ-NEXT: .cfi_def_cfa_offset 80 +; AVX512DQ-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm3, %xmm0 +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: # 
xmm0 = xmm0[0],mem[0] +; AVX512DQ-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: addq $72, %rsp +; AVX512DQ-NEXT: .cfi_def_cfa_offset 8 +; AVX512DQ-NEXT: retq + %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) + +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { +; X86-LABEL: llrint_v8i64_v8f128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $64, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %edi +; X86-NEXT: movl 40(%ebp), %ebx +; X86-NEXT: pushl 24(%ebp) +; X86-NEXT: pushl 20(%ebp) +; X86-NEXT: pushl 16(%ebp) +; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: pushl 28(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl 56(%ebp) +; X86-NEXT: pushl 52(%ebp) +; X86-NEXT: pushl 48(%ebp) +; X86-NEXT: pushl 44(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl 72(%ebp) +; X86-NEXT: pushl 68(%ebp) +; X86-NEXT: pushl 64(%ebp) +; X86-NEXT: pushl 60(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; 
X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl 88(%ebp) +; X86-NEXT: pushl 84(%ebp) +; X86-NEXT: pushl 80(%ebp) +; X86-NEXT: pushl 76(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl 104(%ebp) +; X86-NEXT: pushl 100(%ebp) +; X86-NEXT: pushl 96(%ebp) +; X86-NEXT: pushl 92(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl 120(%ebp) +; X86-NEXT: pushl 116(%ebp) +; X86-NEXT: pushl 112(%ebp) +; X86-NEXT: pushl 108(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: pushl 136(%ebp) +; X86-NEXT: pushl 132(%ebp) +; X86-NEXT: pushl 128(%ebp) +; X86-NEXT: pushl 124(%ebp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %edx, 60(%esi) +; X86-NEXT: movl %eax, 56(%esi) +; X86-NEXT: movl %ebx, 52(%esi) +; X86-NEXT: movl %edi, 48(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 44(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 40(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 36(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 32(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 24(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 20(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 
+; X86-NEXT: movl %eax, 16(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; SSE-LABEL: llrint_v8i64_v8f128: +; SSE: # %bb.0: +; SSE-NEXT: subq $136, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 144 +; SSE-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm3, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: 
punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq llrintl@PLT +; SSE-NEXT: movq %rax, %xmm3 +; SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload +; SSE-NEXT: # xmm3 = xmm3[0],mem[0] +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload +; SSE-NEXT: addq $136, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX1-LABEL: llrint_v8i64_v8f128: +; AVX1: # %bb.0: +; AVX1-NEXT: subq $152, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 160 +; AVX1-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 
+; AVX1-NEXT: vmovaps %xmm3, %xmm0 +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX1-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq llrintl@PLT +; AVX1-NEXT: vmovq %rax, %xmm0 +; AVX1-NEXT: 
vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVX1-NEXT: addq $152, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 8 +; AVX1-NEXT: retq +; +; AVX512-LABEL: llrint_v8i64_v8f128: +; AVX512: # %bb.0: +; AVX512-NEXT: subq $152, %rsp +; AVX512-NEXT: .cfi_def_cfa_offset 160 +; AVX512-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps %xmm7, %xmm0 +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-NEXT: vinserti128 $1, (%rsp), %ymm0, 
%ymm0 # 16-byte Folded Reload +; AVX512-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq llrintl@PLT +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512-NEXT: addq $152, %rsp +; AVX512-NEXT: .cfi_def_cfa_offset 8 +; AVX512-NEXT: retq +; +; AVX512DQ-LABEL: llrint_v8i64_v8f128: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: subq $152, %rsp +; AVX512DQ-NEXT: .cfi_def_cfa_offset 160 +; AVX512DQ-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm1, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps %xmm7, %xmm0 +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; 
AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-NEXT: callq llrintl@PLT +; AVX512DQ-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512DQ-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512DQ-NEXT: addq $152, %rsp +; AVX512DQ-NEXT: .cfi_def_cfa_offset 8 +; AVX512DQ-NEXT: retq + %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>) diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll index a4c50e539d661..8900e94c50305 100644 --- a/llvm/test/CodeGen/X86/vector-lrint.ll +++ b/llvm/test/CodeGen/X86/vector-lrint.ll @@ -1140,3 +1140,1274 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ret <8 x iXLen> %a } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) + +define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { +; X86-I32-LABEL: lrint_v1fp128: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %ebp +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: .cfi_offset %ebp, -8 +; X86-I32-NEXT: movl %esp, %ebp +; X86-I32-NEXT: .cfi_def_cfa_register %ebp +; X86-I32-NEXT: andl $-16, %esp +; X86-I32-NEXT: subl $16, %esp +; X86-I32-NEXT: pushl 20(%ebp) +; X86-I32-NEXT: pushl 16(%ebp) +; X86-I32-NEXT: pushl 12(%ebp) +; X86-I32-NEXT: pushl 8(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %ebp, %esp +; X86-I32-NEXT: popl %ebp +; X86-I32-NEXT: .cfi_def_cfa %esp, 4 +; X86-I32-NEXT: retl +; +; X86-I64-LABEL: lrint_v1fp128: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; 
X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: andl $-16, %esp +; X86-I64-NEXT: subl $16, %esp +; X86-I64-NEXT: pushl 20(%ebp) +; X86-I64-NEXT: pushl 16(%ebp) +; X86-I64-NEXT: pushl 12(%ebp) +; X86-I64-NEXT: pushl 8(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %ebp, %esp +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl +; +; X86-SSE2-LABEL: lrint_v1fp128: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: pushl 20(%ebp) +; X86-SSE2-NEXT: pushl 16(%ebp) +; X86-SSE2-NEXT: pushl 12(%ebp) +; X86-SSE2-NEXT: pushl 8(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: lrint_v1fp128: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX-NEXT: .cfi_offset %ebp, -8 +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: .cfi_def_cfa_register %ebp +; X86-AVX-NEXT: andl $-16, %esp +; X86-AVX-NEXT: subl $32, %esp +; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: retl +; +; X64-AVX-i32-LABEL: lrint_v1fp128: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: pushq %rax +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX-i32-NEXT: callq lrintl@PLT +; X64-AVX-i32-NEXT: popq %rcx +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX-i32-NEXT: retq +; +; X64-AVX-i64-LABEL: lrint_v1fp128: +; X64-AVX-i64: # %bb.0: +; 
X64-AVX-i64-NEXT: pushq %rax +; X64-AVX-i64-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX-i64-NEXT: callq lrintl@PLT +; X64-AVX-i64-NEXT: popq %rcx +; X64-AVX-i64-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX-i64-NEXT: retq + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>) + +define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { +; X86-I32-LABEL: lrint_v2fp128: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %ebp +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: .cfi_offset %ebp, -8 +; X86-I32-NEXT: movl %esp, %ebp +; X86-I32-NEXT: .cfi_def_cfa_register %ebp +; X86-I32-NEXT: pushl %ebx +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: pushl %esi +; X86-I32-NEXT: andl $-16, %esp +; X86-I32-NEXT: subl $16, %esp +; X86-I32-NEXT: .cfi_offset %esi, -20 +; X86-I32-NEXT: .cfi_offset %edi, -16 +; X86-I32-NEXT: .cfi_offset %ebx, -12 +; X86-I32-NEXT: movl 32(%ebp), %edi +; X86-I32-NEXT: movl 36(%ebp), %ebx +; X86-I32-NEXT: pushl 20(%ebp) +; X86-I32-NEXT: pushl 16(%ebp) +; X86-I32-NEXT: pushl 12(%ebp) +; X86-I32-NEXT: pushl 8(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, %esi +; X86-I32-NEXT: pushl %ebx +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: pushl 28(%ebp) +; X86-I32-NEXT: pushl 24(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, %edx +; X86-I32-NEXT: movl %esi, %eax +; X86-I32-NEXT: leal -12(%ebp), %esp +; X86-I32-NEXT: popl %esi +; X86-I32-NEXT: popl %edi +; X86-I32-NEXT: popl %ebx +; X86-I32-NEXT: popl %ebp +; X86-I32-NEXT: .cfi_def_cfa %esp, 4 +; X86-I32-NEXT: retl +; +; X86-I64-LABEL: lrint_v2fp128: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; 
X86-I64-NEXT: andl $-16, %esp +; X86-I64-NEXT: subl $16, %esp +; X86-I64-NEXT: .cfi_offset %esi, -20 +; X86-I64-NEXT: .cfi_offset %edi, -16 +; X86-I64-NEXT: .cfi_offset %ebx, -12 +; X86-I64-NEXT: movl 8(%ebp), %esi +; X86-I64-NEXT: pushl 24(%ebp) +; X86-I64-NEXT: pushl 20(%ebp) +; X86-I64-NEXT: pushl 16(%ebp) +; X86-I64-NEXT: pushl 12(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, %edi +; X86-I64-NEXT: movl %edx, %ebx +; X86-I64-NEXT: pushl 40(%ebp) +; X86-I64-NEXT: pushl 36(%ebp) +; X86-I64-NEXT: pushl 32(%ebp) +; X86-I64-NEXT: pushl 28(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %edx, 12(%esi) +; X86-I64-NEXT: movl %eax, 8(%esi) +; X86-I64-NEXT: movl %ebx, 4(%esi) +; X86-I64-NEXT: movl %edi, (%esi) +; X86-I64-NEXT: movl %esi, %eax +; X86-I64-NEXT: leal -12(%ebp), %esp +; X86-I64-NEXT: popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebx +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; +; X86-SSE2-LABEL: lrint_v2fp128: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: pushl %edi +; X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $32, %esp +; X86-SSE2-NEXT: .cfi_offset %esi, -20 +; X86-SSE2-NEXT: .cfi_offset %edi, -16 +; X86-SSE2-NEXT: .cfi_offset %ebx, -12 +; X86-SSE2-NEXT: movl 12(%ebp), %edi +; X86-SSE2-NEXT: movl 16(%ebp), %ebx +; X86-SSE2-NEXT: movl 20(%ebp), %esi +; X86-SSE2-NEXT: pushl 36(%ebp) +; X86-SSE2-NEXT: pushl 32(%ebp) +; X86-SSE2-NEXT: pushl 28(%ebp) +; X86-SSE2-NEXT: pushl 24(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, (%esp) # 16-byte Spill +; X86-SSE2-NEXT: pushl %esi +; 
X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: pushl %edi +; X86-SSE2-NEXT: pushl 8(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: punpckldq (%esp), %xmm0 # 16-byte Folded Reload +; X86-SSE2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X86-SSE2-NEXT: leal -12(%ebp), %esp +; X86-SSE2-NEXT: popl %esi +; X86-SSE2-NEXT: popl %edi +; X86-SSE2-NEXT: popl %ebx +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: lrint_v2fp128: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX-NEXT: .cfi_offset %ebp, -8 +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: .cfi_def_cfa_register %ebp +; X86-AVX-NEXT: andl $-16, %esp +; X86-AVX-NEXT: subl $48, %esp +; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: vmovups 24(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: vmovd %eax, %xmm0 +; X86-AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: retl +; +; X64-AVX-i32-LABEL: lrint_v2fp128: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: pushq %rbx +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX-i32-NEXT: subq $16, %rsp +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 32 +; X64-AVX-i32-NEXT: .cfi_offset %rbx, -16 +; X64-AVX-i32-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX-i32-NEXT: vmovaps %xmm1, %xmm0 +; X64-AVX-i32-NEXT: callq lrintl@PLT +; X64-AVX-i32-NEXT: movl %eax, %ebx +; X64-AVX-i32-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; X64-AVX-i32-NEXT: callq lrintl@PLT +; X64-AVX-i32-NEXT: vmovd %eax, %xmm0 +; X64-AVX-i32-NEXT: 
vpinsrd $1, %ebx, %xmm0, %xmm0 +; X64-AVX-i32-NEXT: addq $16, %rsp +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX-i32-NEXT: popq %rbx +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX-i32-NEXT: retq +; +; X64-AVX-i64-LABEL: lrint_v2fp128: +; X64-AVX-i64: # %bb.0: +; X64-AVX-i64-NEXT: subq $40, %rsp +; X64-AVX-i64-NEXT: .cfi_def_cfa_offset 48 +; X64-AVX-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX-i64-NEXT: vmovaps %xmm1, %xmm0 +; X64-AVX-i64-NEXT: callq lrintl@PLT +; X64-AVX-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX-i64-NEXT: callq lrintl@PLT +; X64-AVX-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX-i64-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX-i64-NEXT: addq $40, %rsp +; X64-AVX-i64-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX-i64-NEXT: retq + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x) + ret <2 x iXLen> %a +} +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>) + +define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { +; X86-I32-LABEL: lrint_v4fp128: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %ebp +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: .cfi_offset %ebp, -8 +; X86-I32-NEXT: movl %esp, %ebp +; X86-I32-NEXT: .cfi_def_cfa_register %ebp +; X86-I32-NEXT: pushl %ebx +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: pushl %esi +; X86-I32-NEXT: andl $-16, %esp +; X86-I32-NEXT: subl $16, %esp +; X86-I32-NEXT: .cfi_offset %esi, -20 +; X86-I32-NEXT: .cfi_offset %edi, -16 +; X86-I32-NEXT: .cfi_offset %ebx, -12 +; X86-I32-NEXT: movl 8(%ebp), %esi +; X86-I32-NEXT: movl 36(%ebp), %ebx +; X86-I32-NEXT: movl 40(%ebp), %edi +; X86-I32-NEXT: pushl 24(%ebp) +; X86-I32-NEXT: pushl 20(%ebp) +; X86-I32-NEXT: pushl 16(%ebp) +; X86-I32-NEXT: pushl 12(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, 
%esp +; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: pushl %ebx +; X86-I32-NEXT: pushl 32(%ebp) +; X86-I32-NEXT: pushl 28(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, %ebx +; X86-I32-NEXT: pushl 56(%ebp) +; X86-I32-NEXT: pushl 52(%ebp) +; X86-I32-NEXT: pushl 48(%ebp) +; X86-I32-NEXT: pushl 44(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, %edi +; X86-I32-NEXT: pushl 72(%ebp) +; X86-I32-NEXT: pushl 68(%ebp) +; X86-I32-NEXT: pushl 64(%ebp) +; X86-I32-NEXT: pushl 60(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, 12(%esi) +; X86-I32-NEXT: movl %edi, 8(%esi) +; X86-I32-NEXT: movl %ebx, 4(%esi) +; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I32-NEXT: movl %eax, (%esi) +; X86-I32-NEXT: movl %esi, %eax +; X86-I32-NEXT: leal -12(%ebp), %esp +; X86-I32-NEXT: popl %esi +; X86-I32-NEXT: popl %edi +; X86-I32-NEXT: popl %ebx +; X86-I32-NEXT: popl %ebp +; X86-I32-NEXT: .cfi_def_cfa %esp, 4 +; X86-I32-NEXT: retl $4 +; +; X86-I64-LABEL: lrint_v4fp128: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; X86-I64-NEXT: andl $-16, %esp +; X86-I64-NEXT: subl $32, %esp +; X86-I64-NEXT: .cfi_offset %esi, -20 +; X86-I64-NEXT: .cfi_offset %edi, -16 +; X86-I64-NEXT: .cfi_offset %ebx, -12 +; X86-I64-NEXT: movl 8(%ebp), %esi +; X86-I64-NEXT: movl 36(%ebp), %edi +; X86-I64-NEXT: movl 40(%ebp), %ebx +; X86-I64-NEXT: pushl 24(%ebp) +; X86-I64-NEXT: pushl 20(%ebp) +; X86-I64-NEXT: pushl 16(%ebp) +; X86-I64-NEXT: pushl 12(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl 32(%ebp) +; X86-I64-NEXT: pushl 28(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: pushl 56(%ebp) +; X86-I64-NEXT: pushl 52(%ebp) +; X86-I64-NEXT: pushl 48(%ebp) +; X86-I64-NEXT: pushl 44(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, %edi +; X86-I64-NEXT: movl %edx, %ebx +; X86-I64-NEXT: pushl 72(%ebp) +; X86-I64-NEXT: pushl 68(%ebp) +; X86-I64-NEXT: pushl 64(%ebp) +; X86-I64-NEXT: pushl 60(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %edx, 28(%esi) +; X86-I64-NEXT: movl %eax, 24(%esi) +; X86-I64-NEXT: movl %ebx, 20(%esi) +; X86-I64-NEXT: movl %edi, 16(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 12(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 8(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 4(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, (%esi) +; X86-I64-NEXT: movl %esi, %eax +; X86-I64-NEXT: leal -12(%ebp), %esp +; X86-I64-NEXT: popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebx +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; +; X86-SSE2-LABEL: lrint_v4fp128: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: pushl %edi +; 
X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $48, %esp +; X86-SSE2-NEXT: .cfi_offset %esi, -20 +; X86-SSE2-NEXT: .cfi_offset %edi, -16 +; X86-SSE2-NEXT: .cfi_offset %ebx, -12 +; X86-SSE2-NEXT: movl 48(%ebp), %edi +; X86-SSE2-NEXT: movl 52(%ebp), %ebx +; X86-SSE2-NEXT: pushl 36(%ebp) +; X86-SSE2-NEXT: pushl 32(%ebp) +; X86-SSE2-NEXT: pushl 28(%ebp) +; X86-SSE2-NEXT: pushl 24(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %eax, %esi +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: pushl %edi +; X86-SSE2-NEXT: pushl 44(%ebp) +; X86-SSE2-NEXT: pushl 40(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %eax, %edi +; X86-SSE2-NEXT: pushl 68(%ebp) +; X86-SSE2-NEXT: pushl 64(%ebp) +; X86-SSE2-NEXT: pushl 60(%ebp) +; X86-SSE2-NEXT: pushl 56(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: movd %edi, %xmm1 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE2-NEXT: movdqa %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-SSE2-NEXT: movd %esi, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, (%esp) # 16-byte Spill +; X86-SSE2-NEXT: pushl 20(%ebp) +; X86-SSE2-NEXT: pushl 16(%ebp) +; X86-SSE2-NEXT: pushl 12(%ebp) +; X86-SSE2-NEXT: pushl 8(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: punpckldq (%esp), %xmm0 # 16-byte Folded Reload +; X86-SSE2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X86-SSE2-NEXT: punpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; X86-SSE2-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-SSE2-NEXT: leal -12(%ebp), %esp +; X86-SSE2-NEXT: popl %esi +; X86-SSE2-NEXT: popl %edi +; X86-SSE2-NEXT: popl %ebx +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: lrint_v4fp128: +; X86-AVX: # %bb.0: +; 
X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX-NEXT: .cfi_offset %ebp, -8 +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: .cfi_def_cfa_register %ebp +; X86-AVX-NEXT: pushl %edi +; X86-AVX-NEXT: pushl %esi +; X86-AVX-NEXT: andl $-16, %esp +; X86-AVX-NEXT: subl $32, %esp +; X86-AVX-NEXT: .cfi_offset %esi, -16 +; X86-AVX-NEXT: .cfi_offset %edi, -12 +; X86-AVX-NEXT: vmovups 40(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: movl %eax, %esi +; X86-AVX-NEXT: vmovups 24(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: movl %eax, %edi +; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: vmovups 56(%ebp), %xmm0 +; X86-AVX-NEXT: vmovups %xmm0, (%esp) +; X86-AVX-NEXT: vmovd %eax, %xmm0 +; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 +; X86-AVX-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX-NEXT: calll lrintl +; X86-AVX-NEXT: vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X86-AVX-NEXT: leal -8(%ebp), %esp +; X86-AVX-NEXT: popl %esi +; X86-AVX-NEXT: popl %edi +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: retl +; +; X64-AVX-i32-LABEL: lrint_v4fp128: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: pushq %rbx +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX-i32-NEXT: subq $48, %rsp +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 64 +; X64-AVX-i32-NEXT: .cfi_offset %rbx, -16 +; X64-AVX-i32-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX-i32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX-i32-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX-i32-NEXT: vmovaps %xmm1, %xmm0 +; X64-AVX-i32-NEXT: callq lrintl@PLT +; X64-AVX-i32-NEXT: movl %eax, %ebx +; 
X64-AVX-i32-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; X64-AVX-i32-NEXT: callq lrintl@PLT +; X64-AVX-i32-NEXT: vmovd %eax, %xmm0 +; X64-AVX-i32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 +; X64-AVX-i32-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX-i32-NEXT: callq lrintl@PLT +; X64-AVX-i32-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; X64-AVX-i32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; X64-AVX-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX-i32-NEXT: callq lrintl@PLT +; X64-AVX-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX-i32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X64-AVX-i32-NEXT: addq $48, %rsp +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX-i32-NEXT: popq %rbx +; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX-i32-NEXT: retq +; +; X64-AVX1-i64-LABEL: lrint_v4fp128: +; X64-AVX1-i64: # %bb.0: +; X64-AVX1-i64-NEXT: subq $72, %rsp +; X64-AVX1-i64-NEXT: .cfi_def_cfa_offset 80 +; X64-AVX1-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm3, %xmm0 +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq 
lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX1-i64-NEXT: addq $72, %rsp +; X64-AVX1-i64-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX1-i64-NEXT: retq +; +; AVX512-i64-LABEL: lrint_v4fp128: +; AVX512-i64: # %bb.0: +; AVX512-i64-NEXT: subq $72, %rsp +; AVX512-i64-NEXT: .cfi_def_cfa_offset 80 +; AVX512-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm3, %xmm0 +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: # xmm0 = 
xmm0[0],mem[0] +; AVX512-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: addq $72, %rsp +; AVX512-i64-NEXT: .cfi_def_cfa_offset 8 +; AVX512-i64-NEXT: retq +; +; AVX512DQ-i64-LABEL: lrint_v4fp128: +; AVX512DQ-i64: # %bb.0: +; AVX512DQ-i64-NEXT: subq $72, %rsp +; AVX512DQ-i64-NEXT: .cfi_def_cfa_offset 80 +; AVX512DQ-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm3, %xmm0 +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512DQ-i64-NEXT: addq $72, %rsp +; AVX512DQ-i64-NEXT: .cfi_def_cfa_offset 8 +; AVX512DQ-i64-NEXT: retq + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x) + ret <4 x iXLen> %a +} +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) + +define 
<8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { +; X86-I32-LABEL: lrint_v8fp128: +; X86-I32: # %bb.0: +; X86-I32-NEXT: pushl %ebp +; X86-I32-NEXT: .cfi_def_cfa_offset 8 +; X86-I32-NEXT: .cfi_offset %ebp, -8 +; X86-I32-NEXT: movl %esp, %ebp +; X86-I32-NEXT: .cfi_def_cfa_register %ebp +; X86-I32-NEXT: pushl %ebx +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: pushl %esi +; X86-I32-NEXT: andl $-16, %esp +; X86-I32-NEXT: subl $32, %esp +; X86-I32-NEXT: .cfi_offset %esi, -20 +; X86-I32-NEXT: .cfi_offset %edi, -16 +; X86-I32-NEXT: .cfi_offset %ebx, -12 +; X86-I32-NEXT: movl 8(%ebp), %esi +; X86-I32-NEXT: movl 36(%ebp), %ebx +; X86-I32-NEXT: movl 40(%ebp), %edi +; X86-I32-NEXT: pushl 24(%ebp) +; X86-I32-NEXT: pushl 20(%ebp) +; X86-I32-NEXT: pushl 16(%ebp) +; X86-I32-NEXT: pushl 12(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I32-NEXT: pushl %edi +; X86-I32-NEXT: pushl %ebx +; X86-I32-NEXT: pushl 32(%ebp) +; X86-I32-NEXT: pushl 28(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I32-NEXT: pushl 56(%ebp) +; X86-I32-NEXT: pushl 52(%ebp) +; X86-I32-NEXT: pushl 48(%ebp) +; X86-I32-NEXT: pushl 44(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I32-NEXT: pushl 72(%ebp) +; X86-I32-NEXT: pushl 68(%ebp) +; X86-I32-NEXT: pushl 64(%ebp) +; X86-I32-NEXT: pushl 60(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I32-NEXT: pushl 88(%ebp) +; X86-I32-NEXT: pushl 84(%ebp) +; X86-I32-NEXT: pushl 80(%ebp) +; X86-I32-NEXT: pushl 76(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I32-NEXT: pushl 104(%ebp) +; X86-I32-NEXT: pushl 
100(%ebp) +; X86-I32-NEXT: pushl 96(%ebp) +; X86-I32-NEXT: pushl 92(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, %ebx +; X86-I32-NEXT: pushl 120(%ebp) +; X86-I32-NEXT: pushl 116(%ebp) +; X86-I32-NEXT: pushl 112(%ebp) +; X86-I32-NEXT: pushl 108(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, %edi +; X86-I32-NEXT: pushl 136(%ebp) +; X86-I32-NEXT: pushl 132(%ebp) +; X86-I32-NEXT: pushl 128(%ebp) +; X86-I32-NEXT: pushl 124(%ebp) +; X86-I32-NEXT: calll lrintl +; X86-I32-NEXT: addl $16, %esp +; X86-I32-NEXT: movl %eax, 28(%esi) +; X86-I32-NEXT: movl %edi, 24(%esi) +; X86-I32-NEXT: movl %ebx, 20(%esi) +; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I32-NEXT: movl %eax, 16(%esi) +; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I32-NEXT: movl %eax, 12(%esi) +; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I32-NEXT: movl %eax, 8(%esi) +; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I32-NEXT: movl %eax, 4(%esi) +; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I32-NEXT: movl %eax, (%esi) +; X86-I32-NEXT: movl %esi, %eax +; X86-I32-NEXT: leal -12(%ebp), %esp +; X86-I32-NEXT: popl %esi +; X86-I32-NEXT: popl %edi +; X86-I32-NEXT: popl %ebx +; X86-I32-NEXT: popl %ebp +; X86-I32-NEXT: .cfi_def_cfa %esp, 4 +; X86-I32-NEXT: retl $4 +; +; X86-I64-LABEL: lrint_v8fp128: +; X86-I64: # %bb.0: +; X86-I64-NEXT: pushl %ebp +; X86-I64-NEXT: .cfi_def_cfa_offset 8 +; X86-I64-NEXT: .cfi_offset %ebp, -8 +; X86-I64-NEXT: movl %esp, %ebp +; X86-I64-NEXT: .cfi_def_cfa_register %ebp +; X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl %esi +; X86-I64-NEXT: andl $-16, %esp +; X86-I64-NEXT: subl $64, %esp +; X86-I64-NEXT: .cfi_offset %esi, -20 +; X86-I64-NEXT: .cfi_offset %edi, -16 +; X86-I64-NEXT: .cfi_offset %ebx, -12 +; X86-I64-NEXT: movl 
8(%ebp), %esi +; X86-I64-NEXT: movl 36(%ebp), %edi +; X86-I64-NEXT: movl 40(%ebp), %ebx +; X86-I64-NEXT: pushl 24(%ebp) +; X86-I64-NEXT: pushl 20(%ebp) +; X86-I64-NEXT: pushl 16(%ebp) +; X86-I64-NEXT: pushl 12(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: pushl %ebx +; X86-I64-NEXT: pushl %edi +; X86-I64-NEXT: pushl 32(%ebp) +; X86-I64-NEXT: pushl 28(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: pushl 56(%ebp) +; X86-I64-NEXT: pushl 52(%ebp) +; X86-I64-NEXT: pushl 48(%ebp) +; X86-I64-NEXT: pushl 44(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: pushl 72(%ebp) +; X86-I64-NEXT: pushl 68(%ebp) +; X86-I64-NEXT: pushl 64(%ebp) +; X86-I64-NEXT: pushl 60(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: pushl 88(%ebp) +; X86-I64-NEXT: pushl 84(%ebp) +; X86-I64-NEXT: pushl 80(%ebp) +; X86-I64-NEXT: pushl 76(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: pushl 104(%ebp) +; X86-I64-NEXT: pushl 100(%ebp) +; X86-I64-NEXT: pushl 96(%ebp) +; X86-I64-NEXT: pushl 92(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-I64-NEXT: pushl 120(%ebp) +; X86-I64-NEXT: pushl 116(%ebp) +; X86-I64-NEXT: pushl 112(%ebp) +; X86-I64-NEXT: pushl 108(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %eax, %edi +; X86-I64-NEXT: movl %edx, %ebx +; X86-I64-NEXT: pushl 136(%ebp) +; X86-I64-NEXT: pushl 132(%ebp) +; X86-I64-NEXT: pushl 128(%ebp) +; X86-I64-NEXT: pushl 124(%ebp) +; X86-I64-NEXT: calll lrintl +; X86-I64-NEXT: addl $16, %esp +; X86-I64-NEXT: movl %edx, 60(%esi) +; X86-I64-NEXT: movl %eax, 56(%esi) +; X86-I64-NEXT: movl %ebx, 52(%esi) +; X86-I64-NEXT: movl %edi, 48(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 44(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 40(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 36(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 32(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 28(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 24(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 20(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 16(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 12(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 8(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, 4(%esi) +; X86-I64-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-I64-NEXT: movl %eax, (%esi) +; X86-I64-NEXT: movl %esi, %eax +; X86-I64-NEXT: leal -12(%ebp), %esp +; X86-I64-NEXT: 
popl %esi +; X86-I64-NEXT: popl %edi +; X86-I64-NEXT: popl %ebx +; X86-I64-NEXT: popl %ebp +; X86-I64-NEXT: .cfi_def_cfa %esp, 4 +; X86-I64-NEXT: retl $4 +; +; X86-SSE2-LABEL: lrint_v8fp128: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: pushl %edi +; X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: andl $-16, %esp +; X86-SSE2-NEXT: subl $64, %esp +; X86-SSE2-NEXT: .cfi_offset %esi, -20 +; X86-SSE2-NEXT: .cfi_offset %edi, -16 +; X86-SSE2-NEXT: .cfi_offset %ebx, -12 +; X86-SSE2-NEXT: movl 108(%ebp), %esi +; X86-SSE2-NEXT: movl 112(%ebp), %edi +; X86-SSE2-NEXT: movl 116(%ebp), %ebx +; X86-SSE2-NEXT: pushl 100(%ebp) +; X86-SSE2-NEXT: pushl 96(%ebp) +; X86-SSE2-NEXT: pushl 92(%ebp) +; X86-SSE2-NEXT: pushl 88(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE2-NEXT: pushl %ebx +; X86-SSE2-NEXT: pushl %edi +; X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: pushl 104(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE2-NEXT: pushl 132(%ebp) +; X86-SSE2-NEXT: pushl 128(%ebp) +; X86-SSE2-NEXT: pushl 124(%ebp) +; X86-SSE2-NEXT: pushl 120(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-SSE2-NEXT: pushl 20(%ebp) +; X86-SSE2-NEXT: pushl 16(%ebp) +; X86-SSE2-NEXT: pushl 12(%ebp) +; X86-SSE2-NEXT: pushl 8(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %eax, %esi +; X86-SSE2-NEXT: pushl 36(%ebp) +; X86-SSE2-NEXT: pushl 32(%ebp) +; X86-SSE2-NEXT: pushl 28(%ebp) +; X86-SSE2-NEXT: pushl 24(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; 
X86-SSE2-NEXT: movl %eax, %edi +; X86-SSE2-NEXT: pushl 52(%ebp) +; X86-SSE2-NEXT: pushl 48(%ebp) +; X86-SSE2-NEXT: pushl 44(%ebp) +; X86-SSE2-NEXT: pushl 40(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movl %eax, %ebx +; X86-SSE2-NEXT: pushl 68(%ebp) +; X86-SSE2-NEXT: pushl 64(%ebp) +; X86-SSE2-NEXT: pushl 60(%ebp) +; X86-SSE2-NEXT: pushl 56(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: movd %ebx, %xmm1 +; X86-SSE2-NEXT: movd %edi, %xmm2 +; X86-SSE2-NEXT: movd %esi, %xmm4 +; X86-SSE2-NEXT: movss (%esp), %xmm3 # 4-byte Reload +; X86-SSE2-NEXT: # xmm3 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 4-byte Reload +; X86-SSE2-NEXT: # xmm5 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 4-byte Reload +; X86-SSE2-NEXT: # xmm6 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm1[0] +; X86-SSE2-NEXT: movdqa %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-SSE2-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] +; X86-SSE2-NEXT: movaps %xmm5, (%esp) # 16-byte Spill +; X86-SSE2-NEXT: pushl 84(%ebp) +; X86-SSE2-NEXT: pushl 80(%ebp) +; X86-SSE2-NEXT: pushl 76(%ebp) +; X86-SSE2-NEXT: pushl 72(%ebp) +; X86-SSE2-NEXT: calll lrintl +; X86-SSE2-NEXT: addl $16, %esp +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: punpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X86-SSE2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; X86-SSE2-NEXT: punpcklqdq (%esp), %xmm1 # 16-byte Folded Reload +; X86-SSE2-NEXT: # xmm1 = xmm1[0],mem[0] +; X86-SSE2-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; 
X86-SSE2-NEXT: leal -12(%ebp), %esp +; X86-SSE2-NEXT: popl %esi +; X86-SSE2-NEXT: popl %edi +; X86-SSE2-NEXT: popl %ebx +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl +; +; X86-AVX1-LABEL: lrint_v8fp128: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: pushl %ebp +; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX1-NEXT: .cfi_offset %ebp, -8 +; X86-AVX1-NEXT: movl %esp, %ebp +; X86-AVX1-NEXT: .cfi_def_cfa_register %ebp +; X86-AVX1-NEXT: pushl %ebx +; X86-AVX1-NEXT: pushl %edi +; X86-AVX1-NEXT: pushl %esi +; X86-AVX1-NEXT: andl $-16, %esp +; X86-AVX1-NEXT: subl $80, %esp +; X86-AVX1-NEXT: .cfi_offset %esi, -20 +; X86-AVX1-NEXT: .cfi_offset %edi, -16 +; X86-AVX1-NEXT: .cfi_offset %ebx, -12 +; X86-AVX1-NEXT: vmovups 40(%ebp), %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, (%esp) +; X86-AVX1-NEXT: calll lrintl +; X86-AVX1-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-AVX1-NEXT: vmovups 24(%ebp), %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, (%esp) +; X86-AVX1-NEXT: calll lrintl +; X86-AVX1-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-AVX1-NEXT: vmovups 8(%ebp), %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, (%esp) +; X86-AVX1-NEXT: calll lrintl +; X86-AVX1-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-AVX1-NEXT: vmovups 120(%ebp), %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, (%esp) +; X86-AVX1-NEXT: calll lrintl +; X86-AVX1-NEXT: movl %eax, %esi +; X86-AVX1-NEXT: vmovups 104(%ebp), %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, (%esp) +; X86-AVX1-NEXT: calll lrintl +; X86-AVX1-NEXT: movl %eax, %edi +; X86-AVX1-NEXT: vmovups 88(%ebp), %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, (%esp) +; X86-AVX1-NEXT: calll lrintl +; X86-AVX1-NEXT: movl %eax, %ebx +; X86-AVX1-NEXT: vmovups 72(%ebp), %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, (%esp) +; X86-AVX1-NEXT: calll lrintl +; X86-AVX1-NEXT: vmovd %eax, %xmm0 +; X86-AVX1-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpinsrd $2, %edi, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpinsrd $3, 
%esi, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; X86-AVX1-NEXT: vmovups 56(%ebp), %xmm1 +; X86-AVX1-NEXT: vmovups %xmm1, (%esp) +; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; X86-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll lrintl +; X86-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X86-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: leal -12(%ebp), %esp +; X86-AVX1-NEXT: popl %esi +; X86-AVX1-NEXT: popl %edi +; X86-AVX1-NEXT: popl %ebx +; X86-AVX1-NEXT: popl %ebp +; X86-AVX1-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX1-NEXT: retl +; +; X64-AVX1-i32-LABEL: lrint_v8fp128: +; X64-AVX1-i32: # %bb.0: +; X64-AVX1-i32-NEXT: pushq %rbx +; X64-AVX1-i32-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX1-i32-NEXT: subq $112, %rsp +; X64-AVX1-i32-NEXT: .cfi_def_cfa_offset 128 +; X64-AVX1-i32-NEXT: .cfi_offset %rbx, -16 +; X64-AVX1-i32-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps %xmm4, (%rsp) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps %xmm5, %xmm0 +; X64-AVX1-i32-NEXT: callq lrintl@PLT +; X64-AVX1-i32-NEXT: movl %eax, %ebx +; X64-AVX1-i32-NEXT: vmovaps (%rsp), %xmm0 # 
16-byte Reload +; X64-AVX1-i32-NEXT: callq lrintl@PLT +; X64-AVX1-i32-NEXT: vmovd %eax, %xmm0 +; X64-AVX1-i32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 +; X64-AVX1-i32-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: callq lrintl@PLT +; X64-AVX1-i32-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; X64-AVX1-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: callq lrintl@PLT +; X64-AVX1-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X64-AVX1-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: callq lrintl@PLT +; X64-AVX1-i32-NEXT: movl %eax, %ebx +; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: callq lrintl@PLT +; X64-AVX1-i32-NEXT: vmovd %eax, %xmm0 +; X64-AVX1-i32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 +; X64-AVX1-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: callq lrintl@PLT +; X64-AVX1-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 +; X64-AVX1-i32-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i32-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: callq lrintl@PLT +; X64-AVX1-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X64-AVX1-i32-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX1-i32-NEXT: addq 
$112, %rsp +; X64-AVX1-i32-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX1-i32-NEXT: popq %rbx +; X64-AVX1-i32-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX1-i32-NEXT: retq +; +; X64-AVX1-i64-LABEL: lrint_v8fp128: +; X64-AVX1-i64: # %bb.0: +; X64-AVX1-i64-NEXT: subq $152, %rsp +; X64-AVX1-i64-NEXT: .cfi_def_cfa_offset 160 +; X64-AVX1-i64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps %xmm3, %xmm0 +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded 
Reload +; X64-AVX1-i64-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: vzeroupper +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-i64-NEXT: callq lrintl@PLT +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload +; X64-AVX1-i64-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; X64-AVX1-i64-NEXT: addq $152, %rsp +; X64-AVX1-i64-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX1-i64-NEXT: retq +; +; AVX512-i64-LABEL: lrint_v8fp128: +; AVX512-i64: # %bb.0: +; AVX512-i64-NEXT: subq $152, %rsp +; AVX512-i64-NEXT: .cfi_def_cfa_offset 160 +; AVX512-i64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; 
AVX512-i64-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps %xmm7, %xmm0 +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: vzeroupper +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-i64-NEXT: 
vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512-i64-NEXT: callq lrintl@PLT +; AVX512-i64-NEXT: vmovq %rax, %xmm0 +; AVX512-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512-i64-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512-i64-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512-i64-NEXT: addq $152, %rsp +; AVX512-i64-NEXT: .cfi_def_cfa_offset 8 +; AVX512-i64-NEXT: retq +; +; AVX512DQ-i64-LABEL: lrint_v8fp128: +; AVX512DQ-i64: # %bb.0: +; AVX512DQ-i64-NEXT: subq $152, %rsp +; AVX512DQ-i64-NEXT: .cfi_def_cfa_offset 160 +; AVX512DQ-i64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps %xmm7, %xmm0 +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; 
AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512DQ-i64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: vzeroupper +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-i64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX512DQ-i64-NEXT: callq lrintl@PLT +; AVX512DQ-i64-NEXT: vmovq %rax, %xmm0 +; AVX512DQ-i64-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0] +; 
AVX512DQ-i64-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX512DQ-i64-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload +; AVX512DQ-i64-NEXT: addq $152, %rsp +; AVX512DQ-i64-NEXT: .cfi_def_cfa_offset 8 +; AVX512DQ-i64-NEXT: retq + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x) + ret <8 x iXLen> %a +} +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) From 8ace6b7ff788c2bb232744d75b7dfd933735eee7 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 8 Aug 2025 07:01:31 -0500 Subject: [PATCH 4/8] Add a vector test to arm --- llvm/test/CodeGen/ARM/vector-llrint.ll | 11126 +++++++++++++++++++ llvm/test/CodeGen/ARM/vector-lrint.ll | 13251 +++++++++++++++++++++++ 2 files changed, 24377 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/vector-llrint.ll create mode 100644 llvm/test/CodeGen/ARM/vector-lrint.ll diff --git a/llvm/test/CodeGen/ARM/vector-llrint.ll b/llvm/test/CodeGen/ARM/vector-llrint.ll new file mode 100644 index 0000000000000..870947fac063e --- /dev/null +++ b/llvm/test/CodeGen/ARM/vector-llrint.ll @@ -0,0 +1,11126 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-NEON +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-NEON +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | 
FileCheck %s --check-prefix=BE-NEON +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON + +define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { +; LE-LABEL: llrint_v1i64_v1f16: +; LE: @ %bb.0: +; LE-NEXT: .save {r11, lr} +; LE-NEXT: push {r11, lr} +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_f2h +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d0[0], r0 +; LE-NEXT: vmov.32 d0[1], r1 +; LE-NEXT: pop {r11, pc} +; +; LE-NEON-LABEL: llrint_v1i64_v1f16: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r11, lr} +; LE-NEON-NEXT: push {r11, lr} +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_f2h +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d0[0], r0 +; LE-NEON-NEXT: vmov.32 d0[1], r1 +; LE-NEON-NEXT: pop {r11, pc} +; +; BE-LABEL: llrint_v1i64_v1f16: +; BE: @ %bb.0: +; BE-NEXT: .save {r11, lr} +; BE-NEXT: push {r11, lr} +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_f2h +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: pop {r11, pc} +; +; BE-NEON-LABEL: llrint_v1i64_v1f16: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r11, lr} +; BE-NEON-NEXT: push {r11, lr} +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: bl __aeabi_f2h +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d0, d16 +; BE-NEON-NEXT: pop {r11, pc} + %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) + +define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { +; LE-LABEL: llrint_v1i64_v2f16: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r11, lr} +; LE-NEXT: push {r4, r5, r11, lr} +; 
LE-NEXT: .vsave {d8, d9} +; LE-NEXT: vpush {d8, d9} +; LE-NEXT: vmov r0, s1 +; LE-NEXT: vmov.f32 s16, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: mov r4, r0 +; LE-NEXT: vmov r0, s16 +; LE-NEXT: mov r5, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: vmov.32 d9[0], r4 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 d9[1], r5 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q0, q4, q4 +; LE-NEXT: vpop {d8, d9} +; LE-NEXT: pop {r4, r5, r11, pc} +; +; LE-NEON-LABEL: llrint_v1i64_v2f16: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r11, lr} +; LE-NEON-NEXT: .vsave {d8, d9} +; LE-NEON-NEXT: vpush {d8, d9} +; LE-NEON-NEXT: vmov r0, s1 +; LE-NEON-NEXT: vmov.f32 s16, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: vmov r0, s16 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: vmov.32 d9[0], r4 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov.32 d9[1], r5 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vorr q0, q4, q4 +; LE-NEON-NEXT: vpop {d8, d9} +; LE-NEON-NEXT: pop {r4, r5, r11, pc} +; +; BE-LABEL: llrint_v1i64_v2f16: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r11, lr} +; BE-NEXT: push {r4, r5, r11, lr} +; BE-NEXT: .vsave {d8} +; BE-NEXT: vpush {d8} +; BE-NEXT: vmov r0, s1 +; BE-NEXT: vmov.f32 s16, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: mov r4, r0 +; BE-NEXT: vmov r0, s16 +; BE-NEXT: mov r5, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d8[0], r4 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d8[1], r5 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d1, d8 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: vpop {d8} +; BE-NEXT: pop {r4, r5, r11, pc} +; +; 
BE-NEON-LABEL: llrint_v1i64_v2f16: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r11, lr} +; BE-NEON-NEXT: .vsave {d8} +; BE-NEON-NEXT: vpush {d8} +; BE-NEON-NEXT: vmov r0, s1 +; BE-NEON-NEXT: vmov.f32 s16, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: vmov r0, s16 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d8[0], r4 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d8[1], r5 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d1, d8 +; BE-NEON-NEXT: vrev64.32 d0, d16 +; BE-NEON-NEXT: vpop {d8} +; BE-NEON-NEXT: pop {r4, r5, r11, pc} + %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) + +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +; LE-LABEL: llrint_v4i64_v4f16: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r11, lr} +; LE-NEXT: .vsave {d12, d13} +; LE-NEXT: vpush {d12, d13} +; LE-NEXT: .vsave {d8, d9, d10} +; LE-NEXT: vpush {d8, d9, d10} +; LE-NEXT: vmov r0, s1 +; LE-NEXT: vmov.f32 s16, s3 +; LE-NEXT: vmov.f32 s20, s2 +; LE-NEXT: vmov.f32 s18, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: mov r5, r0 +; LE-NEXT: vmov r0, s18 +; LE-NEXT: mov r4, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: mov r7, r0 +; LE-NEXT: vmov r0, s16 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r7 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: vmov r0, s20 +; LE-NEXT: mov r7, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: vmov.32 d13[0], r5 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 
d13[1], r4 +; LE-NEXT: vmov.32 d9[1], r6 +; LE-NEXT: vmov.32 d12[1], r7 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q0, q6, q6 +; LE-NEXT: vorr q1, q4, q4 +; LE-NEXT: vpop {d8, d9, d10} +; LE-NEXT: vpop {d12, d13} +; LE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; LE-NEON-LABEL: llrint_v4i64_v4f16: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} +; LE-NEON-NEXT: .vsave {d12, d13} +; LE-NEON-NEXT: vpush {d12, d13} +; LE-NEON-NEXT: .vsave {d8, d9, d10} +; LE-NEON-NEXT: vpush {d8, d9, d10} +; LE-NEON-NEXT: vmov r0, s1 +; LE-NEON-NEXT: vmov.f32 s16, s3 +; LE-NEON-NEXT: vmov.f32 s20, s2 +; LE-NEON-NEXT: vmov.f32 s18, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: vmov r0, s18 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: vmov r0, s16 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r7 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: vmov r0, s20 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: vmov.32 d13[0], r5 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov.32 d13[1], r4 +; LE-NEON-NEXT: vmov.32 d9[1], r6 +; LE-NEON-NEXT: vmov.32 d12[1], r7 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vorr q0, q6, q6 +; LE-NEON-NEXT: vorr q1, q4, q4 +; LE-NEON-NEXT: vpop {d8, d9, d10} +; LE-NEON-NEXT: vpop {d12, d13} +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; BE-LABEL: llrint_v4i64_v4f16: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r11, lr} +; BE-NEXT: .vsave {d8, d9, d10} +; BE-NEXT: vpush {d8, d9, d10} +; BE-NEXT: vmov r0, s1 +; BE-NEXT: vmov.f32 s16, 
s3 +; BE-NEXT: vmov.f32 s18, s2 +; BE-NEXT: vmov.f32 s20, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: mov r5, r0 +; BE-NEXT: vmov r0, s20 +; BE-NEXT: mov r4, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r7, r0 +; BE-NEXT: vmov r0, s16 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov s0, r7 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vmov r0, s18 +; BE-NEXT: mov r7, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d9[0], r5 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: vmov.32 d8[1], r6 +; BE-NEXT: vmov.32 d10[1], r7 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d1, d9 +; BE-NEXT: vrev64.32 d3, d8 +; BE-NEXT: vrev64.32 d0, d10 +; BE-NEXT: vrev64.32 d2, d16 +; BE-NEXT: vpop {d8, d9, d10} +; BE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; BE-NEON-LABEL: llrint_v4i64_v4f16: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} +; BE-NEON-NEXT: .vsave {d8, d9, d10} +; BE-NEON-NEXT: vpush {d8, d9, d10} +; BE-NEON-NEXT: vmov r0, s1 +; BE-NEON-NEXT: vmov.f32 s16, s3 +; BE-NEON-NEXT: vmov.f32 s18, s2 +; BE-NEON-NEXT: vmov.f32 s20, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: mov r5, r0 +; BE-NEON-NEXT: vmov r0, s20 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r7, r0 +; BE-NEON-NEXT: vmov r0, s16 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov s0, r7 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: vmov r0, s18 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: 
vmov.32 d9[0], r5 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: vmov.32 d8[1], r6 +; BE-NEON-NEXT: vmov.32 d10[1], r7 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d1, d9 +; BE-NEON-NEXT: vrev64.32 d3, d8 +; BE-NEON-NEXT: vrev64.32 d0, d10 +; BE-NEON-NEXT: vrev64.32 d2, d16 +; BE-NEON-NEXT: vpop {d8, d9, d10} +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} + %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) + +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +; LE-LABEL: llrint_v8i64_v8f16: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #8 +; LE-NEXT: sub sp, sp, #8 +; LE-NEXT: vmov r0, s1 +; LE-NEXT: vstr s6, [sp, #4] @ 4-byte Spill +; LE-NEXT: vmov.f32 s16, s7 +; LE-NEXT: vmov.f32 s18, s5 +; LE-NEXT: vmov.f32 s20, s4 +; LE-NEXT: vmov.f32 s22, s3 +; LE-NEXT: vmov.f32 s24, s2 +; LE-NEXT: vmov.f32 s26, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: mov r9, r0 +; LE-NEXT: vmov r0, s26 +; LE-NEXT: str r1, [sp] @ 4-byte Spill +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: mov r10, r0 +; LE-NEXT: vmov r0, s22 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: mov r5, r0 +; LE-NEXT: vmov r0, s24 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: mov r7, r0 +; LE-NEXT: vmov r0, s18 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: mov r6, r0 +; LE-NEXT: vmov r0, s20 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: mov r4, r0 +; LE-NEXT: vmov r0, s16 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r4 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r6 +; 
LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r7 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r5 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r10 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: vmov.32 d9[0], r9 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-NEXT: vmov.32 d15[1], r5 +; LE-NEXT: vmov.32 d9[1], r0 +; LE-NEXT: vmov.32 d13[1], r6 +; LE-NEXT: vmov.32 d11[1], r11 +; LE-NEXT: vmov.32 d8[1], r4 +; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: vorr q0, q4, q4 +; LE-NEXT: vmov.32 d12[1], r8 +; LE-NEXT: vorr q1, q7, q7 +; LE-NEXT: vmov.32 d10[1], r1 +; LE-NEXT: vorr q2, q6, q6 +; LE-NEXT: vorr q3, q5, q5 +; LE-NEXT: add sp, sp, #8 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v8i64_v8f16: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #8 +; LE-NEON-NEXT: sub sp, sp, #8 +; LE-NEON-NEXT: vmov r0, s1 +; LE-NEON-NEXT: vstr s6, [sp, #4] @ 4-byte Spill +; LE-NEON-NEXT: vmov.f32 s16, s7 +; LE-NEON-NEXT: vmov.f32 s18, s5 +; LE-NEON-NEXT: vmov.f32 s20, s4 +; LE-NEON-NEXT: vmov.f32 s22, s3 +; LE-NEON-NEXT: vmov.f32 s24, s2 +; LE-NEON-NEXT: vmov.f32 s26, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; 
LE-NEON-NEXT: mov r9, r0 +; LE-NEON-NEXT: vmov r0, s26 +; LE-NEON-NEXT: str r1, [sp] @ 4-byte Spill +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: mov r10, r0 +; LE-NEON-NEXT: vmov r0, s22 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: vmov r0, s24 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: vmov r0, s18 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: mov r6, r0 +; LE-NEON-NEXT: vmov r0, s20 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: vmov r0, s16 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r4 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r6 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r7 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r5 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r10 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: vmov.32 d9[0], r9 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d15[1], r5 +; LE-NEON-NEXT: vmov.32 d9[1], r0 +; LE-NEON-NEXT: vmov.32 d13[1], r6 +; LE-NEON-NEXT: vmov.32 d11[1], r11 +; LE-NEON-NEXT: vmov.32 d8[1], r4 +; LE-NEON-NEXT: vmov.32 d14[1], r7 +; LE-NEON-NEXT: vorr q0, q4, q4 +; LE-NEON-NEXT: vmov.32 d12[1], r8 +; LE-NEON-NEXT: vorr q1, q7, q7 +; LE-NEON-NEXT: vmov.32 d10[1], r1 +; LE-NEON-NEXT: vorr q2, q6, q6 +; LE-NEON-NEXT: 
vorr q3, q5, q5 +; LE-NEON-NEXT: add sp, sp, #8 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v8i64_v8f16: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-NEXT: .pad #8 +; BE-NEXT: sub sp, sp, #8 +; BE-NEXT: vmov r0, s1 +; BE-NEXT: vmov.f32 s18, s7 +; BE-NEXT: vmov.f32 s16, s6 +; BE-NEXT: vmov.f32 s20, s5 +; BE-NEXT: vmov.f32 s22, s4 +; BE-NEXT: vmov.f32 s24, s3 +; BE-NEXT: vmov.f32 s26, s2 +; BE-NEXT: vmov.f32 s28, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: mov r9, r0 +; BE-NEXT: vmov r0, s28 +; BE-NEXT: str r1, [sp, #4] @ 4-byte Spill +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r10, r0 +; BE-NEXT: vmov r0, s24 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r5, r0 +; BE-NEXT: vmov r0, s26 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r7, r0 +; BE-NEXT: vmov r0, s20 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r6, r0 +; BE-NEXT: vmov r0, s22 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r4, r0 +; BE-NEXT: vmov r0, s18 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov s0, r4 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov s0, r6 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov s0, r7 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov s0, r5 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov s0, r10 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: vmov r0, s16 +; BE-NEXT: mov r4, r1 +; 
BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d8[0], r9 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; BE-NEXT: vmov.32 d13[1], r5 +; BE-NEXT: vmov.32 d8[1], r0 +; BE-NEXT: vmov.32 d11[1], r6 +; BE-NEXT: vmov.32 d9[1], r11 +; BE-NEXT: vmov.32 d14[1], r4 +; BE-NEXT: vmov.32 d12[1], r7 +; BE-NEXT: vmov.32 d10[1], r8 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d1, d8 +; BE-NEXT: vrev64.32 d3, d13 +; BE-NEXT: vrev64.32 d5, d11 +; BE-NEXT: vrev64.32 d7, d9 +; BE-NEXT: vrev64.32 d0, d14 +; BE-NEXT: vrev64.32 d2, d12 +; BE-NEXT: vrev64.32 d4, d10 +; BE-NEXT: vrev64.32 d6, d16 +; BE-NEXT: add sp, sp, #8 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v8i64_v8f16: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-NEON-NEXT: .pad #8 +; BE-NEON-NEXT: sub sp, sp, #8 +; BE-NEON-NEXT: vmov r0, s1 +; BE-NEON-NEXT: vmov.f32 s18, s7 +; BE-NEON-NEXT: vmov.f32 s16, s6 +; BE-NEON-NEXT: vmov.f32 s20, s5 +; BE-NEON-NEXT: vmov.f32 s22, s4 +; BE-NEON-NEXT: vmov.f32 s24, s3 +; BE-NEON-NEXT: vmov.f32 s26, s2 +; BE-NEON-NEXT: vmov.f32 s28, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: mov r9, r0 +; BE-NEON-NEXT: vmov r0, s28 +; BE-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r10, r0 +; BE-NEON-NEXT: vmov r0, s24 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r5, r0 +; BE-NEON-NEXT: vmov r0, s26 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r7, r0 +; BE-NEON-NEXT: vmov r0, s20 +; BE-NEON-NEXT: bl 
__aeabi_h2f +; BE-NEON-NEXT: mov r6, r0 +; BE-NEON-NEXT: vmov r0, s22 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: vmov r0, s18 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov s0, r4 +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov s0, r6 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov s0, r7 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov s0, r5 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov s0, r10 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: vmov r0, s16 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d8[0], r9 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; BE-NEON-NEXT: vmov.32 d13[1], r5 +; BE-NEON-NEXT: vmov.32 d8[1], r0 +; BE-NEON-NEXT: vmov.32 d11[1], r6 +; BE-NEON-NEXT: vmov.32 d9[1], r11 +; BE-NEON-NEXT: vmov.32 d14[1], r4 +; BE-NEON-NEXT: vmov.32 d12[1], r7 +; BE-NEON-NEXT: vmov.32 d10[1], r8 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d1, d8 +; BE-NEON-NEXT: vrev64.32 d3, d13 +; BE-NEON-NEXT: vrev64.32 d5, d11 +; BE-NEON-NEXT: vrev64.32 d7, d9 +; BE-NEON-NEXT: vrev64.32 d0, d14 +; BE-NEON-NEXT: vrev64.32 d2, d12 +; BE-NEON-NEXT: vrev64.32 d4, d10 +; BE-NEON-NEXT: vrev64.32 d6, d16 +; BE-NEON-NEXT: add sp, sp, #8 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) + ret <8 x i64> %a +} +declare <8 
x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) + +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +; LE-LABEL: llrint_v16i64_v16f16: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #120 +; LE-NEXT: sub sp, sp, #120 +; LE-NEXT: mov r11, r0 +; LE-NEXT: vmov r0, s7 +; LE-NEXT: vstr s15, [sp, #24] @ 4-byte Spill +; LE-NEXT: vmov.f32 s23, s13 +; LE-NEXT: vstr s14, [sp, #100] @ 4-byte Spill +; LE-NEXT: vmov.f32 s25, s12 +; LE-NEXT: vmov.f32 s27, s11 +; LE-NEXT: vstr s10, [sp, #104] @ 4-byte Spill +; LE-NEXT: vstr s9, [sp, #108] @ 4-byte Spill +; LE-NEXT: vmov.f32 s24, s8 +; LE-NEXT: vmov.f32 s19, s6 +; LE-NEXT: vmov.f32 s29, s5 +; LE-NEXT: vmov.f32 s17, s4 +; LE-NEXT: vmov.f32 s16, s3 +; LE-NEXT: vmov.f32 s21, s2 +; LE-NEXT: vmov.f32 s26, s1 +; LE-NEXT: vmov.f32 s18, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: mov r7, r0 +; LE-NEXT: vmov r0, s25 +; LE-NEXT: str r1, [sp, #56] @ 4-byte Spill +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: mov r5, r0 +; LE-NEXT: vmov r0, s27 +; LE-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: mov r6, r0 +; LE-NEXT: vmov r0, s29 +; LE-NEXT: str r1, [sp, #112] @ 4-byte Spill +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: vmov r0, s23 +; LE-NEXT: mov r4, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: vmov.32 d17[0], r6 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: mov r6, r0 +; LE-NEXT: vmov r0, s17 +; LE-NEXT: vmov r8, s21 +; LE-NEXT: str r1, [sp, #76] @ 4-byte Spill 
+; LE-NEXT: vmov r10, s19 +; LE-NEXT: vmov.32 d10[0], r5 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vmov.32 d11[0], r6 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: mov r0, r10 +; LE-NEXT: mov r9, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: vmov.32 d11[0], r7 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: mov r0, r8 +; LE-NEXT: mov r7, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: mov r6, r0 +; LE-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-NEXT: vmov.32 d11[1], r0 +; LE-NEXT: vmov r0, s18 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: mov r5, r0 +; LE-NEXT: vmov r0, s16 +; LE-NEXT: vmov.32 d10[1], r7 +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: vmov r0, s26 +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vmov r8, s24 +; LE-NEXT: vmov.32 d14[1], r9 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov s24, r5 +; LE-NEXT: vldr s0, [sp, #24] @ 4-byte Reload +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: vmov r7, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: vmov s22, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: vmov s24, r6 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: mov r0, r7 +; LE-NEXT: mov r6, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: vmov s22, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: mov r0, r8 +; LE-NEXT: mov r6, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr 
s0, [sp, #100] @ 4-byte Reload +; LE-NEXT: mov r7, r0 +; LE-NEXT: vmov.32 d14[1], r5 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #104] @ 4-byte Reload +; LE-NEXT: vmov s20, r0 +; LE-NEXT: vmov.32 d13[1], r6 +; LE-NEXT: vmov r4, s0 +; LE-NEXT: vldr s0, [sp, #108] @ 4-byte Reload +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: vmov s16, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: vmov s18, r7 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: mov r0, r4 +; LE-NEXT: mov r6, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: vmov s16, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d11[1], r6 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: vmov.32 d10[1], r4 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d16[0], r0 +; LE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vmov.32 d19[1], r0 +; LE-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-NEXT: vmov.32 d21[1], r10 +; LE-NEXT: vmov.32 d18[1], r0 +; LE-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: vmov.32 d17[1], r0 +; LE-NEXT: add r0, r11, #64 +; LE-NEXT: vmov.32 d16[1], r1 +; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vmov.32 d20[1], r9 +; LE-NEXT: vst1.64 {d12, d13}, [r0:128] +; LE-NEXT: vst1.64 {d14, d15}, [r11:128]! +; LE-NEXT: vst1.64 {d20, d21}, [r11:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-NEXT: add sp, sp, #120 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v16i64_v16f16: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #120 +; LE-NEON-NEXT: sub sp, sp, #120 +; LE-NEON-NEXT: mov r11, r0 +; LE-NEON-NEXT: vmov r0, s7 +; LE-NEON-NEXT: vstr s15, [sp, #24] @ 4-byte Spill +; LE-NEON-NEXT: vmov.f32 s23, s13 +; LE-NEON-NEXT: vstr s14, [sp, #100] @ 4-byte Spill +; LE-NEON-NEXT: vmov.f32 s25, s12 +; LE-NEON-NEXT: vmov.f32 s27, s11 +; LE-NEON-NEXT: vstr s10, [sp, #104] @ 4-byte Spill +; LE-NEON-NEXT: vstr s9, [sp, #108] @ 4-byte Spill +; LE-NEON-NEXT: vmov.f32 s24, s8 +; LE-NEON-NEXT: vmov.f32 s19, s6 +; LE-NEON-NEXT: vmov.f32 s29, s5 +; LE-NEON-NEXT: vmov.f32 s17, s4 +; LE-NEON-NEXT: vmov.f32 s16, s3 +; LE-NEON-NEXT: vmov.f32 s21, s2 +; LE-NEON-NEXT: vmov.f32 s26, s1 +; LE-NEON-NEXT: vmov.f32 s18, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: vmov r0, s25 +; LE-NEON-NEXT: str r1, [sp, #56] @ 4-byte Spill +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: vmov r0, s27 +; LE-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: mov r6, r0 +; LE-NEON-NEXT: vmov r0, s29 +; LE-NEON-NEXT: str r1, [sp, #112] @ 4-byte Spill +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; 
LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: vmov r0, s23 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: vmov.32 d17[0], r6 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: mov r6, r0 +; LE-NEON-NEXT: vmov r0, s17 +; LE-NEON-NEXT: vmov r8, s21 +; LE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill +; LE-NEON-NEXT: vmov r10, s19 +; LE-NEON-NEXT: vmov.32 d10[0], r5 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vmov.32 d11[0], r6 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: mov r0, r10 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: vmov.32 d11[0], r7 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: mov r0, r8 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: mov r6, r0 +; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d11[1], r0 +; LE-NEON-NEXT: vmov r0, s18 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: vmov r0, s16 +; LE-NEON-NEXT: vmov.32 d10[1], r7 +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: vmov.32 d15[1], r4 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: vmov r0, s26 +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vmov r8, s24 +; LE-NEON-NEXT: vmov.32 d14[1], r9 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov s24, r5 +; LE-NEON-NEXT: vldr s0, [sp, #24] @ 4-byte Reload +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: vmov r7, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, 
s24 +; LE-NEON-NEXT: vmov s22, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s22 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: vmov s24, r6 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s24 +; LE-NEON-NEXT: vmov s22, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s22 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d15[1], r6 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: mov r0, r8 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: vmov.32 d14[1], r5 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload +; LE-NEON-NEXT: vmov s20, r0 +; LE-NEON-NEXT: vmov.32 d13[1], r6 +; LE-NEON-NEXT: vmov r4, s0 +; LE-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s20 +; LE-NEON-NEXT: vmov s16, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: vmov s18, r7 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: mov r0, r4 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: vmov s16, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d11[1], r6 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: vmov.32 d10[1], r4 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; 
LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: vmov.32 d16[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vmov.32 d19[1], r0 +; LE-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d21[1], r10 +; LE-NEON-NEXT: vmov.32 d18[1], r0 +; LE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d12[1], r5 +; LE-NEON-NEXT: vmov.32 d17[1], r0 +; LE-NEON-NEXT: add r0, r11, #64 +; LE-NEON-NEXT: vmov.32 d16[1], r1 +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEON-NEXT: vmov.32 d20[1], r9 +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128] +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]! +; LE-NEON-NEXT: vst1.64 {d20, d21}, [r11:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-NEON-NEXT: add sp, sp, #120 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v16i64_v16f16: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #112 +; BE-NEXT: sub sp, sp, #112 +; BE-NEXT: mov r11, r0 +; BE-NEXT: vmov r0, s14 +; BE-NEXT: vmov.f32 s17, s15 +; BE-NEXT: vstr s13, [sp, #52] @ 4-byte Spill +; BE-NEXT: vmov.f32 s21, s12 +; BE-NEXT: vstr s10, [sp, #68] @ 4-byte Spill +; BE-NEXT: vmov.f32 s23, s11 +; BE-NEXT: vstr s7, [sp, #72] @ 4-byte Spill +; BE-NEXT: vmov.f32 s19, s9 +; BE-NEXT: vstr s4, [sp, #28] @ 4-byte Spill +; BE-NEXT: vmov.f32 s26, s8 +; BE-NEXT: vmov.f32 s24, s6 +; BE-NEXT: vmov.f32 s18, s5 +; BE-NEXT: vmov.f32 s25, s3 +; BE-NEXT: vmov.f32 s16, s2 +; BE-NEXT: vmov.f32 s27, s1 +; BE-NEXT: vmov.f32 s29, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: mov r8, r0 +; BE-NEXT: vmov r0, s29 +; BE-NEXT: mov r4, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r9, r0 +; BE-NEXT: vmov r0, s27 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r7, r0 +; BE-NEXT: vmov r0, s21 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r6, r0 +; BE-NEXT: vmov r0, s25 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r5, r0 +; BE-NEXT: vmov r0, s23 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov s0, r5 +; BE-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-NEXT: vstr d16, [sp, #96] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov s0, r6 +; BE-NEXT: str r1, [sp, 
#92] @ 4-byte Spill +; BE-NEXT: vstr d16, [sp, #80] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov s0, r7 +; BE-NEXT: str r1, [sp, #76] @ 4-byte Spill +; BE-NEXT: vstr d16, [sp, #56] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov s0, r9 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vmov r0, s17 +; BE-NEXT: mov r5, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d10[0], r8 +; BE-NEXT: vmov r6, s19 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: mov r0, r6 +; BE-NEXT: mov r7, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r6, r0 +; BE-NEXT: vmov r0, s18 +; BE-NEXT: vmov.32 d10[1], r4 +; BE-NEXT: vstr d10, [sp, #40] @ 8-byte Spill +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: mov r4, r0 +; BE-NEXT: vmov r0, s16 +; BE-NEXT: vmov.32 d11[1], r7 +; BE-NEXT: vstr d11, [sp, #32] @ 8-byte Spill +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.32 d15[1], r5 +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vstr d15, [sp, #16] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vldr s0, [sp, #28] @ 4-byte Reload +; BE-NEXT: vmov r5, s26 +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov s26, r4 +; BE-NEXT: vmov r0, s0 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d14[1], r10 +; BE-NEXT: vmov r4, s24 +; BE-NEXT: vstr d16, [sp] @ 8-byte Spill +; BE-NEXT: vstr d14, [sp, #8] @ 8-byte Spill +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s26 +; BE-NEXT: vmov s22, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s22 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vmov s24, r6 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: mov r0, r4 +; BE-NEXT: mov r6, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s24 +; BE-NEXT: vmov s22, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s22 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: vmov.32 d14[1], r6 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 
d11[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: mov r6, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #52] @ 4-byte Reload +; BE-NEXT: mov r4, r0 +; BE-NEXT: vmov.32 d13[1], r7 +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #68] @ 4-byte Reload +; BE-NEXT: vmov s20, r0 +; BE-NEXT: vmov.32 d11[1], r6 +; BE-NEXT: vmov r7, s0 +; BE-NEXT: vldr s0, [sp, #72] @ 4-byte Reload +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s20 +; BE-NEXT: vmov s16, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vmov s18, r4 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: mov r0, r7 +; BE-NEXT: mov r4, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s18 +; BE-NEXT: vmov s16, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: vmov.32 d15[1], r4 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d24[0], r0 +; BE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; BE-NEXT: vldr d23, [sp, #56] @ 8-byte Reload +; BE-NEXT: vldr d20, [sp, #8] @ 8-byte Reload +; BE-NEXT: vmov.32 d23[1], r0 +; BE-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-NEXT: vldr d22, [sp, #80] @ 8-byte Reload +; BE-NEXT: vldr d26, [sp, #16] @ 8-byte Reload +; BE-NEXT: vrev64.32 d21, d20 +; BE-NEXT: vmov.32 d22[1], r0 +; BE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-NEXT: vldr d30, [sp] @ 8-byte Reload +; BE-NEXT: vldr d25, [sp, #96] @ 8-byte Reload +; BE-NEXT: vrev64.32 d20, d26 +; BE-NEXT: vldr d26, [sp, #32] @ 8-byte Reload +; BE-NEXT: vmov.32 d10[1], r5 +; BE-NEXT: vmov.32 d12[1], r9 +; BE-NEXT: vldr d28, [sp, #40] @ 8-byte Reload +; BE-NEXT: vrev64.32 d27, d26 +; BE-NEXT: vmov.32 d25[1], r0 +; BE-NEXT: add r0, r11, #64 +; BE-NEXT: vmov.32 d30[1], r8 +; BE-NEXT: vmov.32 d9[1], r6 +; BE-NEXT: vrev64.32 d26, d28 +; BE-NEXT: vrev64.32 d29, d10 +; BE-NEXT: vmov.32 d24[1], r1 +; BE-NEXT: vrev64.32 d1, d12 +; 
BE-NEXT: vrev64.32 d28, d23 +; BE-NEXT: vrev64.32 d23, d22 +; BE-NEXT: vrev64.32 d22, d30 +; BE-NEXT: vrev64.32 d31, d25 +; BE-NEXT: vrev64.32 d0, d9 +; BE-NEXT: vrev64.32 d30, d24 +; BE-NEXT: vst1.64 {d0, d1}, [r0:128]! +; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-NEXT: vrev64.32 d19, d13 +; BE-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-NEXT: vst1.64 {d20, d21}, [r11:128]! +; BE-NEXT: vrev64.32 d18, d14 +; BE-NEXT: vst1.64 {d22, d23}, [r11:128]! +; BE-NEXT: vrev64.32 d17, d15 +; BE-NEXT: vrev64.32 d16, d11 +; BE-NEXT: vst1.64 {d18, d19}, [r11:128]! +; BE-NEXT: vst1.64 {d16, d17}, [r11:128] +; BE-NEXT: add sp, sp, #112 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v16i64_v16f16: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #112 +; BE-NEON-NEXT: sub sp, sp, #112 +; BE-NEON-NEXT: mov r11, r0 +; BE-NEON-NEXT: vmov r0, s14 +; BE-NEON-NEXT: vmov.f32 s17, s15 +; BE-NEON-NEXT: vstr s13, [sp, #52] @ 4-byte Spill +; BE-NEON-NEXT: vmov.f32 s21, s12 +; BE-NEON-NEXT: vstr s10, [sp, #68] @ 4-byte Spill +; BE-NEON-NEXT: vmov.f32 s23, s11 +; BE-NEON-NEXT: vstr s7, [sp, #72] @ 4-byte Spill +; BE-NEON-NEXT: vmov.f32 s19, s9 +; BE-NEON-NEXT: vstr s4, [sp, #28] @ 4-byte Spill +; BE-NEON-NEXT: vmov.f32 s26, s8 +; BE-NEON-NEXT: vmov.f32 s24, s6 +; BE-NEON-NEXT: vmov.f32 s18, s5 +; BE-NEON-NEXT: vmov.f32 s25, s3 +; BE-NEON-NEXT: vmov.f32 s16, s2 +; BE-NEON-NEXT: vmov.f32 s27, s1 +; BE-NEON-NEXT: vmov.f32 s29, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: mov r8, r0 +; 
BE-NEON-NEXT: vmov r0, s29 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r9, r0 +; BE-NEON-NEXT: vmov r0, s27 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r7, r0 +; BE-NEON-NEXT: vmov r0, s21 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r6, r0 +; BE-NEON-NEXT: vmov r0, s25 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r5, r0 +; BE-NEON-NEXT: vmov r0, s23 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov s0, r5 +; BE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-NEON-NEXT: vstr d16, [sp, #96] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov s0, r6 +; BE-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill +; BE-NEON-NEXT: vstr d16, [sp, #80] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov s0, r7 +; BE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill +; BE-NEON-NEXT: vstr d16, [sp, #56] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov s0, r9 +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vmov r0, s17 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d10[0], r8 +; BE-NEON-NEXT: vmov r6, s19 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: mov r0, r6 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r6, r0 +; BE-NEON-NEXT: vmov r0, s18 +; BE-NEON-NEXT: vmov.32 d10[1], r4 +; BE-NEON-NEXT: vstr d10, [sp, #40] @ 8-byte Spill +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: vmov r0, s16 +; BE-NEON-NEXT: vmov.32 d11[1], r7 +; BE-NEON-NEXT: vstr d11, [sp, #32] @ 8-byte Spill +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.32 d15[1], r5 +; BE-NEON-NEXT: vmov 
s0, r0 +; BE-NEON-NEXT: vstr d15, [sp, #16] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vldr s0, [sp, #28] @ 4-byte Reload +; BE-NEON-NEXT: vmov r5, s26 +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov s26, r4 +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d14[1], r10 +; BE-NEON-NEXT: vmov r4, s24 +; BE-NEON-NEXT: vstr d16, [sp] @ 8-byte Spill +; BE-NEON-NEXT: vstr d14, [sp, #8] @ 8-byte Spill +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s26 +; BE-NEON-NEXT: vmov s22, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s22 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: vmov s24, r6 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: mov r0, r4 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s24 +; BE-NEON-NEXT: vmov s22, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s22 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: vmov.32 d14[1], r6 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: mov r0, r5 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #52] @ 4-byte Reload +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: vmov.32 d13[1], r7 +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #68] @ 4-byte Reload +; BE-NEON-NEXT: vmov s20, r0 +; BE-NEON-NEXT: vmov.32 d11[1], r6 +; BE-NEON-NEXT: vmov r7, s0 +; BE-NEON-NEXT: vldr s0, [sp, #72] @ 4-byte Reload +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s20 +; BE-NEON-NEXT: vmov s16, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: vmov s18, r4 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: mov r0, r7 +; 
BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s18 +; BE-NEON-NEXT: vmov s16, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: vmov.32 d15[1], r4 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d24[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; BE-NEON-NEXT: vldr d23, [sp, #56] @ 8-byte Reload +; BE-NEON-NEXT: vldr d20, [sp, #8] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d23[1], r0 +; BE-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-NEON-NEXT: vldr d22, [sp, #80] @ 8-byte Reload +; BE-NEON-NEXT: vldr d26, [sp, #16] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d21, d20 +; BE-NEON-NEXT: vmov.32 d22[1], r0 +; BE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-NEON-NEXT: vldr d30, [sp] @ 8-byte Reload +; BE-NEON-NEXT: vldr d25, [sp, #96] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d20, d26 +; BE-NEON-NEXT: vldr d26, [sp, #32] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d10[1], r5 +; BE-NEON-NEXT: vmov.32 d12[1], r9 +; BE-NEON-NEXT: vldr d28, [sp, #40] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d27, d26 +; BE-NEON-NEXT: vmov.32 d25[1], r0 +; BE-NEON-NEXT: add r0, r11, #64 +; BE-NEON-NEXT: vmov.32 d30[1], r8 +; BE-NEON-NEXT: vmov.32 d9[1], r6 +; BE-NEON-NEXT: vrev64.32 d26, d28 +; BE-NEON-NEXT: vrev64.32 d29, d10 +; BE-NEON-NEXT: vmov.32 d24[1], r1 +; BE-NEON-NEXT: vrev64.32 d1, d12 +; BE-NEON-NEXT: vrev64.32 d28, d23 +; BE-NEON-NEXT: vrev64.32 d23, d22 +; BE-NEON-NEXT: vrev64.32 d22, d30 +; BE-NEON-NEXT: vrev64.32 d31, d25 +; BE-NEON-NEXT: vrev64.32 d0, d9 +; BE-NEON-NEXT: vrev64.32 d30, d24 +; BE-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]! +; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 d19, d13 +; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r11:128]! 
+; BE-NEON-NEXT: vrev64.32 d18, d14 +; BE-NEON-NEXT: vst1.64 {d22, d23}, [r11:128]! +; BE-NEON-NEXT: vrev64.32 d17, d15 +; BE-NEON-NEXT: vrev64.32 d16, d11 +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r11:128]! +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] +; BE-NEON-NEXT: add sp, sp, #112 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) + +define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { +; LE-LABEL: llrint_v32i64_v32f16: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #248 +; LE-NEXT: sub sp, sp, #248 +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r11, r0 +; LE-NEXT: vstr s15, [sp, #176] @ 4-byte Spill +; LE-NEXT: vmov.f32 s19, s14 +; LE-NEXT: ldrh r0, [lr, #132] +; LE-NEXT: vmov.f32 s17, s11 +; LE-NEXT: vstr s13, [sp, #196] @ 4-byte Spill +; LE-NEXT: vstr s12, [sp, #112] @ 4-byte Spill +; LE-NEXT: vstr s10, [sp, #136] @ 4-byte Spill +; LE-NEXT: vstr s9, [sp, #160] @ 4-byte Spill +; LE-NEXT: vstr s8, [sp, #200] @ 4-byte Spill +; LE-NEXT: vstr s7, [sp, #100] @ 4-byte Spill +; LE-NEXT: vstr s6, [sp, #116] @ 4-byte Spill +; LE-NEXT: vstr s5, [sp, #76] @ 4-byte Spill +; LE-NEXT: vstr s4, [sp, #120] @ 4-byte Spill +; LE-NEXT: vstr s3, [sp, #156] @ 4-byte Spill +; LE-NEXT: vstr s2, [sp, #192] @ 4-byte Spill +; LE-NEXT: vstr s1, [sp, #104] @ 4-byte Spill +; LE-NEXT: vstr s0, [sp, #108] @ 4-byte Spill +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; LE-NEXT: str r1, 
[sp, #56] @ 4-byte Spill +; LE-NEXT: ldrh r0, [lr, #108] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: str r0, [sp, #32] @ 4-byte Spill +; LE-NEXT: ldrh r0, [lr, #96] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r5, r0 +; LE-NEXT: ldrh r0, [lr, #100] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r7, r0 +; LE-NEXT: ldrh r0, [lr, #156] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r6, r0 +; LE-NEXT: ldrh r0, [lr, #152] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r4, r0 +; LE-NEXT: ldrh r0, [lr, #148] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r4 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r6 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r7 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r5 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: mov r5, r1 +; LE-NEXT: ldrh r0, [lr, #144] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r10, r0 +; LE-NEXT: vmov.32 d11[1], r7 +; LE-NEXT: ldrh r0, [lr, #104] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.32 d10[1], r5 +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: mov r7, r0 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: ldrh r0, [lr, #124] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r5, r0 +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: ldrh r0, [lr, #120] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.32 d14[1], r4 +; LE-NEXT: add lr, sp, #16 +; LE-NEXT: mov r6, r0 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: ldrh r0, [lr, #116] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: add lr, 
sp, #256 +; LE-NEXT: vorr q5, q6, q6 +; LE-NEXT: mov r4, r0 +; LE-NEXT: ldrh r0, [lr, #112] +; LE-NEXT: vmov.32 d11[1], r8 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r4 +; LE-NEXT: str r1, [sp, #12] @ 4-byte Spill +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r6 +; LE-NEXT: add lr, sp, #216 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: str r1, [sp, #8] @ 4-byte Spill +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r5 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r7 +; LE-NEXT: add lr, sp, #232 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r10 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: mov r5, r1 +; LE-NEXT: ldrh r0, [lr, #140] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; LE-NEXT: vmov.32 d10[1], r5 +; LE-NEXT: add lr, sp, #32 +; LE-NEXT: vmov s16, r0 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: ldrh r1, [lr, #128] +; LE-NEXT: mov r0, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: vmov s18, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #256 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload +; LE-NEXT: ldrh r0, [lr, #136] +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: vmov.32 d11[0], r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: vmov s16, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d11[1], 
r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d13[1], r5 +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; LE-NEXT: vmov.32 d12[1], r9 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: vmov r0, s19 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #232 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.32 d13[1], r8 +; LE-NEXT: vmov.32 d12[1], r4 +; LE-NEXT: vmov.32 d10[1], r6 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #216 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d17[1], r2 +; LE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; LE-NEXT: vmov.32 d14[1], r1 +; LE-NEXT: add r1, r11, #192 +; LE-NEXT: vmov.32 d16[1], r2 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #32 +; LE-NEXT: vst1.64 {d10, d11}, [r1:128]! +; LE-NEXT: vst1.64 {d14, d15}, [r1:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #16 +; LE-NEXT: vst1.64 {d16, d17}, [r1:128]! 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r1:128] +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; LE-NEXT: vmov r0, s17 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #76] @ 4-byte Reload +; LE-NEXT: mov r10, r0 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #100] @ 4-byte Reload +; LE-NEXT: mov r4, r0 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #104] @ 4-byte Reload +; LE-NEXT: mov r7, r0 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #108] @ 4-byte Reload +; LE-NEXT: mov r5, r0 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #112] @ 4-byte Reload +; LE-NEXT: mov r6, r0 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r6 +; LE-NEXT: str r1, [sp, #112] @ 4-byte Spill +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r5 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r7 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r4 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov s0, r10 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vldr s0, [sp, #116] @ 4-byte Reload +; LE-NEXT: mov r6, r0 +; LE-NEXT: str r1, [sp, #108] @ 4-byte Spill +; LE-NEXT: vmov.32 d11[1], r5 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov s0, r0 +; LE-NEXT: vmov.32 d13[1], r4 +; LE-NEXT: bl llrintf +; LE-NEXT: vldr s0, [sp, #120] @ 4-byte Reload +; LE-NEXT: mov r4, r0 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d9[1], r8 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #136] @ 4-byte Reload +; LE-NEXT: vmov.32 d10[0], r4 +; LE-NEXT: vmov r7, s0 +; LE-NEXT: vmov s0, r0 +; LE-NEXT: bl 
llrintf +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: add r10, r11, #128 +; LE-NEXT: mov r0, r7 +; LE-NEXT: vmov.32 d10[1], r5 +; LE-NEXT: vmov.32 d12[1], r1 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vmov.32 d13[0], r6 +; LE-NEXT: vst1.64 {d16, d17}, [r10:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r10:128]! +; LE-NEXT: vldr s0, [sp, #156] @ 4-byte Reload +; LE-NEXT: vmov r4, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #160] @ 4-byte Reload +; LE-NEXT: mov r5, r0 +; LE-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; LE-NEXT: vmov.32 d8[1], r9 +; LE-NEXT: vmov r7, s0 +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vldr s0, [sp, #176] @ 4-byte Reload +; LE-NEXT: vmov s20, r0 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: vmov s18, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: vmov s16, r5 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: mov r0, r7 +; LE-NEXT: mov r5, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: vmov s18, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d11[1], r5 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: mov r0, r4 +; LE-NEXT: mov r5, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #196] @ 4-byte Reload +; LE-NEXT: mov r7, r0 +; LE-NEXT: vmov.32 d10[1], r6 +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vldr s0, [sp, #192] @ 4-byte Reload +; LE-NEXT: vmov s16, r0 +; 
LE-NEXT: vmov.32 d13[1], r5 +; LE-NEXT: vmov r6, s0 +; LE-NEXT: vldr s0, [sp, #200] @ 4-byte Reload +; LE-NEXT: vmov r0, s0 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: vmov s18, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: add lr, sp, #200 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov s16, r7 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: mov r0, r6 +; LE-NEXT: mov r5, r1 +; LE-NEXT: bl __aeabi_h2f +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: vmov s18, r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #200 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vmov.32 d19[1], r4 +; LE-NEXT: vmov.32 d18[1], r0 +; LE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #216 +; LE-NEXT: vmov.32 d17[1], r0 +; LE-NEXT: add r0, r11, #64 +; LE-NEXT: vmov.32 d16[1], r8 +; LE-NEXT: vorr q10, q8, q8 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #232 +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: vst1.64 {d16, d17}, [r10:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vmov.32 d14[1], r1 +; LE-NEXT: vst1.64 {d16, d17}, [r10:128] +; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEXT: vst1.64 {d20, d21}, [r0:128]! +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vst1.64 {d10, d11}, [r0:128] +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-NEXT: vst1.64 {d14, d15}, [r11:128]! 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-NEXT: add sp, sp, #248 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v32i64_v32f16: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #248 +; LE-NEON-NEXT: sub sp, sp, #248 +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r11, r0 +; LE-NEON-NEXT: vstr s15, [sp, #176] @ 4-byte Spill +; LE-NEON-NEXT: vmov.f32 s19, s14 +; LE-NEON-NEXT: ldrh r0, [lr, #132] +; LE-NEON-NEXT: vmov.f32 s17, s11 +; LE-NEON-NEXT: vstr s13, [sp, #196] @ 4-byte Spill +; LE-NEON-NEXT: vstr s12, [sp, #112] @ 4-byte Spill +; LE-NEON-NEXT: vstr s10, [sp, #136] @ 4-byte Spill +; LE-NEON-NEXT: vstr s9, [sp, #160] @ 4-byte Spill +; LE-NEON-NEXT: vstr s8, [sp, #200] @ 4-byte Spill +; LE-NEON-NEXT: vstr s7, [sp, #100] @ 4-byte Spill +; LE-NEON-NEXT: vstr s6, [sp, #116] @ 4-byte Spill +; LE-NEON-NEXT: vstr s5, [sp, #76] @ 4-byte Spill +; LE-NEON-NEXT: vstr s4, [sp, #120] @ 4-byte Spill +; LE-NEON-NEXT: vstr s3, [sp, #156] @ 4-byte Spill +; LE-NEON-NEXT: vstr s2, [sp, #192] @ 4-byte Spill +; LE-NEON-NEXT: vstr s1, [sp, #104] @ 4-byte Spill +; LE-NEON-NEXT: vstr s0, [sp, #108] @ 4-byte Spill +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: str r0, [sp, #52] @ 4-byte Spill +; LE-NEON-NEXT: str r1, [sp, #56] @ 4-byte Spill +; LE-NEON-NEXT: ldrh r0, [lr, #108] +; LE-NEON-NEXT: bl 
__aeabi_h2f +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: str r0, [sp, #32] @ 4-byte Spill +; LE-NEON-NEXT: ldrh r0, [lr, #96] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: ldrh r0, [lr, #100] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: ldrh r0, [lr, #156] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r6, r0 +; LE-NEON-NEXT: ldrh r0, [lr, #152] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: ldrh r0, [lr, #148] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r4 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r6 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r7 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r5 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: ldrh r0, [lr, #144] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r10, r0 +; LE-NEON-NEXT: vmov.32 d11[1], r7 +; LE-NEON-NEXT: ldrh r0, [lr, #104] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.32 d10[1], r5 +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: ldrh r0, [lr, #124] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: vmov.32 d15[1], r6 +; LE-NEON-NEXT: ldrh r0, [lr, #120] +; LE-NEON-NEXT: bl __aeabi_h2f +; 
LE-NEON-NEXT: vmov.32 d14[1], r4 +; LE-NEON-NEXT: add lr, sp, #16 +; LE-NEON-NEXT: mov r6, r0 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: ldrh r0, [lr, #116] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: vorr q5, q6, q6 +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: ldrh r0, [lr, #112] +; LE-NEON-NEXT: vmov.32 d11[1], r8 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r4 +; LE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r6 +; LE-NEON-NEXT: add lr, sp, #216 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r5 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r7 +; LE-NEON-NEXT: add lr, sp, #232 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r10 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: ldrh r0, [lr, #140] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d10[1], r5 +; LE-NEON-NEXT: add lr, sp, #32 +; LE-NEON-NEXT: vmov s16, r0 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: ldrh r1, [lr, #128] +; LE-NEON-NEXT: mov r0, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s16 +; 
LE-NEON-NEXT: vmov s18, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #256 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: ldr r1, [sp, #52] @ 4-byte Reload +; LE-NEON-NEXT: ldrh r0, [lr, #136] +; LE-NEON-NEXT: vmov.32 d15[1], r6 +; LE-NEON-NEXT: vmov.32 d11[0], r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: vmov s16, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d11[1], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d13[1], r5 +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d12[1], r9 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: vmov r0, s19 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #232 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d13[1], r8 +; LE-NEON-NEXT: vmov.32 d12[1], r4 +; LE-NEON-NEXT: vmov.32 d10[1], r6 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #216 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d17[1], r2 +; LE-NEON-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d14[1], r1 +; LE-NEON-NEXT: add r1, r11, #192 +; LE-NEON-NEXT: vmov.32 d16[1], r2 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #32 +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r1:128]! +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r1:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #16 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r1:128]! 
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r1:128] +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: str r0, [sp, #52] @ 4-byte Spill +; LE-NEON-NEXT: vmov r0, s17 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #76] @ 4-byte Reload +; LE-NEON-NEXT: mov r10, r0 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #112] @ 4-byte Reload +; LE-NEON-NEXT: mov r6, r0 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r6 +; LE-NEON-NEXT: str r1, [sp, #112] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r5 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r7 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r4 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov s0, r10 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vldr s0, [sp, #116] @ 4-byte Reload +; LE-NEON-NEXT: mov r6, r0 +; LE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d11[1], r5 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: vmov.32 d13[1], r4 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vldr s0, [sp, #120] 
@ 4-byte Reload +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d9[1], r8 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #136] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d10[0], r4 +; LE-NEON-NEXT: vmov r7, s0 +; LE-NEON-NEXT: vmov s0, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: add r10, r11, #128 +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: vmov.32 d10[1], r5 +; LE-NEON-NEXT: vmov.32 d12[1], r1 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vmov.32 d13[0], r6 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! 
+; LE-NEON-NEXT: vldr s0, [sp, #156] @ 4-byte Reload +; LE-NEON-NEXT: vmov r4, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #160] @ 4-byte Reload +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d8[1], r9 +; LE-NEON-NEXT: vmov r7, s0 +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vldr s0, [sp, #176] @ 4-byte Reload +; LE-NEON-NEXT: vmov s20, r0 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s20 +; LE-NEON-NEXT: vmov s18, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: vmov s16, r5 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: vmov s18, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d11[1], r5 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: mov r0, r4 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #196] @ 4-byte Reload +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: vmov.32 d10[1], r6 +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vldr s0, [sp, #192] @ 4-byte Reload +; LE-NEON-NEXT: vmov s16, r0 +; LE-NEON-NEXT: vmov.32 d13[1], r5 +; LE-NEON-NEXT: vmov r6, s0 +; LE-NEON-NEXT: vldr s0, [sp, #200] @ 4-byte Reload +; LE-NEON-NEXT: vmov r0, s0 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: vmov s18, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: add lr, sp, #200 +; LE-NEON-NEXT: 
vmov.32 d15[0], r0 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov s16, r7 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: mov r0, r6 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: bl __aeabi_h2f +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: vmov s18, r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: vmov.32 d12[1], r5 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #200 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vmov.32 d19[1], r4 +; LE-NEON-NEXT: vmov.32 d18[1], r0 +; LE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #216 +; LE-NEON-NEXT: vmov.32 d17[1], r0 +; LE-NEON-NEXT: add r0, r11, #64 +; LE-NEON-NEXT: vmov.32 d16[1], r8 +; LE-NEON-NEXT: vorr q10, q8, q8 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #232 +; LE-NEON-NEXT: vmov.32 d15[1], r6 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vmov.32 d14[1], r1 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128] +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128] +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-NEON-NEXT: add sp, sp, #248 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v32i64_v32f16: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #176 +; BE-NEXT: sub sp, sp, #176 +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r10, r0 +; BE-NEXT: vstr s15, [sp, #112] @ 4-byte Spill +; BE-NEXT: ldrh r0, [lr, #74] +; BE-NEXT: vstr s14, [sp, #80] @ 4-byte Spill +; BE-NEXT: vstr s13, [sp, #48] @ 4-byte Spill +; BE-NEXT: vstr s12, [sp, #148] @ 4-byte Spill +; BE-NEXT: vstr s11, [sp, #76] @ 4-byte Spill +; BE-NEXT: vstr s10, [sp, #152] @ 4-byte Spill +; BE-NEXT: vstr s9, [sp, #156] @ 4-byte Spill +; BE-NEXT: vstr s8, [sp, #120] @ 4-byte Spill +; BE-NEXT: vstr s7, [sp, #136] @ 4-byte Spill +; BE-NEXT: vstr s6, [sp, #132] @ 4-byte Spill +; BE-NEXT: vstr s5, [sp, #144] @ 4-byte Spill +; BE-NEXT: vstr s4, [sp, #64] @ 4-byte Spill +; BE-NEXT: vstr s3, [sp, #104] @ 4-byte Spill +; BE-NEXT: vstr s2, [sp, #88] @ 4-byte Spill +; BE-NEXT: vstr s1, [sp, #56] @ 4-byte Spill +; BE-NEXT: vstr s0, [sp, #96] @ 4-byte Spill +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r9, r0 +; BE-NEXT: mov r8, r1 +; BE-NEXT: ldrh r0, [lr, #62] +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r6, r0 +; BE-NEXT: ldrh r0, [lr, #58] +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r7, r0 +; BE-NEXT: ldrh r0, [lr, #66] +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r4, r0 +; BE-NEXT: 
ldrh r0, [lr, #54] +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r5, r0 +; BE-NEXT: ldrh r0, [lr, #50] +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov s0, r5 +; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEXT: vstr d16, [sp, #168] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov s0, r4 +; BE-NEXT: str r1, [sp, #40] @ 4-byte Spill +; BE-NEXT: vstr d16, [sp, #160] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov s0, r7 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vstr d16, [sp, #32] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov s0, r6 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vstr d16, [sp, #24] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: mov r6, r1 +; BE-NEXT: ldrh r0, [lr, #34] +; BE-NEXT: vstr d16, [sp, #16] @ 8-byte Spill +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d8[0], r9 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: ldrh r1, [lr, #38] +; BE-NEXT: mov r0, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.32 d8[1], r8 +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vstr d8, [sp, #8] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: ldrh r1, [lr, #26] +; BE-NEXT: mov r0, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d12[1], r7 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: ldrh r1, [lr, #30] +; BE-NEXT: mov r0, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d13[1], r5 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: ldrh r1, [lr, #78] +; 
BE-NEXT: mov r0, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d9[1], r7 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldrh r1, [lr, #82] +; BE-NEXT: mov r0, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d15[1], r5 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: ldrh r1, [lr, #86] +; BE-NEXT: mov r0, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d14[1], r7 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: ldrh r1, [lr, #70] +; BE-NEXT: mov r0, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d8[1], r5 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: ldrh r1, [lr, #46] +; BE-NEXT: mov r0, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d10[1], r7 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d25[0], r0 +; BE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; BE-NEXT: ldr r2, [sp, #44] @ 4-byte Reload +; BE-NEXT: vldr d24, [sp, #160] @ 8-byte Reload +; BE-NEXT: vldr s0, [sp, #48] @ 4-byte Reload +; BE-NEXT: vmov.32 d24[1], r0 +; BE-NEXT: vmov r0, s0 +; BE-NEXT: vldr d26, [sp, #16] @ 8-byte Reload +; BE-NEXT: vstr d24, [sp, #160] @ 8-byte Spill +; BE-NEXT: vldr d24, [sp, #8] @ 8-byte Reload +; BE-NEXT: vrev64.32 d23, d14 +; BE-NEXT: vldr d29, [sp, #24] @ 8-byte Reload +; BE-NEXT: vrev64.32 d22, d24 +; BE-NEXT: vldr d24, [sp, #168] @ 8-byte Reload +; BE-NEXT: vmov.32 d26[1], r6 +; BE-NEXT: vldr d28, [sp, #32] @ 8-byte Reload +; BE-NEXT: vmov.32 d25[1], r1 +; BE-NEXT: add r1, r10, #192 +; BE-NEXT: vmov.32 d29[1], r11 +; BE-NEXT: add r11, r10, #128 +; BE-NEXT: vmov.32 d24[1], r2 +; BE-NEXT: vmov.32 d11[1], r5 +; BE-NEXT: vmov.32 d28[1], r4 +; BE-NEXT: vrev64.32 d27, d26 +; 
BE-NEXT: vstr d24, [sp, #168] @ 8-byte Spill +; BE-NEXT: vstr d25, [sp, #48] @ 8-byte Spill +; BE-NEXT: vrev64.32 d25, d11 +; BE-NEXT: vrev64.32 d26, d29 +; BE-NEXT: vrev64.32 d24, d28 +; BE-NEXT: vst1.64 {d26, d27}, [r1:128]! +; BE-NEXT: vst1.64 {d24, d25}, [r1:128]! +; BE-NEXT: vrev64.32 d21, d10 +; BE-NEXT: vrev64.32 d19, d15 +; BE-NEXT: vrev64.32 d17, d13 +; BE-NEXT: vrev64.32 d20, d8 +; BE-NEXT: vst1.64 {d22, d23}, [r1:128]! +; BE-NEXT: vrev64.32 d18, d9 +; BE-NEXT: vrev64.32 d16, d12 +; BE-NEXT: vst1.64 {d20, d21}, [r1:128] +; BE-NEXT: vst1.64 {d18, d19}, [r11:128]! +; BE-NEXT: vst1.64 {d16, d17}, [r11:128]! +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #256 +; BE-NEXT: mov r7, r0 +; BE-NEXT: mov r8, r1 +; BE-NEXT: ldrh r0, [lr, #42] +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #56] @ 4-byte Reload +; BE-NEXT: mov r4, r0 +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov s0, r4 +; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vldr s0, [sp, #64] @ 4-byte Reload +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov r2, s0 +; BE-NEXT: vldr s0, [sp, #80] @ 4-byte Reload +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vmov r4, s0 +; BE-NEXT: vldr s0, [sp, #76] @ 4-byte Reload +; BE-NEXT: vstr d16, [sp, #80] @ 8-byte Spill +; BE-NEXT: vmov r5, s0 +; BE-NEXT: mov r0, r2 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: mov r0, r4 +; BE-NEXT: mov r9, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov s0, r0 +; BE-NEXT: vmov.32 d8[0], r7 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: mov r6, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #88] @ 4-byte Reload +; BE-NEXT: mov r4, r0 +; BE-NEXT: vmov.32 d8[1], r8 +; BE-NEXT: vmov r7, s0 +; BE-NEXT: vldr s0, [sp, #96] @ 4-byte Reload +; 
BE-NEXT: vstr d8, [sp, #88] @ 8-byte Spill +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #104] @ 4-byte Reload +; BE-NEXT: vmov s19, r0 +; BE-NEXT: vmov.32 d12[1], r6 +; BE-NEXT: vmov r5, s0 +; BE-NEXT: vldr s0, [sp, #112] @ 4-byte Reload +; BE-NEXT: vstr d12, [sp, #104] @ 8-byte Spill +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: vmov s30, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s30 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: vmov s17, r4 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: mov r4, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vmov s30, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s30 +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: str r1, [sp, #76] @ 4-byte Spill +; BE-NEXT: vmov.32 d12[1], r4 +; BE-NEXT: vstr d16, [sp, #64] @ 8-byte Spill +; BE-NEXT: vstr d12, [sp, #112] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: mov r0, r7 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d9[1], r6 +; BE-NEXT: vstr d16, [sp, #56] @ 8-byte Spill +; BE-NEXT: vstr d9, [sp, #96] @ 8-byte Spill +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #120] @ 4-byte Reload +; BE-NEXT: mov r5, r0 +; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-NEXT: vmov r7, s0 +; BE-NEXT: vldr s0, [sp, #132] @ 4-byte Reload +; BE-NEXT: vmov.32 d10[1], r0 +; BE-NEXT: vmov r0, s0 +; BE-NEXT: vstr d10, [sp, #120] @ 8-byte Spill +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #136] @ 4-byte Reload +; BE-NEXT: vmov s26, r0 +; BE-NEXT: vmov.32 d11[1], r9 +; BE-NEXT: vmov r4, s0 +; BE-NEXT: vldr s0, [sp, #144] @ 4-byte Reload +; BE-NEXT: vstr d11, [sp, #136] @ 8-byte Spill +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s26 +; BE-NEXT: vmov s22, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s22 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; 
BE-NEXT: vmov s24, r5 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: mov r0, r4 +; BE-NEXT: mov r5, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s24 +; BE-NEXT: vmov s22, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s22 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: mov r0, r7 +; BE-NEXT: mov r5, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #148] @ 4-byte Reload +; BE-NEXT: mov r7, r0 +; BE-NEXT: vmov.32 d13[1], r6 +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vldr s0, [sp, #152] @ 4-byte Reload +; BE-NEXT: vmov s20, r0 +; BE-NEXT: vmov.32 d11[1], r5 +; BE-NEXT: vmov r4, s0 +; BE-NEXT: vldr s0, [sp, #156] @ 4-byte Reload +; BE-NEXT: vmov r0, s0 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s20 +; BE-NEXT: vmov s16, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vmov s18, r7 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: mov r0, r4 +; BE-NEXT: mov r5, r1 +; BE-NEXT: bl __aeabi_h2f +; BE-NEXT: vmov.f32 s0, s18 +; BE-NEXT: vmov s16, r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: vmov.32 d15[1], r5 +; BE-NEXT: bl llrintf +; BE-NEXT: vldr d16, [sp, #160] @ 8-byte Reload +; BE-NEXT: vldr d20, [sp, #136] @ 8-byte Reload +; BE-NEXT: vrev64.32 d19, d14 +; BE-NEXT: vrev64.32 d31, d16 +; BE-NEXT: vldr d16, [sp, #168] @ 8-byte Reload +; BE-NEXT: vrev64.32 d18, d20 +; BE-NEXT: vldr d20, [sp, #120] @ 8-byte Reload +; BE-NEXT: vldr d22, [sp, #96] @ 8-byte Reload +; BE-NEXT: vmov.32 d28[0], r0 +; BE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; BE-NEXT: vrev64.32 d21, d20 +; BE-NEXT: vrev64.32 d30, d16 +; BE-NEXT: vldr d16, [sp, #48] @ 8-byte Reload +; BE-NEXT: vldr d23, [sp, #64] @ 8-byte Reload +; BE-NEXT: vrev64.32 d20, d22 +; BE-NEXT: vldr d22, 
[sp, #112] @ 8-byte Reload +; BE-NEXT: vrev64.32 d1, d16 +; BE-NEXT: vldr d16, [sp, #80] @ 8-byte Reload +; BE-NEXT: vmov.32 d23[1], r0 +; BE-NEXT: add r0, r10, #64 +; BE-NEXT: vrev64.32 d25, d22 +; BE-NEXT: vldr d22, [sp, #104] @ 8-byte Reload +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: vmov.32 d28[1], r1 +; BE-NEXT: vldr d29, [sp, #56] @ 8-byte Reload +; BE-NEXT: vrev64.32 d3, d15 +; BE-NEXT: vrev64.32 d24, d22 +; BE-NEXT: vldr d22, [sp, #88] @ 8-byte Reload +; BE-NEXT: vmov.32 d10[1], r6 +; BE-NEXT: vrev64.32 d5, d23 +; BE-NEXT: vst1.64 {d0, d1}, [r11:128]! +; BE-NEXT: vrev64.32 d2, d9 +; BE-NEXT: vrev64.32 d27, d22 +; BE-NEXT: vmov.32 d29[1], r8 +; BE-NEXT: vrev64.32 d4, d28 +; BE-NEXT: vst1.64 {d30, d31}, [r11:128] +; BE-NEXT: vst1.64 {d2, d3}, [r0:128]! +; BE-NEXT: vmov.32 d12[1], r9 +; BE-NEXT: vrev64.32 d26, d10 +; BE-NEXT: vst1.64 {d4, d5}, [r0:128]! +; BE-NEXT: vrev64.32 d23, d29 +; BE-NEXT: vst1.64 {d26, d27}, [r0:128]! +; BE-NEXT: vrev64.32 d22, d12 +; BE-NEXT: vst1.64 {d24, d25}, [r0:128] +; BE-NEXT: vst1.64 {d20, d21}, [r10:128]! +; BE-NEXT: vst1.64 {d22, d23}, [r10:128]! +; BE-NEXT: vrev64.32 d17, d11 +; BE-NEXT: vrev64.32 d16, d13 +; BE-NEXT: vst1.64 {d18, d19}, [r10:128]! 
+; BE-NEXT: vst1.64 {d16, d17}, [r10:128] +; BE-NEXT: add sp, sp, #176 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v32i64_v32f16: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #176 +; BE-NEON-NEXT: sub sp, sp, #176 +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r10, r0 +; BE-NEON-NEXT: vstr s15, [sp, #112] @ 4-byte Spill +; BE-NEON-NEXT: ldrh r0, [lr, #74] +; BE-NEON-NEXT: vstr s14, [sp, #80] @ 4-byte Spill +; BE-NEON-NEXT: vstr s13, [sp, #48] @ 4-byte Spill +; BE-NEON-NEXT: vstr s12, [sp, #148] @ 4-byte Spill +; BE-NEON-NEXT: vstr s11, [sp, #76] @ 4-byte Spill +; BE-NEON-NEXT: vstr s10, [sp, #152] @ 4-byte Spill +; BE-NEON-NEXT: vstr s9, [sp, #156] @ 4-byte Spill +; BE-NEON-NEXT: vstr s8, [sp, #120] @ 4-byte Spill +; BE-NEON-NEXT: vstr s7, [sp, #136] @ 4-byte Spill +; BE-NEON-NEXT: vstr s6, [sp, #132] @ 4-byte Spill +; BE-NEON-NEXT: vstr s5, [sp, #144] @ 4-byte Spill +; BE-NEON-NEXT: vstr s4, [sp, #64] @ 4-byte Spill +; BE-NEON-NEXT: vstr s3, [sp, #104] @ 4-byte Spill +; BE-NEON-NEXT: vstr s2, [sp, #88] @ 4-byte Spill +; BE-NEON-NEXT: vstr s1, [sp, #56] @ 4-byte Spill +; BE-NEON-NEXT: vstr s0, [sp, #96] @ 4-byte Spill +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r9, r0 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: ldrh r0, [lr, #62] +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r6, r0 +; BE-NEON-NEXT: ldrh r0, [lr, #58] +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: 
mov r7, r0 +; BE-NEON-NEXT: ldrh r0, [lr, #66] +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: ldrh r0, [lr, #54] +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r5, r0 +; BE-NEON-NEXT: ldrh r0, [lr, #50] +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov s0, r5 +; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEON-NEXT: vstr d16, [sp, #168] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov s0, r4 +; BE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill +; BE-NEON-NEXT: vstr d16, [sp, #160] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov s0, r7 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vstr d16, [sp, #32] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov s0, r6 +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: vstr d16, [sp, #24] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: ldrh r0, [lr, #34] +; BE-NEON-NEXT: vstr d16, [sp, #16] @ 8-byte Spill +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d8[0], r9 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: ldrh r1, [lr, #38] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.32 d8[1], r8 +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vstr d8, [sp, #8] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: ldrh r1, [lr, #26] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov 
s0, r0 +; BE-NEON-NEXT: vmov.32 d12[1], r7 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: ldrh r1, [lr, #30] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d13[1], r5 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: ldrh r1, [lr, #78] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d9[1], r7 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldrh r1, [lr, #82] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d15[1], r5 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: ldrh r1, [lr, #86] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d14[1], r7 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: ldrh r1, [lr, #70] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d8[1], r5 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: ldrh r1, [lr, #46] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d10[1], r7 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d25[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; BE-NEON-NEXT: ldr r2, [sp, #44] @ 4-byte Reload +; BE-NEON-NEXT: vldr d24, [sp, #160] @ 8-byte Reload +; BE-NEON-NEXT: 
vldr s0, [sp, #48] @ 4-byte Reload +; BE-NEON-NEXT: vmov.32 d24[1], r0 +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: vldr d26, [sp, #16] @ 8-byte Reload +; BE-NEON-NEXT: vstr d24, [sp, #160] @ 8-byte Spill +; BE-NEON-NEXT: vldr d24, [sp, #8] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d23, d14 +; BE-NEON-NEXT: vldr d29, [sp, #24] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d22, d24 +; BE-NEON-NEXT: vldr d24, [sp, #168] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d26[1], r6 +; BE-NEON-NEXT: vldr d28, [sp, #32] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d25[1], r1 +; BE-NEON-NEXT: add r1, r10, #192 +; BE-NEON-NEXT: vmov.32 d29[1], r11 +; BE-NEON-NEXT: add r11, r10, #128 +; BE-NEON-NEXT: vmov.32 d24[1], r2 +; BE-NEON-NEXT: vmov.32 d11[1], r5 +; BE-NEON-NEXT: vmov.32 d28[1], r4 +; BE-NEON-NEXT: vrev64.32 d27, d26 +; BE-NEON-NEXT: vstr d24, [sp, #168] @ 8-byte Spill +; BE-NEON-NEXT: vstr d25, [sp, #48] @ 8-byte Spill +; BE-NEON-NEXT: vrev64.32 d25, d11 +; BE-NEON-NEXT: vrev64.32 d26, d29 +; BE-NEON-NEXT: vrev64.32 d24, d28 +; BE-NEON-NEXT: vst1.64 {d26, d27}, [r1:128]! +; BE-NEON-NEXT: vst1.64 {d24, d25}, [r1:128]! +; BE-NEON-NEXT: vrev64.32 d21, d10 +; BE-NEON-NEXT: vrev64.32 d19, d15 +; BE-NEON-NEXT: vrev64.32 d17, d13 +; BE-NEON-NEXT: vrev64.32 d20, d8 +; BE-NEON-NEXT: vst1.64 {d22, d23}, [r1:128]! +; BE-NEON-NEXT: vrev64.32 d18, d9 +; BE-NEON-NEXT: vrev64.32 d16, d12 +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r1:128] +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r11:128]! +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #256 +; BE-NEON-NEXT: mov r7, r0 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: ldrh r0, [lr, #42] +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #56] @ 4-byte Reload +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov s0, r4 +; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vldr s0, [sp, #64] @ 4-byte Reload +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov r2, s0 +; BE-NEON-NEXT: vldr s0, [sp, #80] @ 4-byte Reload +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vmov r4, s0 +; BE-NEON-NEXT: vldr s0, [sp, #76] @ 4-byte Reload +; BE-NEON-NEXT: vstr d16, [sp, #80] @ 8-byte Spill +; BE-NEON-NEXT: vmov r5, s0 +; BE-NEON-NEXT: mov r0, r2 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: mov r0, r4 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov s0, r0 +; BE-NEON-NEXT: vmov.32 d8[0], r7 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: mov r0, r5 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #88] @ 4-byte Reload +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: vmov.32 d8[1], r8 +; BE-NEON-NEXT: vmov r7, s0 +; BE-NEON-NEXT: vldr s0, [sp, #96] @ 4-byte Reload +; BE-NEON-NEXT: vstr d8, [sp, #88] @ 8-byte Spill +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload +; BE-NEON-NEXT: vmov s19, r0 +; BE-NEON-NEXT: vmov.32 d12[1], r6 +; BE-NEON-NEXT: vmov r5, s0 +; BE-NEON-NEXT: vldr s0, [sp, #112] @ 4-byte Reload +; BE-NEON-NEXT: vstr d12, [sp, #104] @ 8-byte Spill +; BE-NEON-NEXT: vmov r0, 
s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s19 +; BE-NEON-NEXT: vmov s30, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s30 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: vmov s17, r4 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: mov r0, r5 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vmov s30, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s30 +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d12[1], r4 +; BE-NEON-NEXT: vstr d16, [sp, #64] @ 8-byte Spill +; BE-NEON-NEXT: vstr d12, [sp, #112] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: mov r0, r7 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d9[1], r6 +; BE-NEON-NEXT: vstr d16, [sp, #56] @ 8-byte Spill +; BE-NEON-NEXT: vstr d9, [sp, #96] @ 8-byte Spill +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #120] @ 4-byte Reload +; BE-NEON-NEXT: mov r5, r0 +; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-NEON-NEXT: vmov r7, s0 +; BE-NEON-NEXT: vldr s0, [sp, #132] @ 4-byte Reload +; BE-NEON-NEXT: vmov.32 d10[1], r0 +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #136] @ 4-byte Reload +; BE-NEON-NEXT: vmov s26, r0 +; BE-NEON-NEXT: vmov.32 d11[1], r9 +; BE-NEON-NEXT: vmov r4, s0 +; BE-NEON-NEXT: vldr s0, [sp, #144] @ 4-byte Reload +; BE-NEON-NEXT: vstr d11, [sp, #136] @ 8-byte Spill +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s26 +; BE-NEON-NEXT: vmov s22, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s22 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: vmov s24, r5 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: 
vmov.32 d14[0], r0 +; BE-NEON-NEXT: mov r0, r4 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s24 +; BE-NEON-NEXT: vmov s22, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s22 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: vmov.32 d14[1], r5 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: mov r0, r7 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #148] @ 4-byte Reload +; BE-NEON-NEXT: mov r7, r0 +; BE-NEON-NEXT: vmov.32 d13[1], r6 +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vldr s0, [sp, #152] @ 4-byte Reload +; BE-NEON-NEXT: vmov s20, r0 +; BE-NEON-NEXT: vmov.32 d11[1], r5 +; BE-NEON-NEXT: vmov r4, s0 +; BE-NEON-NEXT: vldr s0, [sp, #156] @ 4-byte Reload +; BE-NEON-NEXT: vmov r0, s0 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s20 +; BE-NEON-NEXT: vmov s16, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: vmov s18, r7 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: mov r0, r4 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: bl __aeabi_h2f +; BE-NEON-NEXT: vmov.f32 s0, s18 +; BE-NEON-NEXT: vmov s16, r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: vmov.32 d15[1], r5 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vldr d16, [sp, #160] @ 8-byte Reload +; BE-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d19, d14 +; BE-NEON-NEXT: vrev64.32 d31, d16 +; BE-NEON-NEXT: vldr d16, [sp, #168] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d18, d20 +; BE-NEON-NEXT: vldr d20, [sp, #120] @ 8-byte Reload +; BE-NEON-NEXT: vldr d22, [sp, #96] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d28[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #76] @ 
4-byte Reload +; BE-NEON-NEXT: vrev64.32 d21, d20 +; BE-NEON-NEXT: vrev64.32 d30, d16 +; BE-NEON-NEXT: vldr d16, [sp, #48] @ 8-byte Reload +; BE-NEON-NEXT: vldr d23, [sp, #64] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d20, d22 +; BE-NEON-NEXT: vldr d22, [sp, #112] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d1, d16 +; BE-NEON-NEXT: vldr d16, [sp, #80] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d23[1], r0 +; BE-NEON-NEXT: add r0, r10, #64 +; BE-NEON-NEXT: vrev64.32 d25, d22 +; BE-NEON-NEXT: vldr d22, [sp, #104] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: vrev64.32 d0, d16 +; BE-NEON-NEXT: vmov.32 d28[1], r1 +; BE-NEON-NEXT: vldr d29, [sp, #56] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d3, d15 +; BE-NEON-NEXT: vrev64.32 d24, d22 +; BE-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d10[1], r6 +; BE-NEON-NEXT: vrev64.32 d5, d23 +; BE-NEON-NEXT: vst1.64 {d0, d1}, [r11:128]! +; BE-NEON-NEXT: vrev64.32 d2, d9 +; BE-NEON-NEXT: vrev64.32 d27, d22 +; BE-NEON-NEXT: vmov.32 d29[1], r8 +; BE-NEON-NEXT: vrev64.32 d4, d28 +; BE-NEON-NEXT: vst1.64 {d30, d31}, [r11:128] +; BE-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]! +; BE-NEON-NEXT: vmov.32 d12[1], r9 +; BE-NEON-NEXT: vrev64.32 d26, d10 +; BE-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 d23, d29 +; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 d22, d12 +; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128] +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]! +; BE-NEON-NEXT: vst1.64 {d22, d23}, [r10:128]! +; BE-NEON-NEXT: vrev64.32 d17, d11 +; BE-NEON-NEXT: vrev64.32 d16, d13 +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]! 
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128] +; BE-NEON-NEXT: add sp, sp, #176 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) + +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +; LE-LABEL: llrint_v1i64_v1f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r11, lr} +; LE-NEXT: push {r11, lr} +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d0[0], r0 +; LE-NEXT: vmov.32 d0[1], r1 +; LE-NEXT: pop {r11, pc} +; +; LE-NEON-LABEL: llrint_v1i64_v1f32: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r11, lr} +; LE-NEON-NEXT: push {r11, lr} +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d0[0], r0 +; LE-NEON-NEXT: vmov.32 d0[1], r1 +; LE-NEON-NEXT: pop {r11, pc} +; +; BE-LABEL: llrint_v1i64_v1f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r11, lr} +; BE-NEXT: push {r11, lr} +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: pop {r11, pc} +; +; BE-NEON-LABEL: llrint_v1i64_v1f32: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r11, lr} +; BE-NEON-NEXT: push {r11, lr} +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d0, d16 +; BE-NEON-NEXT: pop {r11, pc} + %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) + +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +; LE-LABEL: llrint_v2i64_v2f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, lr} +; LE-NEXT: push {r4, lr} +; LE-NEXT: .vsave {d10, d11} +; LE-NEXT: vpush {d10, d11} +; LE-NEXT: .vsave {d8} +; LE-NEXT: vpush {d8} +; LE-NEXT: vmov.f64 d8, d0 +; LE-NEXT: vmov.f32 s0, s17 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: mov r4, r1 +; 
LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: vmov.32 d11[1], r4 +; LE-NEXT: vmov.32 d10[1], r1 +; LE-NEXT: vorr q0, q5, q5 +; LE-NEXT: vpop {d8} +; LE-NEXT: vpop {d10, d11} +; LE-NEXT: pop {r4, pc} +; +; LE-NEON-LABEL: llrint_v2i64_v2f32: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, lr} +; LE-NEON-NEXT: push {r4, lr} +; LE-NEON-NEXT: .vsave {d10, d11} +; LE-NEON-NEXT: vpush {d10, d11} +; LE-NEON-NEXT: .vsave {d8} +; LE-NEON-NEXT: vpush {d8} +; LE-NEON-NEXT: vmov.f64 d8, d0 +; LE-NEON-NEXT: vmov.f32 s0, s17 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: vmov.32 d11[1], r4 +; LE-NEON-NEXT: vmov.32 d10[1], r1 +; LE-NEON-NEXT: vorr q0, q5, q5 +; LE-NEON-NEXT: vpop {d8} +; LE-NEON-NEXT: vpop {d10, d11} +; LE-NEON-NEXT: pop {r4, pc} +; +; BE-LABEL: llrint_v2i64_v2f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, lr} +; BE-NEXT: push {r4, lr} +; BE-NEXT: .vsave {d10, d11} +; BE-NEXT: vpush {d10, d11} +; BE-NEXT: .vsave {d8} +; BE-NEXT: vpush {d8} +; BE-NEXT: vrev64.32 d8, d0 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vrev64.32 q0, q5 +; BE-NEXT: vpop {d8} +; BE-NEXT: vpop {d10, d11} +; BE-NEXT: pop {r4, pc} +; +; BE-NEON-LABEL: llrint_v2i64_v2f32: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, lr} +; BE-NEON-NEXT: push {r4, lr} +; BE-NEON-NEXT: .vsave {d10, d11} +; BE-NEON-NEXT: vpush {d10, d11} +; BE-NEON-NEXT: .vsave {d8} +; BE-NEON-NEXT: vpush {d8} +; BE-NEON-NEXT: vrev64.32 d8, d0 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; 
BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: vrev64.32 q0, q5 +; BE-NEON-NEXT: vpop {d8} +; BE-NEON-NEXT: vpop {d10, d11} +; BE-NEON-NEXT: pop {r4, pc} + %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) + +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +; LE-LABEL: llrint_v4i64_v4f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, lr} +; LE-NEXT: push {r4, r5, r6, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; LE-NEXT: vorr q5, q0, q0 +; LE-NEXT: vmov.f32 s0, s23 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s21 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 d13[1], r6 +; LE-NEXT: vmov.32 d9[1], r4 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q0, q6, q6 +; LE-NEXT: vorr q1, q4, q4 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; LE-NEXT: pop {r4, r5, r6, pc} +; +; LE-NEON-LABEL: llrint_v4i64_v4f32: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, lr} +; LE-NEON-NEXT: push {r4, r5, r6, lr} +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; LE-NEON-NEXT: vorr q5, q0, q0 +; LE-NEON-NEXT: vmov.f32 s0, s23 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s20 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s21 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s22 +; LE-NEON-NEXT: 
mov r6, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov.32 d13[1], r6 +; LE-NEON-NEXT: vmov.32 d9[1], r4 +; LE-NEON-NEXT: vmov.32 d12[1], r5 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vorr q0, q6, q6 +; LE-NEON-NEXT: vorr q1, q4, q4 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; LE-NEON-NEXT: pop {r4, r5, r6, pc} +; +; BE-LABEL: llrint_v4i64_v4f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, lr} +; BE-NEXT: push {r4, r5, r6, lr} +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; BE-NEXT: vrev64.32 d8, d1 +; BE-NEXT: vrev64.32 d9, d0 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s18 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vmov.32 d13[1], r6 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d12[1], r5 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vrev64.32 q0, q6 +; BE-NEXT: vrev64.32 q1, q5 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; BE-NEXT: pop {r4, r5, r6, pc} +; +; BE-NEON-LABEL: llrint_v4i64_v4f32: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, lr} +; BE-NEON-NEXT: push {r4, r5, r6, lr} +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; BE-NEON-NEXT: vrev64.32 d8, d1 +; BE-NEON-NEXT: vrev64.32 d9, d0 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s18 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s19 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 
s0, s16 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: vmov.32 d13[1], r6 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vmov.32 d12[1], r5 +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: vrev64.32 q0, q6 +; BE-NEON-NEXT: vrev64.32 q1, q5 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; BE-NEON-NEXT: pop {r4, r5, r6, pc} + %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +; LE-LABEL: llrint_v8i64_v8f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #40 +; LE-NEXT: sub sp, sp, #40 +; LE-NEXT: vorr q6, q1, q1 +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vorr q7, q0, q0 +; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-NEXT: vmov.f32 s0, s27 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s25 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vorr q6, q7, q7 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: vmov.f32 s0, s26 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s27 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s1 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: mov 
r6, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s2 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 d13[1], r6 +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: vmov.32 d11[1], r10 +; LE-NEXT: vmov.32 d9[1], r8 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: vorr q0, q6, q6 +; LE-NEXT: vmov.32 d10[1], r9 +; LE-NEXT: vorr q1, q7, q7 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q2, q5, q5 +; LE-NEXT: vorr q3, q4, q4 +; LE-NEXT: add sp, sp, #40 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-NEON-LABEL: llrint_v8i64_v8f32: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #40 +; LE-NEON-NEXT: sub sp, sp, #40 +; LE-NEON-NEXT: vorr q6, q1, q1 +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vorr q7, q0, q0 +; LE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-NEON-NEXT: vmov.f32 s0, s27 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s24 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s25 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vorr q6, q7, q7 +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: vmov.f32 s0, s26 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s27 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s24 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: bl llrintf +; 
LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s1 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s2 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov.32 d13[1], r6 +; LE-NEON-NEXT: vmov.32 d15[1], r4 +; LE-NEON-NEXT: vmov.32 d11[1], r10 +; LE-NEON-NEXT: vmov.32 d9[1], r8 +; LE-NEON-NEXT: vmov.32 d12[1], r5 +; LE-NEON-NEXT: vmov.32 d14[1], r7 +; LE-NEON-NEXT: vorr q0, q6, q6 +; LE-NEON-NEXT: vmov.32 d10[1], r9 +; LE-NEON-NEXT: vorr q1, q7, q7 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vorr q2, q5, q5 +; LE-NEON-NEXT: vorr q3, q4, q4 +; LE-NEON-NEXT: add sp, sp, #40 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-LABEL: llrint_v8i64_v8f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #32 +; BE-NEXT: sub sp, sp, #32 +; BE-NEXT: vorr q4, q1, q1 +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vorr q5, q0, q0 +; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEXT: vrev64.32 d12, d8 +; BE-NEXT: vmov.f32 s0, s25 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s24 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vrev64.32 d0, d11 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vrev64.32 d8, d9 +; BE-NEXT: vorr d9, d0, d0 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, 
sp, #8 +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d16 +; BE-NEXT: vstr d8, [sp, #8] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vldr d0, [sp, #8] @ 8-byte Reload +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: vmov.f32 s0, s1 +; BE-NEXT: bl llrintf +; BE-NEXT: vldr d0, [sp, #24] @ 8-byte Reload +; BE-NEXT: mov r6, r1 +; BE-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: vmov.32 d9[1], r6 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d15[1], r8 +; BE-NEXT: vmov.32 d13[1], r7 +; BE-NEXT: vmov.32 d8[1], r5 +; BE-NEXT: vmov.32 d10[1], r10 +; BE-NEXT: vmov.32 d14[1], r9 +; BE-NEXT: vmov.32 d12[1], r1 +; BE-NEXT: vrev64.32 q0, q4 +; BE-NEXT: vrev64.32 q1, q5 +; BE-NEXT: vrev64.32 q2, q7 +; BE-NEXT: vrev64.32 q3, q6 +; BE-NEXT: add sp, sp, #32 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-NEON-LABEL: llrint_v8i64_v8f32: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #32 +; BE-NEON-NEXT: sub sp, sp, #32 +; BE-NEON-NEXT: vorr q4, q1, q1 +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vorr q5, q0, q0 +; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEON-NEXT: vrev64.32 d12, d8 +; BE-NEON-NEXT: vmov.f32 s0, s25 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s24 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vrev64.32 d0, d11 +; BE-NEON-NEXT: mov r9, r1 +; 
BE-NEON-NEXT: vrev64.32 d8, d9 +; BE-NEON-NEXT: vorr d9, d0, d0 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vmov.f32 s0, s19 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d16 +; BE-NEON-NEXT: vstr d8, [sp, #8] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vldr d0, [sp, #8] @ 8-byte Reload +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: vmov.f32 s0, s1 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vldr d0, [sp, #24] @ 8-byte Reload +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: vmov.32 d9[1], r6 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vmov.32 d15[1], r8 +; BE-NEON-NEXT: vmov.32 d13[1], r7 +; BE-NEON-NEXT: vmov.32 d8[1], r5 +; BE-NEON-NEXT: vmov.32 d10[1], r10 +; BE-NEON-NEXT: vmov.32 d14[1], r9 +; BE-NEON-NEXT: vmov.32 d12[1], r1 +; BE-NEON-NEXT: vrev64.32 q0, q4 +; BE-NEON-NEXT: vrev64.32 q1, q5 +; BE-NEON-NEXT: vrev64.32 q2, q7 +; BE-NEON-NEXT: vrev64.32 q3, q6 +; BE-NEON-NEXT: add sp, sp, #32 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) + +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +; LE-LABEL: llrint_v16i64_v16f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, 
r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #160 +; LE-NEXT: sub sp, sp, #160 +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: vorr q5, q3, q3 +; LE-NEXT: vorr q6, q0, q0 +; LE-NEXT: mov r4, r0 +; LE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-NEXT: add lr, sp, #48 +; LE-NEXT: vorr q7, q1, q1 +; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEXT: vmov.f32 s0, s23 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: str r1, [sp, #108] @ 4-byte Spill +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s25 +; LE-NEXT: str r1, [sp, #84] @ 4-byte Spill +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s28 +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s29 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s30 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s31 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s29 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: vmov.32 d13[1], r7 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vmov.f32 s0, s21 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: str r1, [sp, #40] @ 4-byte Spill 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d16[0], r0 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vmov.32 d9[1], r6 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s31 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d8[1], r9 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #64 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; LE-NEXT: mov r9, r1 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: add lr, sp, #48 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s27 +; LE-NEXT: vmov.32 d11[1], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s26 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d10[1], r0 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; LE-NEXT: mov r5, r1 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d17[1], r0 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d16[0], r0 +; LE-NEXT: vmov.32 d17[1], r11 +; LE-NEXT: vorr q6, q8, q8 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: ldr r0, [sp, #40] @ 4-byte 
Reload +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: vmov.32 d9[1], r9 +; LE-NEXT: vmov.32 d12[1], r6 +; LE-NEXT: vmov.32 d19[1], r10 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vmov.32 d16[1], r0 +; LE-NEXT: add r0, r4, #64 +; LE-NEXT: vmov.32 d18[1], r8 +; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: vmov.32 d15[1], r7 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #64 +; LE-NEXT: vmov.32 d14[1], r5 +; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-NEXT: vst1.64 {d14, d15}, [r4:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-NEXT: add sp, sp, #160 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v16i64_v16f32: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #160 +; LE-NEON-NEXT: sub sp, sp, #160 +; LE-NEON-NEXT: add lr, sp, #112 +; LE-NEON-NEXT: vorr q5, q3, q3 +; LE-NEON-NEXT: vorr q6, q0, q0 +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #48 +; LE-NEON-NEXT: vorr q7, q1, q1 +; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEON-NEXT: vmov.f32 s0, s23 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s24 +; LE-NEON-NEXT: add lr, 
sp, #144 +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s25 +; LE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s28 +; LE-NEON-NEXT: add lr, sp, #128 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s29 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s30 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s31 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #112 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s29 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s22 +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: vmov.32 d13[1], r7 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: vmov.f32 s0, s21 +; LE-NEON-NEXT: vmov.32 d12[1], r5 +; LE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d16[0], r0 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s20 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vmov.32 d9[1], r6 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: 
vmov.f32 s0, s31 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d8[1], r9 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #64 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #128 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #48 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s27 +; LE-NEON-NEXT: vmov.32 d11[1], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s26 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; LE-NEON-NEXT: add lr, sp, #128 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d10[1], r0 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d17[1], r0 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #112 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s20 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vmov.f32 s0, s22 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d16[0], r0 +; LE-NEON-NEXT: vmov.32 d17[1], r11 +; LE-NEON-NEXT: vorr q6, q8, q8 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 
16-byte Reload +; LE-NEON-NEXT: add lr, sp, #128 +; LE-NEON-NEXT: vmov.32 d9[1], r9 +; LE-NEON-NEXT: vmov.32 d12[1], r6 +; LE-NEON-NEXT: vmov.32 d19[1], r10 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vmov.32 d16[1], r0 +; LE-NEON-NEXT: add r0, r4, #64 +; LE-NEON-NEXT: vmov.32 d18[1], r8 +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEON-NEXT: vmov.32 d15[1], r7 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #64 +; LE-NEON-NEXT: vmov.32 d14[1], r5 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r4:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-NEON-NEXT: add sp, sp, #160 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v16i64_v16f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #144 +; BE-NEXT: sub sp, sp, #144 +; BE-NEXT: vorr q6, q3, q3 +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vorr q7, q0, q0 +; BE-NEXT: mov r4, r0 +; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vrev64.32 d8, d13 +; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: 
vrev64.32 d8, d14 +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: str r1, [sp, #92] @ 4-byte Spill +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vrev64.32 d9, d12 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: vstr d9, [sp, #64] @ 8-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vrev64.32 d9, d15 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s18 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vldr d0, [sp, #64] @ 8-byte Reload +; BE-NEXT: mov r7, r1 +; BE-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #40 +; BE-NEXT: str r1, [sp, #60] @ 4-byte Spill +; BE-NEXT: vmov.32 d15[1], r7 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d16 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vmov.32 d13[1], r6 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d17 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d12[1], r9 +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vstmia 
lr, {d12, d13} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-NEXT: mov r9, r1 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d16 +; BE-NEXT: vmov.32 d11[1], r0 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d10[1], r0 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-NEXT: mov r5, r1 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #40 +; BE-NEXT: vrev64.32 d8, d17 +; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vmov.32 d13[1], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: ldr r0, [sp, #60] @ 4-byte Reload +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d12[1], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add r0, r4, #64 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.32 d17[1], r10 +; BE-NEXT: vmov.32 d16[1], r11 +; BE-NEXT: vorr q12, q8, q8 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: vmov.32 d15[1], r7 +; BE-NEXT: vmov.32 d11[1], r6 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vmov.32 d17[1], r8 +; BE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: vmov.32 d16[1], 
r9 +; BE-NEXT: vrev64.32 q14, q7 +; BE-NEXT: vorr q13, q8, q8 +; BE-NEXT: vrev64.32 q15, q5 +; BE-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-NEXT: vrev64.32 q8, q6 +; BE-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEXT: vrev64.32 q9, q9 +; BE-NEXT: vrev64.32 q10, q10 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vrev64.32 q11, q11 +; BE-NEXT: vrev64.32 q12, q12 +; BE-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-NEXT: vrev64.32 q13, q13 +; BE-NEXT: vst1.64 {d24, d25}, [r4:128]! +; BE-NEXT: vst1.64 {d26, d27}, [r4:128] +; BE-NEXT: add sp, sp, #144 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v16i64_v16f32: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #144 +; BE-NEON-NEXT: sub sp, sp, #144 +; BE-NEON-NEXT: vorr q6, q3, q3 +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: vorr q7, q0, q0 +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #96 +; BE-NEON-NEXT: vrev64.32 d8, d13 +; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vrev64.32 d8, d14 +; BE-NEON-NEXT: add lr, sp, #128 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vrev64.32 d9, d12 +; 
BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: vstr d9, [sp, #64] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s19 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: vrev64.32 d9, d15 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s18 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s19 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vldr d0, [sp, #64] @ 8-byte Reload +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add lr, sp, #40 +; BE-NEON-NEXT: str r1, [sp, #60] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d15[1], r7 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #96 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d16 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d14[1], r5 +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: vmov.32 d13[1], r6 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #96 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d17 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d12[1], r9 +; BE-NEON-NEXT: 
add lr, sp, #96 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #128 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d16 +; BE-NEON-NEXT: vmov.32 d11[1], r0 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-NEON-NEXT: add lr, sp, #128 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d10[1], r0 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #40 +; BE-NEON-NEXT: vrev64.32 d8, d17 +; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vmov.32 d13[1], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #60] @ 4-byte Reload +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d12[1], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add r0, r4, #64 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vmov.32 d17[1], r10 +; BE-NEON-NEXT: vmov.32 d16[1], r11 +; BE-NEON-NEXT: vorr q12, q8, q8 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, 
sp, #128 +; BE-NEON-NEXT: vmov.32 d15[1], r7 +; BE-NEON-NEXT: vmov.32 d11[1], r6 +; BE-NEON-NEXT: vmov.32 d14[1], r5 +; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #96 +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: vmov.32 d17[1], r8 +; BE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: vmov.32 d16[1], r9 +; BE-NEON-NEXT: vrev64.32 q14, q7 +; BE-NEON-NEXT: vorr q13, q8, q8 +; BE-NEON-NEXT: vrev64.32 q15, q5 +; BE-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 q8, q6 +; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 q9, q9 +; BE-NEON-NEXT: vrev64.32 q10, q10 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 q11, q11 +; BE-NEON-NEXT: vrev64.32 q12, q12 +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-NEON-NEXT: vrev64.32 q13, q13 +; BE-NEON-NEXT: vst1.64 {d24, d25}, [r4:128]! 
+; BE-NEON-NEXT: vst1.64 {d26, d27}, [r4:128] +; BE-NEON-NEXT: add sp, sp, #144 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) + +define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { +; LE-LABEL: llrint_v32i64_v32f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #184 +; LE-NEXT: sub sp, sp, #184 +; LE-NEXT: add lr, sp, #152 +; LE-NEXT: vorr q7, q3, q3 +; LE-NEXT: vorr q4, q2, q2 +; LE-NEXT: mov r5, r0 +; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEXT: vmov.f32 s0, s3 +; LE-NEXT: str r0, [sp, #68] @ 4-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: add lr, sp, #168 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s17 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s19 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s31 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s30 +; LE-NEXT: str r1, [sp, #8] @ 4-byte Spill +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: vmov.32 d11[1], r7 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s29 +; LE-NEXT: vmov.32 d8[0], r0 
+; LE-NEXT: add lr, sp, #104 +; LE-NEXT: str r1, [sp, #12] @ 4-byte Spill +; LE-NEXT: vmov.32 d13[1], r4 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: add r0, sp, #320 +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vld1.64 {d0, d1}, [r0] +; LE-NEXT: add r0, sp, #304 +; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: vld1.64 {d0, d1}, [r0] +; LE-NEXT: add r0, sp, #336 +; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEXT: add lr, sp, #32 +; LE-NEXT: vld1.64 {d0, d1}, [r0] +; LE-NEXT: add r0, sp, #288 +; LE-NEXT: vmov.32 d12[1], r6 +; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEXT: add lr, sp, #48 +; LE-NEXT: vld1.64 {d0, d1}, [r0] +; LE-NEXT: vmov.32 d10[1], r8 +; LE-NEXT: add r8, r5, #64 +; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEXT: add lr, sp, #152 +; LE-NEXT: vst1.64 {d12, d13}, [r8:128]! +; LE-NEXT: vst1.64 {d10, d11}, [r8:128]! +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s27 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s28 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s26 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 d11[1], r4 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; LE-NEXT: mov r10, r1 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #168 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s26 +; LE-NEXT: vmov.32 d11[1], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s25 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: add lr, sp, #168 +; LE-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; LE-NEXT: mov r7, r1 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #104 +; 
LE-NEXT: vorr q5, q6, q6 +; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEXT: vmov.32 d15[1], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d14[1], r0 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #152 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vorr q7, q6, q6 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.32 d9[1], r11 +; LE-NEXT: vmov.f32 s0, s25 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: vmov.32 d8[1], r9 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d16[1], r10 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vst1.64 {d8, d9}, [r8:128]! 
+; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s1 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #152 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: vst1.64 {d16, d17}, [r8:128] +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s19 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #168 +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d16[1], r7 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s17 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s16 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vmov.32 d14[1], r6 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d11[1], r5 +; LE-NEXT: vmov.32 d10[1], r11 +; LE-NEXT: ldr r11, [sp, #68] @ 4-byte Reload +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #16 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #32 +; LE-NEXT: vst1.64 {d14, d15}, [r11:128]! 
+; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s23 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #152 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; LE-NEXT: vmov.32 d13[1], r10 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #152 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEXT: vmov.32 d15[1], r8 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s21 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.32 d13[1], r9 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: add lr, sp, #32 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d12[1], r6 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s19 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s18 +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.32 d13[1], r4 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #152 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d16[1], r5 +; LE-NEXT: vstmia lr, {d16, 
d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #168 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #48 +; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s21 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: vmov.32 d12[1], r8 +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s23 +; LE-NEXT: add lr, sp, #32 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.32 d13[1], r7 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #48 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s2 +; LE-NEXT: vmov.32 d12[1], r9 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #16 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: vmov.32 d11[1], r7 +; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #152 +; LE-NEXT: vmov.32 d15[1], r10 +; LE-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-NEXT: vmov.32 d10[1], r1 +; LE-NEXT: ldr r1, [sp, #68] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add r0, r1, #192 +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: vmov.32 d14[1], r4 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEXT: vmov.32 d9[1], r5 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vmov.32 d8[1], r6 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: add r0, r1, #128 +; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: add sp, sp, #184 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v32i64_v32f32: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #184 +; LE-NEON-NEXT: sub sp, sp, #184 +; LE-NEON-NEXT: add lr, sp, #152 +; LE-NEON-NEXT: vorr q7, q3, q3 +; LE-NEON-NEXT: vorr q4, q2, q2 +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEON-NEXT: vmov.f32 s0, s3 +; LE-NEON-NEXT: str r0, [sp, #68] @ 4-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: add lr, sp, #168 +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: str r1, [sp, #16] @ 4-byte Spill +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s17 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s19 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s31 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s30 +; LE-NEON-NEXT: str r1, [sp, #8] @ 
4-byte Spill +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: vmov.32 d11[1], r7 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s29 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d13[1], r4 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: add r0, sp, #320 +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0] +; LE-NEON-NEXT: add r0, sp, #304 +; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0] +; LE-NEON-NEXT: add r0, sp, #336 +; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #32 +; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0] +; LE-NEON-NEXT: add r0, sp, #288 +; LE-NEON-NEXT: vmov.32 d12[1], r6 +; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #48 +; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0] +; LE-NEON-NEXT: vmov.32 d10[1], r8 +; LE-NEON-NEXT: add r8, r5, #64 +; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #152 +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r8:128]! +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r8:128]! 
+; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s27 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s28 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s26 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov.32 d11[1], r4 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #168 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s26 +; LE-NEON-NEXT: vmov.32 d11[1], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s25 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: add lr, sp, #168 +; LE-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vorr q5, q6, q6 +; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d15[1], r0 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s20 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d14[1], r0 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #152 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vorr q7, q6, q6 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d9[1], r11 +; LE-NEON-NEXT: vmov.f32 s0, s25 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s24 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; 
LE-NEON-NEXT: vmov.32 d8[1], r9 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d16[1], r10 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vst1.64 {d8, d9}, [r8:128]! +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s1 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #152 +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128] +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s19 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #168 +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d16[1], r7 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s17 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d15[1], r4 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s16 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vmov.32 d14[1], r6 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d11[1], r5 +; LE-NEON-NEXT: vmov.32 d10[1], r11 +; LE-NEON-NEXT: ldr r11, [sp, #68] @ 4-byte Reload +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #16 +; LE-NEON-NEXT: vstmia lr, 
{d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #32 +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]! +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s23 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #152 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; LE-NEON-NEXT: vmov.32 d13[1], r10 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s22 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #152 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d15[1], r8 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s21 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d14[1], r7 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s20 +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d13[1], r9 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: add lr, sp, #32 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d12[1], r6 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s19 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s18 +; LE-NEON-NEXT: add lr, sp, 
#72 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d13[1], r4 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #152 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d16[1], r5 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #168 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #48 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s21 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s20 +; LE-NEON-NEXT: vmov.32 d12[1], r8 +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: vmov.f32 s0, s23 +; LE-NEON-NEXT: add lr, sp, #32 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d13[1], r7 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #48 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: vmov.f32 s0, s2 +; LE-NEON-NEXT: vmov.32 d12[1], r9 +; LE-NEON-NEXT: bl llrintf +; LE-NEON-NEXT: add lr, sp, #16 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: vmov.32 d11[1], r7 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #152 +; LE-NEON-NEXT: vmov.32 d15[1], r10 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-NEON-NEXT: vmov.32 d10[1], r1 +; LE-NEON-NEXT: ldr r1, [sp, #68] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add r0, r1, #192 +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: vmov.32 d14[1], r4 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEON-NEXT: vmov.32 d9[1], r5 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vmov.32 d8[1], r6 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEON-NEXT: add r0, r1, #128 +; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEON-NEXT: add sp, sp, #184 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v32i64_v32f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #256 +; BE-NEXT: sub sp, sp, #256 +; BE-NEXT: add lr, sp, #208 +; BE-NEXT: str r0, [sp, #156] @ 4-byte Spill +; BE-NEXT: add r0, sp, #408 +; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-NEXT: add lr, sp, #120 +; BE-NEXT: vld1.64 {d10, d11}, [r0] +; BE-NEXT: add r0, sp, #392 +; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-NEXT: add lr, sp, #160 +; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-NEXT: add lr, sp, #176 +; BE-NEXT: vrev64.32 d8, d10 +; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vld1.64 {d12, d13}, [r0] +; BE-NEXT: add r0, sp, #360 +; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEXT: add lr, sp, #192 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #376 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #40 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vrev64.32 d9, d11 +; BE-NEXT: add lr, sp, #240 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: str r1, [sp, #104] @ 4-byte Spill +; BE-NEXT: vmov.f32 s0, s18 +; BE-NEXT: vrev64.32 d8, d13 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEXT: bl 
llrintf +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: add lr, sp, #192 +; BE-NEXT: str r1, [sp, #72] @ 4-byte Spill +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vrev64.32 d10, d16 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s20 +; BE-NEXT: add lr, sp, #224 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s21 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d15[1], r6 +; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEXT: add lr, sp, #192 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d17 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d14[1], r7 +; BE-NEXT: add lr, sp, #56 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #192 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #40 +; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEXT: add lr, sp, #224 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d12 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d10[1], r5 +; BE-NEXT: add lr, sp, #224 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vrev64.32 d8, d13 +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.32 d14[0], 
r0 +; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEXT: add lr, sp, #240 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vmov.32 d11[1], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: ldr r0, [sp, #104] @ 4-byte Reload +; BE-NEXT: add lr, sp, #240 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d10[1], r0 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; BE-NEXT: mov r6, r1 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d16 +; BE-NEXT: vmov.32 d13[1], r0 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vmov.32 d12[1], r9 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #192 +; BE-NEXT: vmov.32 d15[1], r4 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.32 d17[1], r10 +; BE-NEXT: vmov.32 d16[1], r11 +; BE-NEXT: vorr q9, q8, q8 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #192 +; BE-NEXT: vmov.32 d17[1], r8 +; BE-NEXT: vmov.32 d16[1], r5 +; BE-NEXT: vorr q10, q8, q8 +; BE-NEXT: vrev64.32 q8, q6 +; BE-NEXT: vmov.32 d14[1], r6 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #240 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vrev64.32 q8, q8 +; BE-NEXT: vmov.32 d11[1], r7 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #224 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vrev64.32 q8, q8 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, 
sp, #56 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: vrev64.32 q8, q8 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #104 +; BE-NEXT: vrev64.32 q8, q9 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #88 +; BE-NEXT: vrev64.32 q8, q10 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #72 +; BE-NEXT: vrev64.32 q8, q7 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #208 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #56 +; BE-NEXT: vrev64.32 d8, d17 +; BE-NEXT: vrev64.32 q8, q5 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #120 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vmov.32 d13[1], r4 +; BE-NEXT: vrev64.32 d8, d10 +; BE-NEXT: vmov.32 d12[1], r1 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vrev64.32 q6, q6 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vmov.32 d15[1], r1 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r6, [sp, #156] @ 4-byte Reload +; BE-NEXT: vrev64.32 d8, d11 +; BE-NEXT: add r5, r6, #64 +; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vrev64.32 q8, q7 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vmov.32 d15[1], r1 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #208 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: vrev64.32 d8, d18 +; BE-NEXT: vrev64.32 q8, q7 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: add lr, sp, #160 +; BE-NEXT: vmov.32 d15[1], r4 +; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: vrev64.32 q8, q7 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d11 +; BE-NEXT: vst1.64 {d12, d13}, [r5:128] +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: add lr, sp, #208 +; BE-NEXT: vmov.32 d13[1], r4 +; BE-NEXT: vmov.32 d12[1], r1 +; BE-NEXT: vrev64.32 q8, q6 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #176 +; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d12 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vmov.32 d15[1], r1 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: mov r5, r6 +; BE-NEXT: vrev64.32 d8, d13 +; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vrev64.32 q8, q7 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vmov.32 d15[1], r1 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: vrev64.32 d8, d10 +; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: vrev64.32 q8, q7 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: add lr, sp, #208 +; BE-NEXT: add r0, r6, #192 +; BE-NEXT: vmov.32 d15[1], r4 +; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: vrev64.32 q8, q7 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #56 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128] +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #192 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #240 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #224 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-NEXT: add r0, r6, #128 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #104 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #88 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #72 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-NEXT: add sp, sp, #256 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v32i64_v32f32: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #256 +; BE-NEON-NEXT: sub sp, sp, #256 +; BE-NEON-NEXT: add lr, sp, #208 +; BE-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill +; BE-NEON-NEXT: add r0, sp, #408 +; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #120 +; BE-NEON-NEXT: vld1.64 {d10, d11}, [r0] +; BE-NEON-NEXT: add r0, sp, #392 +; BE-NEON-NEXT: vstmia lr, 
{d4, d5} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #160 +; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #176 +; BE-NEON-NEXT: vrev64.32 d8, d10 +; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #136 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vld1.64 {d12, d13}, [r0] +; BE-NEON-NEXT: add r0, sp, #360 +; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #192 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #376 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #40 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vrev64.32 d9, d11 +; BE-NEON-NEXT: add lr, sp, #240 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: str r1, [sp, #104] @ 4-byte Spill +; BE-NEON-NEXT: vmov.f32 s0, s18 +; BE-NEON-NEXT: vrev64.32 d8, d13 +; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s19 +; BE-NEON-NEXT: add lr, sp, #192 +; BE-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d10, d16 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s20 +; BE-NEON-NEXT: add lr, sp, #224 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s21 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; 
BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d15[1], r6 +; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #192 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d17 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d14[1], r7 +; BE-NEON-NEXT: add lr, sp, #56 +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add lr, sp, #192 +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #40 +; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #224 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d12 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d10[1], r5 +; BE-NEON-NEXT: add lr, sp, #224 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vrev64.32 d8, d13 +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #240 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d11[1], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload +; BE-NEON-NEXT: add 
lr, sp, #240 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d10[1], r0 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #136 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d16 +; BE-NEON-NEXT: vmov.32 d13[1], r0 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: vmov.32 d12[1], r9 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #192 +; BE-NEON-NEXT: vmov.32 d15[1], r4 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vmov.32 d17[1], r10 +; BE-NEON-NEXT: vmov.32 d16[1], r11 +; BE-NEON-NEXT: vorr q9, q8, q8 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #192 +; BE-NEON-NEXT: vmov.32 d17[1], r8 +; BE-NEON-NEXT: vmov.32 d16[1], r5 +; BE-NEON-NEXT: vorr q10, q8, q8 +; BE-NEON-NEXT: vrev64.32 q8, q6 +; BE-NEON-NEXT: vmov.32 d14[1], r6 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #240 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: vrev64.32 q8, q8 +; BE-NEON-NEXT: vmov.32 d11[1], r7 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #224 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q8 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #56 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #136 +; BE-NEON-NEXT: vrev64.32 q8, q8 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 
16-byte Spill +; BE-NEON-NEXT: add lr, sp, #104 +; BE-NEON-NEXT: vrev64.32 q8, q9 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #88 +; BE-NEON-NEXT: vrev64.32 q8, q10 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #72 +; BE-NEON-NEXT: vrev64.32 q8, q7 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #208 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #56 +; BE-NEON-NEXT: vrev64.32 d8, d17 +; BE-NEON-NEXT: vrev64.32 q8, q5 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #120 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d13[1], r4 +; BE-NEON-NEXT: vrev64.32 d8, d10 +; BE-NEON-NEXT: vmov.32 d12[1], r1 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vrev64.32 q6, q6 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vmov.32 d15[1], r1 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r6, [sp, #156] @ 4-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d11 +; BE-NEON-NEXT: add r5, r6, #64 +; BE-NEON-NEXT: vmov.32 d14[1], r1 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vrev64.32 q8, q7 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vmov.32 d15[1], r1 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: add lr, sp, #208 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d14[1], r1 +; BE-NEON-NEXT: vrev64.32 d8, d18 +; BE-NEON-NEXT: vrev64.32 q8, q7 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: add lr, sp, #160 +; BE-NEON-NEXT: vmov.32 d15[1], r4 +; BE-NEON-NEXT: vmov.32 d14[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q7 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d11 +; BE-NEON-NEXT: vst1.64 {d12, d13}, [r5:128] +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: add lr, sp, #208 +; BE-NEON-NEXT: vmov.32 d13[1], r4 +; BE-NEON-NEXT: vmov.32 d12[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q6 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #176 +; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 d8, d12 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vmov.32 d15[1], r1 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: mov r5, r6 +; BE-NEON-NEXT: vrev64.32 d8, d13 +; BE-NEON-NEXT: vmov.32 d14[1], r1 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vrev64.32 q8, q7 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vmov.32 d15[1], r1 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: vrev64.32 d8, d10 +; BE-NEON-NEXT: vmov.32 d14[1], r1 +; BE-NEON-NEXT: vmov.f32 s0, s17 +; BE-NEON-NEXT: vrev64.32 q8, q7 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.f32 s0, s16 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: bl llrintf +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: add lr, sp, #208 +; BE-NEON-NEXT: add r0, r6, #192 +; BE-NEON-NEXT: vmov.32 d15[1], r4 +; BE-NEON-NEXT: vmov.32 d14[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q7 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #56 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #192 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #240 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #224 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #136 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-NEON-NEXT: add r0, r6, #128 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #104 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #88 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #72 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-NEON-NEXT: add sp, sp, #256 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>) + +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +; LE-LABEL: llrint_v1i64_v1f64: +; LE: @ %bb.0: +; LE-NEXT: .save {r11, lr} +; LE-NEXT: push {r11, lr} +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d0[0], r0 +; LE-NEXT: vmov.32 d0[1], r1 +; LE-NEXT: pop {r11, pc} +; +; LE-NEON-LABEL: llrint_v1i64_v1f64: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r11, lr} +; LE-NEON-NEXT: push {r11, lr} +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d0[0], r0 +; LE-NEON-NEXT: vmov.32 d0[1], r1 +; LE-NEON-NEXT: pop {r11, pc} +; +; BE-LABEL: llrint_v1i64_v1f64: +; BE: @ %bb.0: +; BE-NEXT: .save {r11, lr} +; BE-NEXT: push {r11, lr} +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: pop {r11, pc} +; +; BE-NEON-LABEL: llrint_v1i64_v1f64: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r11, lr} +; BE-NEON-NEXT: push {r11, lr} +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d0, d16 +; BE-NEON-NEXT: pop {r11, pc} + %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +; LE-LABEL: llrint_v2i64_v2f64: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, lr} +; LE-NEXT: push {r4, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11} +; LE-NEXT: vpush {d8, d9, d10, d11} +; LE-NEXT: vorr q4, q0, q0 +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d8, d8 +; 
LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: vmov.32 d11[1], r4 +; LE-NEXT: vmov.32 d10[1], r1 +; LE-NEXT: vorr q0, q5, q5 +; LE-NEXT: vpop {d8, d9, d10, d11} +; LE-NEXT: pop {r4, pc} +; +; LE-NEON-LABEL: llrint_v2i64_v2f64: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, lr} +; LE-NEON-NEXT: push {r4, lr} +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11} +; LE-NEON-NEXT: vorr q4, q0, q0 +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: vmov.32 d11[1], r4 +; LE-NEON-NEXT: vmov.32 d10[1], r1 +; LE-NEON-NEXT: vorr q0, q5, q5 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11} +; LE-NEON-NEXT: pop {r4, pc} +; +; BE-LABEL: llrint_v2i64_v2f64: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, lr} +; BE-NEXT: push {r4, lr} +; BE-NEXT: .vsave {d8, d9, d10, d11} +; BE-NEXT: vpush {d8, d9, d10, d11} +; BE-NEXT: vorr q4, q0, q0 +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vrev64.32 q0, q5 +; BE-NEXT: vpop {d8, d9, d10, d11} +; BE-NEXT: pop {r4, pc} +; +; BE-NEON-LABEL: llrint_v2i64_v2f64: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, lr} +; BE-NEON-NEXT: push {r4, lr} +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11} +; BE-NEON-NEXT: vorr q4, q0, q0 +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: 
vrev64.32 q0, q5 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11} +; BE-NEON-NEXT: pop {r4, pc} + %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) + +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +; LE-LABEL: llrint_v4i64_v4f64: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, lr} +; LE-NEXT: push {r4, r5, r6, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vorr q5, q1, q1 +; LE-NEXT: vorr q6, q0, q0 +; LE-NEXT: vorr d0, d11, d11 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d12, d12 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d13, d13 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: vmov.32 d9[1], r4 +; LE-NEXT: vmov.32 d14[1], r5 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q0, q7, q7 +; LE-NEXT: vorr q1, q4, q4 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: pop {r4, r5, r6, pc} +; +; LE-NEON-LABEL: llrint_v4i64_v4f64: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, lr} +; LE-NEON-NEXT: push {r4, r5, r6, lr} +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vorr q5, q1, q1 +; LE-NEON-NEXT: vorr q6, q0, q0 +; LE-NEON-NEXT: vorr d0, d11, d11 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d12, d12 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d13, d13 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d10, d10 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; 
LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov.32 d15[1], r6 +; LE-NEON-NEXT: vmov.32 d9[1], r4 +; LE-NEON-NEXT: vmov.32 d14[1], r5 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vorr q0, q7, q7 +; LE-NEON-NEXT: vorr q1, q4, q4 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: pop {r4, r5, r6, pc} +; +; BE-LABEL: llrint_v4i64_v4f64: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, lr} +; BE-NEXT: push {r4, r5, r6, lr} +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vorr q4, q1, q1 +; BE-NEXT: vorr q5, q0, q0 +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d10, d10 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d11, d11 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: vmov.32 d15[1], r6 +; BE-NEXT: vmov.32 d13[1], r4 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: vmov.32 d12[1], r1 +; BE-NEXT: vrev64.32 q0, q7 +; BE-NEXT: vrev64.32 q1, q6 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: pop {r4, r5, r6, pc} +; +; BE-NEON-LABEL: llrint_v4i64_v4f64: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, lr} +; BE-NEON-NEXT: push {r4, r5, r6, lr} +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vorr q4, q1, q1 +; BE-NEON-NEXT: vorr q5, q0, q0 +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d10, d10 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d11, d11 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, 
d8 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: vmov.32 d15[1], r6 +; BE-NEON-NEXT: vmov.32 d13[1], r4 +; BE-NEON-NEXT: vmov.32 d14[1], r5 +; BE-NEON-NEXT: vmov.32 d12[1], r1 +; BE-NEON-NEXT: vrev64.32 q0, q7 +; BE-NEON-NEXT: vrev64.32 q1, q6 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: pop {r4, r5, r6, pc} + %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) + +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +; LE-LABEL: llrint_v8i64_v8f64: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #40 +; LE-NEXT: sub sp, sp, #40 +; LE-NEXT: vorr q4, q0, q0 +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vorr d0, d7, d7 +; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-NEXT: vorr q7, q2, q2 +; LE-NEXT: vorr q6, q1, q1 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d14, d14 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d15, d15 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d12, d12 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d13, d13 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 
16-byte Reload +; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d13[1], r6 +; LE-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: vmov.32 d11[1], r10 +; LE-NEXT: vmov.32 d6[0], r0 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: vorr q0, q6, q6 +; LE-NEXT: vmov.32 d10[1], r9 +; LE-NEXT: vorr q1, q7, q7 +; LE-NEXT: vmov.32 d7[1], r8 +; LE-NEXT: vorr q2, q5, q5 +; LE-NEXT: vmov.32 d6[1], r1 +; LE-NEXT: add sp, sp, #40 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-NEON-LABEL: llrint_v8i64_v8f64: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #40 +; LE-NEON-NEXT: sub sp, sp, #40 +; LE-NEON-NEXT: vorr q4, q0, q0 +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vorr d0, d7, d7 +; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-NEON-NEXT: vorr q7, q2, q2 +; LE-NEON-NEXT: vorr q6, q1, q1 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d14, d14 +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d15, d15 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d12, d12 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d13, d13 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr 
d0, d9, d9 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: vmov.32 d13[1], r6 +; LE-NEON-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d15[1], r4 +; LE-NEON-NEXT: vmov.32 d11[1], r10 +; LE-NEON-NEXT: vmov.32 d6[0], r0 +; LE-NEON-NEXT: vmov.32 d12[1], r5 +; LE-NEON-NEXT: vmov.32 d14[1], r7 +; LE-NEON-NEXT: vorr q0, q6, q6 +; LE-NEON-NEXT: vmov.32 d10[1], r9 +; LE-NEON-NEXT: vorr q1, q7, q7 +; LE-NEON-NEXT: vmov.32 d7[1], r8 +; LE-NEON-NEXT: vorr q2, q5, q5 +; LE-NEON-NEXT: vmov.32 d6[1], r1 +; LE-NEON-NEXT: add sp, sp, #40 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-LABEL: llrint_v8i64_v8f64: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #40 +; BE-NEXT: sub sp, sp, #40 +; BE-NEXT: vorr q4, q0, q0 +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vorr d0, d7, d7 +; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-NEXT: vorr q7, q2, q2 +; BE-NEXT: vorr q6, q1, q1 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d14, d14 +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.32 d17[0], r0 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d15, d15 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d12, d12 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d13, d13 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d14[0], r0 
+; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.32 d13[1], r6 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vmov.32 d15[1], r4 +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d11[1], r10 +; BE-NEXT: vmov.32 d17[1], r8 +; BE-NEXT: vmov.32 d12[1], r5 +; BE-NEXT: vmov.32 d14[1], r7 +; BE-NEXT: vmov.32 d10[1], r9 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 q0, q6 +; BE-NEXT: vrev64.32 q1, q7 +; BE-NEXT: vrev64.32 q2, q5 +; BE-NEXT: vrev64.32 q3, q8 +; BE-NEXT: add sp, sp, #40 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-NEON-LABEL: llrint_v8i64_v8f64: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #40 +; BE-NEON-NEXT: sub sp, sp, #40 +; BE-NEON-NEXT: vorr q4, q0, q0 +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: vorr d0, d7, d7 +; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-NEON-NEXT: vorr q7, q2, q2 +; BE-NEON-NEXT: vorr q6, q1, q1 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d14, d14 +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vmov.32 d17[0], r0 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d15, d15 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: bl llrint +; 
BE-NEON-NEXT: vorr d0, d12, d12 +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d13, d13 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vmov.32 d13[1], r6 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d15[1], r4 +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d11[1], r10 +; BE-NEON-NEXT: vmov.32 d17[1], r8 +; BE-NEON-NEXT: vmov.32 d12[1], r5 +; BE-NEON-NEXT: vmov.32 d14[1], r7 +; BE-NEON-NEXT: vmov.32 d10[1], r9 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 q0, q6 +; BE-NEON-NEXT: vrev64.32 q1, q7 +; BE-NEON-NEXT: vrev64.32 q2, q5 +; BE-NEON-NEXT: vrev64.32 q3, q8 +; BE-NEON-NEXT: add sp, sp, #40 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) + +define <16 x i64> @llrint_v16f64(<16 x double> %x) { +; LE-LABEL: llrint_v16f64: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #176 +; LE-NEXT: sub sp, sp, #176 
+; LE-NEXT: add lr, sp, #40 +; LE-NEXT: str r0, [sp, #140] @ 4-byte Spill +; LE-NEXT: add r0, sp, #312 +; LE-NEXT: vorr q6, q2, q2 +; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: vorr q7, q1, q1 +; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vorr d0, d1, d1 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #280 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #296 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #328 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d14, d14 +; LE-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d15, d15 +; LE-NEXT: str r1, [sp, #76] @ 4-byte Spill +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d12, d12 +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: str r1, [sp, #72] @ 4-byte Spill +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d13, d13 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: add lr, sp, #40 +; 
LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d13[1], r5 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: vmov.32 d12[1], r7 +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vorr d0, d17, d17 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d14[1], r6 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vorr d0, d11, d11 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d9[1], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d8[1], r0 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vorr d0, d11, d11 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d9[1], r0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #144 +; 
LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: vmov.32 d8[1], r10 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vmov.32 d20[0], r0 +; LE-NEXT: vmov.32 d21[1], r8 +; LE-NEXT: vmov.32 d20[1], r1 +; LE-NEXT: ldr r1, [sp, #140] @ 4-byte Reload +; LE-NEXT: vmov.32 d13[1], r5 +; LE-NEXT: mov r0, r1 +; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vmov.32 d14[1], r4 +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: vmov.32 d12[1], r7 +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: vmov.32 d17[1], r9 +; LE-NEXT: vst1.64 {d18, d19}, [r0:128] +; LE-NEXT: add r0, r1, #64 +; LE-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEXT: vmov.32 d16[1], r11 +; LE-NEXT: vst1.64 {d20, d21}, [r0:128]! 
+; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: add sp, sp, #176 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v16f64: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #176 +; LE-NEON-NEXT: sub sp, sp, #176 +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: str r0, [sp, #140] @ 4-byte Spill +; LE-NEON-NEXT: add r0, sp, #312 +; LE-NEON-NEXT: vorr q6, q2, q2 +; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: vorr q7, q1, q1 +; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: vorr d0, d1, d1 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #280 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #296 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #328 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d14, d14 +; LE-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d15, d15 +; LE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d12, d12 +; LE-NEON-NEXT: add lr, sp, 
#160 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d13, d13 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d13[1], r5 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: vmov.32 d12[1], r7 +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vmov.32 d15[1], r4 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d17, d17 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: vmov.32 d14[1], r6 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add 
lr, sp, #56 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d11, d11 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vorr d0, d10, d10 +; LE-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d9[1], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d8[1], r0 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d11, d11 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vorr d0, d10, d10 +; LE-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d9[1], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEON-NEXT: vmov.32 d8[1], r10 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: vmov.32 d15[1], r6 +; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vmov.32 d20[0], r0 +; LE-NEON-NEXT: vmov.32 d21[1], r8 +; LE-NEON-NEXT: vmov.32 d20[1], r1 +; LE-NEON-NEXT: ldr r1, [sp, #140] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d13[1], r5 +; LE-NEON-NEXT: mov r0, r1 +; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! 
+; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vmov.32 d14[1], r4 +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: vmov.32 d12[1], r7 +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d17[1], r9 +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; LE-NEON-NEXT: add r0, r1, #64 +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEON-NEXT: vmov.32 d16[1], r11 +; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEON-NEXT: add sp, sp, #176 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v16f64: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #168 +; BE-NEXT: sub sp, sp, #168 +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: str r0, [sp, #132] @ 4-byte Spill +; BE-NEXT: add r0, sp, #304 +; BE-NEXT: vorr q4, q3, q3 +; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vorr d0, d1, d1 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #320 +; BE-NEXT: vorr q6, q2, q2 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #88 +; BE-NEXT: vorr q7, q1, q1 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #272 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #288 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; 
BE-NEXT: add lr, sp, #24 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d14, d14 +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: vmov.32 d17[0], r0 +; BE-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d15, d15 +; BE-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d12, d12 +; BE-NEXT: add lr, sp, #152 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d13, d13 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d13[1], r5 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: vmov.32 d12[1], r7 +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vorr q6, q5, q5 +; BE-NEXT: vldmia lr, {d8, 
d9} @ 16-byte Reload +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: vmov.32 d12[1], r6 +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-NEXT: mov r6, r1 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #152 +; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEXT: add lr, sp, #88 +; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEXT: vorr d0, d13, d13 +; BE-NEXT: vmov.32 d9[1], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-NEXT: vorr d0, d12, d12 +; BE-NEXT: add lr, sp, #152 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d8[1], r0 +; BE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-NEXT: mov r5, r1 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: vmov.32 d11[1], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vmov.32 d10[1], r9 +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vmov.32 d17[1], r10 +; BE-NEXT: vmov.32 d16[1], r11 +; BE-NEXT: vorr q12, q8, q8 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #152 +; BE-NEXT: vmov.32 d17[1], r8 +; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vmov.32 d13[1], r7 +; BE-NEXT: vmov.32 d16[1], r6 +; BE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-NEXT: add lr, sp, #64 +; 
BE-NEXT: vorr q13, q8, q8 +; BE-NEXT: vmov.32 d12[1], r1 +; BE-NEXT: ldr r1, [sp, #132] @ 4-byte Reload +; BE-NEXT: vrev64.32 q8, q5 +; BE-NEXT: mov r0, r1 +; BE-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-NEXT: vrev64.32 q9, q9 +; BE-NEXT: vrev64.32 q10, q10 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; BE-NEXT: vrev64.32 q11, q11 +; BE-NEXT: vmov.32 d15[1], r4 +; BE-NEXT: vst1.64 {d20, d21}, [r0:128]! +; BE-NEXT: vrev64.32 q15, q6 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: vrev64.32 q12, q12 +; BE-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-NEXT: add r0, r1, #64 +; BE-NEXT: vrev64.32 q13, q13 +; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-NEXT: vrev64.32 q14, q7 +; BE-NEXT: vst1.64 {d26, d27}, [r0:128]! +; BE-NEXT: vst1.64 {d28, d29}, [r0:128] +; BE-NEXT: add sp, sp, #168 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v16f64: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #168 +; BE-NEON-NEXT: sub sp, sp, #168 +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: str r0, [sp, #132] @ 4-byte Spill +; BE-NEON-NEXT: add r0, sp, #304 +; BE-NEON-NEXT: vorr q4, q3, q3 +; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: vorr d0, d1, d1 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #320 +; BE-NEON-NEXT: vorr q6, q2, q2 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #88 +; BE-NEON-NEXT: vorr q7, q1, q1 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add 
r0, sp, #272 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #288 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d14, d14 +; BE-NEON-NEXT: add lr, sp, #136 +; BE-NEON-NEXT: vmov.32 d17[0], r0 +; BE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d15, d15 +; BE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d12, d12 +; BE-NEON-NEXT: add lr, sp, #152 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d13, d13 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #136 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d13[1], r5 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d9, d9 +; 
BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: vmov.32 d12[1], r7 +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: vorr q6, q5, q5 +; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: vmov.32 d12[1], r6 +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #152 +; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #88 +; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d13, d13 +; BE-NEON-NEXT: vmov.32 d9[1], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-NEON-NEXT: vorr d0, d12, d12 +; BE-NEON-NEXT: add lr, sp, #152 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d8[1], r0 +; BE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #136 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d9, d9 +; 
BE-NEON-NEXT: vmov.32 d11[1], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: vmov.32 d10[1], r9 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: vmov.32 d17[1], r10 +; BE-NEON-NEXT: vmov.32 d16[1], r11 +; BE-NEON-NEXT: vorr q12, q8, q8 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #152 +; BE-NEON-NEXT: vmov.32 d17[1], r8 +; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: vmov.32 d13[1], r7 +; BE-NEON-NEXT: vmov.32 d16[1], r6 +; BE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: vorr q13, q8, q8 +; BE-NEON-NEXT: vmov.32 d12[1], r1 +; BE-NEON-NEXT: ldr r1, [sp, #132] @ 4-byte Reload +; BE-NEON-NEXT: vrev64.32 q8, q5 +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-NEON-NEXT: vrev64.32 q9, q9 +; BE-NEON-NEXT: vrev64.32 q10, q10 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 q11, q11 +; BE-NEON-NEXT: vmov.32 d15[1], r4 +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 q15, q6 +; BE-NEON-NEXT: vmov.32 d14[1], r5 +; BE-NEON-NEXT: vrev64.32 q12, q12 +; BE-NEON-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-NEON-NEXT: add r0, r1, #64 +; BE-NEON-NEXT: vrev64.32 q13, q13 +; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 q14, q7 +; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]! 
+; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128] +; BE-NEON-NEXT: add sp, sp, #168 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) + +define <32 x i64> @llrint_v32f64(<32 x double> %x) { +; LE-LABEL: llrint_v32f64: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #208 +; LE-NEXT: sub sp, sp, #208 +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: str r0, [sp, #156] @ 4-byte Spill +; LE-NEXT: add r0, sp, #456 +; LE-NEXT: vorr q4, q0, q0 +; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vorr d0, d7, d7 +; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vorr q5, q2, q2 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #344 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #192 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #376 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #360 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #440 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: str r1, [sp, #120] @ 4-byte Spill +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d11, d11 
+; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d11, d11 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d9[1], r7 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vorr d0, d17, d17 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d8[1], r4 +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vmov.32 d11[1], r6 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: vmov.32 d10[1], r9 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #88 +; 
LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: ldr r0, [sp, #120] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vmov.32 d19[1], r0 +; LE-NEXT: add r0, sp, #408 +; LE-NEXT: ldr r2, [sp, #156] @ 4-byte Reload +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: vmov.32 d13[1], r7 +; LE-NEXT: mov r0, r2 +; LE-NEXT: vmov.32 d12[1], r1 +; LE-NEXT: add r1, sp, #488 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vld1.64 {d16, d17}, [r1] +; LE-NEXT: add r1, sp, #472 +; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vmov.32 d21[1], r11 +; LE-NEXT: vmov.32 d20[1], r10 +; LE-NEXT: add r10, r2, #192 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEXT: vld1.64 {d16, d17}, [r1] +; LE-NEXT: add r1, sp, #392 +; LE-NEXT: vmov.32 d18[1], r5 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: vst1.64 {d20, d21}, [r0:128]! +; LE-NEXT: vld1.64 {d16, d17}, [r1] +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vst1.64 {d18, d19}, [r0:128] +; LE-NEXT: add r0, sp, #312 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #328 +; LE-NEXT: vmov.32 d15[1], r8 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: add r0, sp, #424 +; LE-NEXT: vmov.32 d14[1], r4 +; LE-NEXT: vst1.64 {d12, d13}, [r10:128]! +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEXT: vst1.64 {d14, d15}, [r10:128]! 
+; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #192 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vorr d0, d17, d17 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d11, d11 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d11, d11 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #192 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: add lr, sp, #192 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: vmov.32 d14[1], r6 +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d13[1], r5 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: vmov.32 d12[1], r8 +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: add lr, 
sp, #192 +; LE-NEXT: str r1, [sp, #24] @ 4-byte Spill +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vorr d0, d11, d11 +; LE-NEXT: vmov.32 d9[1], r9 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: vmov.32 d8[1], r11 +; LE-NEXT: add lr, sp, #192 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: str r1, [sp, #40] @ 4-byte Spill +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: vmov.32 d11[1], r4 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: vmov.32 d10[1], r7 +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vmov.32 d15[1], r5 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d14[1], r0 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: 
vmov.32 d13[1], r6 +; LE-NEXT: bl llrint +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d12[1], r0 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: add lr, sp, #120 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: vmov.32 d13[1], r8 +; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: vmov.32 d12[1], r11 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #72 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vmov.32 d17[1], r9 +; LE-NEXT: vmov.32 d16[1], r7 +; LE-NEXT: vst1.64 {d12, d13}, [r10:128]! +; LE-NEXT: vorr q9, q8, q8 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #136 +; LE-NEXT: vmov.32 d15[1], r5 +; LE-NEXT: vst1.64 {d16, d17}, [r10:128] +; LE-NEXT: vmov.32 d14[1], r1 +; LE-NEXT: ldr r1, [sp, #156] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add r0, r1, #128 +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vmov.32 d11[1], r6 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vmov.32 d10[1], r4 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #192 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: add r0, r1, #64 +; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: add sp, sp, #208 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v32f64: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #208 +; LE-NEON-NEXT: sub sp, sp, #208 +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill +; LE-NEON-NEXT: add r0, sp, #456 +; LE-NEON-NEXT: vorr q4, q0, q0 +; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vorr d0, d7, d7 +; LE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vorr q5, q2, q2 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #344 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #192 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #376 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #360 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #440 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d10, d10 +; LE-NEON-NEXT: str r1, [sp, #120] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: bl 
llrint +; LE-NEON-NEXT: vorr d0, d11, d11 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d10, d10 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d11, d11 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d9[1], r7 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d17, d17 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d8[1], r4 +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; 
LE-NEON-NEXT: vmov.32 d11[1], r6 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEON-NEXT: vmov.32 d10[1], r9 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #120] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vmov.32 d19[1], r0 +; LE-NEON-NEXT: add r0, sp, #408 +; LE-NEON-NEXT: ldr r2, [sp, #156] @ 4-byte Reload +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: vmov.32 d13[1], r7 +; LE-NEON-NEXT: mov r0, r2 +; LE-NEON-NEXT: vmov.32 d12[1], r1 +; LE-NEON-NEXT: add r1, sp, #488 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1] +; LE-NEON-NEXT: add r1, sp, #472 +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vmov.32 d21[1], r11 +; LE-NEON-NEXT: vmov.32 d20[1], r10 +; LE-NEON-NEXT: add r10, r2, #192 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1] +; LE-NEON-NEXT: add r1, sp, #392 +; LE-NEON-NEXT: vmov.32 d18[1], r5 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! 
+; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1] +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; LE-NEON-NEXT: add r0, sp, #312 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #328 +; LE-NEON-NEXT: vmov.32 d15[1], r8 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: add r0, sp, #424 +; LE-NEON-NEXT: vmov.32 d14[1], r4 +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]! +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r10:128]! +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #192 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d17, d17 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d10, d10 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d11, d11 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d10, d10 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d11, d11 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #192 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: add lr, sp, #192 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vmov.32 
d15[1], r4 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: vmov.32 d14[1], r6 +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d13[1], r5 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: vmov.32 d12[1], r8 +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #192 +; LE-NEON-NEXT: str r1, [sp, #24] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d11, d11 +; LE-NEON-NEXT: vmov.32 d9[1], r9 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d10, d10 +; LE-NEON-NEXT: vmov.32 d8[1], r11 +; LE-NEON-NEXT: add lr, sp, #192 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte 
Reload +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: vmov.32 d11[1], r4 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: vmov.32 d10[1], r7 +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vmov.32 d15[1], r5 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d14[1], r0 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #104 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d9, d9 +; LE-NEON-NEXT: vmov.32 d13[1], r6 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d12[1], r0 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #120 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: vorr d0, d9, d9 +; 
LE-NEON-NEXT: vmov.32 d13[1], r8 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: vorr d0, d8, d8 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: vmov.32 d12[1], r11 +; LE-NEON-NEXT: bl llrint +; LE-NEON-NEXT: add lr, sp, #72 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vmov.32 d17[1], r9 +; LE-NEON-NEXT: vmov.32 d16[1], r7 +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]! +; LE-NEON-NEXT: vorr q9, q8, q8 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #136 +; LE-NEON-NEXT: vmov.32 d15[1], r5 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128] +; LE-NEON-NEXT: vmov.32 d14[1], r1 +; LE-NEON-NEXT: ldr r1, [sp, #156] @ 4-byte Reload +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add r0, r1, #128 +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vmov.32 d11[1], r6 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vmov.32 d10[1], r4 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #192 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEON-NEXT: add r0, r1, #64 +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #88 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEON-NEXT: add sp, sp, #208 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v32f64: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #232 +; BE-NEXT: sub sp, sp, #232 +; BE-NEXT: add lr, sp, #184 +; BE-NEXT: str r0, [sp, #148] @ 4-byte Spill +; BE-NEXT: add r0, sp, #416 +; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-NEXT: add lr, sp, #168 +; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-NEXT: add lr, sp, #152 +; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEXT: add lr, sp, #200 +; BE-NEXT: vld1.64 {d18, d19}, [r0] +; BE-NEXT: add r0, sp, #448 +; BE-NEXT: vorr d0, d19, d19 +; BE-NEXT: vld1.64 {d14, d15}, [r0] +; BE-NEXT: add r0, sp, #336 +; BE-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #400 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #352 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #368 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #384 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #512 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte 
Spill +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEXT: add r0, sp, #432 +; BE-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: str r1, [sp, #80] @ 4-byte Spill +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d14, d14 +; BE-NEXT: add lr, sp, #216 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d15, d15 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vorr d0, d10, d10 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d11, d11 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #200 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: add lr, sp, #200 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d15[1], r7 +; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vorr d0, d11, d11 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d10, d10 +; BE-NEXT: vmov.32 d14[1], r6 +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-NEXT: vorr d0, d15, d15 +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d14, d14 
+; BE-NEXT: vmov.32 d8[1], r8 +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: str r1, [sp, #4] @ 4-byte Spill +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #216 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: vmov.32 d11[1], r9 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: add lr, sp, #216 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d10[1], r0 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: ldr r0, [sp, #80] @ 4-byte Reload +; BE-NEXT: mov r6, r1 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEXT: add lr, sp, #200 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: vmov.32 d11[1], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: vmov.32 d10[1], r5 +; BE-NEXT: add lr, sp, #200 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: bl llrint +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vorr q4, q6, q6 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEXT: vorr d0, d13, d13 +; BE-NEXT: vmov.32 d9[1], r10 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d12, d12 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vmov.32 d8[1], r11 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload 
+; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vmov.32 d17[1], r0 +; BE-NEXT: vmov.32 d16[1], r8 +; BE-NEXT: vorr q9, q8, q8 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vmov.32 d17[1], r9 +; BE-NEXT: vmov.32 d16[1], r6 +; BE-NEXT: vorr q10, q8, q8 +; BE-NEXT: vrev64.32 q8, q4 +; BE-NEXT: vmov.32 d15[1], r7 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #200 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vmov.32 d11[1], r5 +; BE-NEXT: vrev64.32 q8, q8 +; BE-NEXT: vmov.32 d14[1], r4 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #216 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vrev64.32 q8, q8 +; BE-NEXT: vrev64.32 q6, q7 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vrev64.32 q7, q5 +; BE-NEXT: vrev64.32 q8, q8 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #80 +; BE-NEXT: vrev64.32 q8, q8 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: vrev64.32 q8, q9 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vrev64.32 q8, q10 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vorr d0, d11, d11 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d10, d10 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: ldr r6, [sp, #148] @ 4-byte Reload +; BE-NEXT: add lr, sp, #152 +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: mov r5, r6 +; BE-NEXT: vmov.32 d8[1], r1 +; BE-NEXT: vrev64.32 q8, q4 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vorr d0, d11, d11 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d10, d10 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: add lr, sp, #168 +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: vmov.32 d8[1], r1 +; BE-NEXT: vrev64.32 q8, q4 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vorr d0, d11, d11 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d10, d10 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: add lr, sp, #184 +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: vmov.32 d8[1], r1 +; BE-NEXT: vrev64.32 q8, q4 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vorr d0, d11, d11 +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d10, d10 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: add r0, sp, #464 +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: vmov.32 d8[1], r1 +; BE-NEXT: vrev64.32 q8, q4 +; BE-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128] +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add r0, sp, #480 +; BE-NEXT: add r5, r6, #192 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: vrev64.32 q8, q5 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add r0, sp, #496 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: vrev64.32 q8, q5 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: bl llrint +; BE-NEXT: vorr d0, d8, d8 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrint +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: add r0, r6, #128 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vrev64.32 q8, q5 +; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: vst1.64 {d14, d15}, [r5:128] +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #200 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #216 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #80 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-NEXT: add r0, r6, #64 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; BE-NEXT: vst1.64 {d12, d13}, [r0:128] +; BE-NEXT: add sp, sp, #232 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v32f64: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #232 +; BE-NEON-NEXT: sub sp, sp, #232 +; BE-NEON-NEXT: add lr, sp, #184 +; BE-NEON-NEXT: str r0, [sp, #148] @ 4-byte Spill +; BE-NEON-NEXT: add r0, sp, #416 +; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #168 +; BE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #152 +; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #128 +; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #200 +; BE-NEON-NEXT: vld1.64 {d18, d19}, [r0] +; BE-NEON-NEXT: add r0, sp, #448 +; BE-NEON-NEXT: vorr d0, d19, d19 +; BE-NEON-NEXT: vld1.64 {d14, d15}, [r0] +; BE-NEON-NEXT: add r0, sp, #336 +; BE-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #400 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #352 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #368 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #384 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; 
BE-NEON-NEXT: add lr, sp, #96 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #512 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-NEON-NEXT: add r0, sp, #432 +; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: str r1, [sp, #80] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d14, d14 +; BE-NEON-NEXT: add lr, sp, #216 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d15, d15 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d10, d10 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d11, d11 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #200 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: add lr, sp, #200 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d15[1], r7 +; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d11, d11 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d10, d10 +; BE-NEON-NEXT: vmov.32 
d14[1], r6 +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d15, d15 +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d14, d14 +; BE-NEON-NEXT: vmov.32 d8[1], r8 +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #216 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: vmov.32 d11[1], r9 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: add lr, sp, #216 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d10[1], r0 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: ldr r0, [sp, #80] @ 4-byte Reload +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #200 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #96 +; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: vmov.32 d11[1], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: vmov.32 d10[1], r5 +; BE-NEON-NEXT: add 
lr, sp, #200 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: vorr q4, q6, q6 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d13, d13 +; BE-NEON-NEXT: vmov.32 d9[1], r10 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d12, d12 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: vmov.32 d8[1], r11 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add lr, sp, #24 +; BE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: vmov.32 d17[1], r0 +; BE-NEON-NEXT: vmov.32 d16[1], r8 +; BE-NEON-NEXT: vorr q9, q8, q8 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: vmov.32 d17[1], r9 +; BE-NEON-NEXT: vmov.32 d16[1], r6 +; BE-NEON-NEXT: vorr q10, q8, q8 +; BE-NEON-NEXT: vrev64.32 q8, q4 +; BE-NEON-NEXT: vmov.32 d15[1], r7 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #200 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d11[1], r5 +; BE-NEON-NEXT: vrev64.32 q8, q8 +; BE-NEON-NEXT: vmov.32 d14[1], r4 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #216 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q8 +; BE-NEON-NEXT: vrev64.32 q6, q7 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #8 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #96 +; BE-NEON-NEXT: vrev64.32 q7, q5 +; BE-NEON-NEXT: vrev64.32 q8, q8 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; 
BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #80 +; BE-NEON-NEXT: vrev64.32 q8, q8 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: vrev64.32 q8, q9 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: vrev64.32 q8, q10 +; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEON-NEXT: add lr, sp, #128 +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d11, d11 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d10, d10 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: ldr r6, [sp, #148] @ 4-byte Reload +; BE-NEON-NEXT: add lr, sp, #152 +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: mov r5, r6 +; BE-NEON-NEXT: vmov.32 d8[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q4 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d11, d11 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d10, d10 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: add lr, sp, #168 +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: vmov.32 d8[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q4 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d11, d11 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d10, d10 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: add lr, sp, #184 +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: vmov.32 d8[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q4 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEON-NEXT: vorr d0, d11, d11 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d10, d10 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: add r0, sp, #464 +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: vmov.32 d8[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q4 +; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add r0, sp, #480 +; BE-NEON-NEXT: add r5, r6, #192 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: vrev64.32 q8, q5 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add r0, sp, #496 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEON-NEXT: vorr d0, d9, d9 +; BE-NEON-NEXT: vrev64.32 q8, q5 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vorr d0, d8, d8 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: bl llrint +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: add lr, sp, #112 +; BE-NEON-NEXT: add r0, r6, #128 +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: vmov.32 d10[1], r1 +; BE-NEON-NEXT: vrev64.32 q8, q5 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEON-NEXT: vst1.64 {d14, d15}, [r5:128] +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #200 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #216 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #96 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #80 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-NEON-NEXT: add r0, r6, #64 +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #64 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: add lr, sp, #48 +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128] +; BE-NEON-NEXT: add sp, sp, #232 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <32 x i64> @llvm.llrint.v32i64.v16f64(<32 x double> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>) + +define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { +; LE-LABEL: llrint_v1i64_v1f128: +; LE: @ %bb.0: +; LE-NEXT: .save {r11, lr} +; LE-NEXT: push {r11, lr} +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d0[0], r0 +; LE-NEXT: vmov.32 d0[1], r1 +; LE-NEXT: pop {r11, pc} +; +; LE-NEON-LABEL: llrint_v1i64_v1f128: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r11, lr} +; LE-NEON-NEXT: push {r11, lr} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d0[0], r0 +; LE-NEON-NEXT: vmov.32 d0[1], r1 +; LE-NEON-NEXT: pop {r11, pc} +; +; BE-LABEL: llrint_v1i64_v1f128: +; BE: @ %bb.0: +; BE-NEXT: .save {r11, lr} +; 
BE-NEXT: push {r11, lr} +; BE-NEXT: bl llrintl +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: pop {r11, pc} +; +; BE-NEON-LABEL: llrint_v1i64_v1f128: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r11, lr} +; BE-NEON-NEXT: push {r11, lr} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d0, d16 +; BE-NEON-NEXT: pop {r11, pc} + %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>) + +define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { +; LE-LABEL: llrint_v2i64_v2f128: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, lr} +; LE-NEXT: .vsave {d8, d9} +; LE-NEXT: vpush {d8, d9} +; LE-NEXT: mov r8, r3 +; LE-NEXT: add r3, sp, #40 +; LE-NEXT: mov r5, r2 +; LE-NEXT: mov r6, r1 +; LE-NEXT: mov r7, r0 +; LE-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: mov r0, r7 +; LE-NEXT: mov r1, r6 +; LE-NEXT: mov r2, r5 +; LE-NEXT: mov r3, r8 +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 d9[1], r4 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q0, q4, q4 +; LE-NEXT: vpop {d8, d9} +; LE-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; LE-NEON-LABEL: llrint_v2i64_v2f128: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; LE-NEON-NEXT: .vsave {d8, d9} +; LE-NEON-NEXT: vpush {d8, d9} +; LE-NEON-NEXT: mov r8, r3 +; LE-NEON-NEXT: add r3, sp, #40 +; LE-NEON-NEXT: mov r5, r2 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: mov r7, r0 +; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: mov r1, r6 +; LE-NEON-NEXT: mov r2, r5 +; LE-NEON-NEXT: mov r3, r8 
+; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov.32 d9[1], r4 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vorr q0, q4, q4 +; LE-NEON-NEXT: vpop {d8, d9} +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; BE-LABEL: llrint_v2i64_v2f128: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, lr} +; BE-NEXT: .vsave {d8} +; BE-NEXT: vpush {d8} +; BE-NEXT: mov r8, r3 +; BE-NEXT: add r3, sp, #32 +; BE-NEXT: mov r5, r2 +; BE-NEXT: mov r6, r1 +; BE-NEXT: mov r7, r0 +; BE-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: mov r0, r7 +; BE-NEXT: mov r1, r6 +; BE-NEXT: mov r2, r5 +; BE-NEXT: mov r3, r8 +; BE-NEXT: bl llrintl +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d8[1], r4 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d1, d8 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: vpop {d8} +; BE-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; BE-NEON-LABEL: llrint_v2i64_v2f128: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; BE-NEON-NEXT: .vsave {d8} +; BE-NEON-NEXT: vpush {d8} +; BE-NEON-NEXT: mov r8, r3 +; BE-NEON-NEXT: add r3, sp, #32 +; BE-NEON-NEXT: mov r5, r2 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: mov r7, r0 +; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: mov r0, r7 +; BE-NEON-NEXT: mov r1, r6 +; BE-NEON-NEXT: mov r2, r5 +; BE-NEON-NEXT: mov r3, r8 +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d8[1], r4 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d1, d8 +; BE-NEON-NEXT: vrev64.32 d0, d16 +; BE-NEON-NEXT: vpop {d8} +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} + %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) + ret <2 x i64> %a +} +declare <2 x i64> 
@llvm.llrint.v2i64.v2f128(<2 x fp128>) + +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { +; LE-LABEL: llrint_v4i64_v4f128: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11} +; LE-NEXT: vpush {d8, d9, d10, d11} +; LE-NEXT: mov r5, r3 +; LE-NEXT: add r3, sp, #96 +; LE-NEXT: mov r7, r2 +; LE-NEXT: mov r6, r1 +; LE-NEXT: mov r4, r0 +; LE-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: mov r0, r4 +; LE-NEXT: mov r1, r6 +; LE-NEXT: mov r2, r7 +; LE-NEXT: mov r3, r5 +; LE-NEXT: ldr r8, [sp, #80] +; LE-NEXT: ldr r10, [sp, #64] +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #68 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: mov r0, r10 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #84 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: mov r0, r8 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 d11[1], r4 +; LE-NEXT: vmov.32 d9[1], r9 +; LE-NEXT: vmov.32 d10[1], r5 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q0, q5, q5 +; LE-NEXT: vorr q1, q4, q4 +; LE-NEXT: vpop {d8, d9, d10, d11} +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-NEON-LABEL: llrint_v4i64_v4f128: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11} +; LE-NEON-NEXT: mov r5, r3 +; LE-NEON-NEXT: add r3, sp, #96 +; LE-NEON-NEXT: mov r7, r2 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: mov r0, r4 +; LE-NEON-NEXT: mov r1, r6 +; LE-NEON-NEXT: mov r2, r7 +; LE-NEON-NEXT: mov r3, 
r5 +; LE-NEON-NEXT: ldr r8, [sp, #80] +; LE-NEON-NEXT: ldr r10, [sp, #64] +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #68 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: mov r0, r10 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #84 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: mov r0, r8 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: vmov.32 d11[1], r4 +; LE-NEON-NEXT: vmov.32 d9[1], r9 +; LE-NEON-NEXT: vmov.32 d10[1], r5 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vorr q0, q5, q5 +; LE-NEON-NEXT: vorr q1, q4, q4 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11} +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-LABEL: llrint_v4i64_v4f128: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: .vsave {d8, d9, d10} +; BE-NEXT: vpush {d8, d9, d10} +; BE-NEXT: mov r5, r3 +; BE-NEXT: add r3, sp, #88 +; BE-NEXT: mov r7, r2 +; BE-NEXT: mov r6, r1 +; BE-NEXT: mov r4, r0 +; BE-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: mov r0, r4 +; BE-NEXT: mov r1, r6 +; BE-NEXT: mov r2, r7 +; BE-NEXT: mov r3, r5 +; BE-NEXT: ldr r8, [sp, #72] +; BE-NEXT: ldr r10, [sp, #56] +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #60 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: mov r0, r10 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #76 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: mov r0, r8 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d10[1], r4 +; BE-NEXT: vmov.32 d8[1], r9 +; BE-NEXT: vmov.32 d9[1], r5 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d1, d10 +; BE-NEXT: vrev64.32 d3, d8 +; BE-NEXT: 
vrev64.32 d0, d9 +; BE-NEXT: vrev64.32 d2, d16 +; BE-NEXT: vpop {d8, d9, d10} +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-NEON-LABEL: llrint_v4i64_v4f128: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEON-NEXT: .vsave {d8, d9, d10} +; BE-NEON-NEXT: vpush {d8, d9, d10} +; BE-NEON-NEXT: mov r5, r3 +; BE-NEON-NEXT: add r3, sp, #88 +; BE-NEON-NEXT: mov r7, r2 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: mov r0, r4 +; BE-NEON-NEXT: mov r1, r6 +; BE-NEON-NEXT: mov r2, r7 +; BE-NEON-NEXT: mov r3, r5 +; BE-NEON-NEXT: ldr r8, [sp, #72] +; BE-NEON-NEXT: ldr r10, [sp, #56] +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #60 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: mov r0, r10 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #76 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: mov r0, r8 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: vmov.32 d10[1], r4 +; BE-NEON-NEXT: vmov.32 d8[1], r9 +; BE-NEON-NEXT: vmov.32 d9[1], r5 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d1, d10 +; BE-NEON-NEXT: vrev64.32 d3, d8 +; BE-NEON-NEXT: vrev64.32 d0, d9 +; BE-NEON-NEXT: vrev64.32 d2, d16 +; BE-NEON-NEXT: vpop {d8, d9, d10} +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) + +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { +; LE-LABEL: llrint_v8i64_v8f128: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, 
r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #8 +; LE-NEXT: sub sp, sp, #8 +; LE-NEXT: mov r11, r3 +; LE-NEXT: add r3, sp, #208 +; LE-NEXT: mov r10, r2 +; LE-NEXT: mov r4, r1 +; LE-NEXT: mov r5, r0 +; LE-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r7, sp, #164 +; LE-NEXT: ldr r6, [sp, #160] +; LE-NEXT: str r1, [sp, #4] @ 4-byte Spill +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: ldm r7, {r1, r2, r3, r7} +; LE-NEXT: mov r0, r6 +; LE-NEXT: ldr r8, [sp, #128] +; LE-NEXT: ldr r9, [sp, #144] +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #180 +; LE-NEXT: str r1, [sp] @ 4-byte Spill +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: mov r0, r7 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #132 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: mov r0, r8 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #148 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: mov r0, r9 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: mov r0, r5 +; LE-NEXT: mov r1, r4 +; LE-NEXT: mov r2, r10 +; LE-NEXT: mov r3, r11 +; LE-NEXT: ldr r6, [sp, #112] +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #116 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: mov r0, r6 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #196 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: ldr r0, [sp, #192] +; LE-NEXT: mov r5, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-NEXT: vmov.32 d11[1], r7 +; LE-NEXT: vmov.32 d10[1], r0 +; LE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; LE-NEXT: vmov.32 d15[1], r5 +; LE-NEXT: vorr q2, q5, q5 +; LE-NEXT: vmov.32 d13[1], 
r9 +; LE-NEXT: vmov.32 d9[1], r0 +; LE-NEXT: vmov.32 d14[1], r4 +; LE-NEXT: vmov.32 d12[1], r8 +; LE-NEXT: vorr q0, q7, q7 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q1, q6, q6 +; LE-NEXT: vorr q3, q4, q4 +; LE-NEXT: add sp, sp, #8 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v8i64_v8f128: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #8 +; LE-NEON-NEXT: sub sp, sp, #8 +; LE-NEON-NEXT: mov r11, r3 +; LE-NEON-NEXT: add r3, sp, #208 +; LE-NEON-NEXT: mov r10, r2 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: mov r5, r0 +; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r7, sp, #164 +; LE-NEON-NEXT: ldr r6, [sp, #160] +; LE-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: ldm r7, {r1, r2, r3, r7} +; LE-NEON-NEXT: mov r0, r6 +; LE-NEON-NEXT: ldr r8, [sp, #128] +; LE-NEON-NEXT: ldr r9, [sp, #144] +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #180 +; LE-NEON-NEXT: str r1, [sp] @ 4-byte Spill +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #132 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: mov r0, r8 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #148 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: mov r0, r9 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 
+; LE-NEON-NEXT: mov r0, r5 +; LE-NEON-NEXT: mov r1, r4 +; LE-NEON-NEXT: mov r2, r10 +; LE-NEON-NEXT: mov r3, r11 +; LE-NEON-NEXT: ldr r6, [sp, #112] +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #116 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: mov r0, r6 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #196 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #192] +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d11[1], r7 +; LE-NEON-NEXT: vmov.32 d10[1], r0 +; LE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d15[1], r5 +; LE-NEON-NEXT: vorr q2, q5, q5 +; LE-NEON-NEXT: vmov.32 d13[1], r9 +; LE-NEON-NEXT: vmov.32 d9[1], r0 +; LE-NEON-NEXT: vmov.32 d14[1], r4 +; LE-NEON-NEXT: vmov.32 d12[1], r8 +; LE-NEON-NEXT: vorr q0, q7, q7 +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vorr q1, q6, q6 +; LE-NEON-NEXT: vorr q3, q4, q4 +; LE-NEON-NEXT: add sp, sp, #8 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v8i64_v8f128: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-NEXT: .pad #16 +; BE-NEXT: sub sp, sp, #16 +; BE-NEXT: str r3, [sp, #4] @ 4-byte Spill +; BE-NEXT: add r3, sp, #208 +; BE-NEXT: mov r11, r2 +; BE-NEXT: mov r4, r1 +; BE-NEXT: mov r5, r0 +; BE-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: ldr r7, [sp, #176] +; BE-NEXT: add r3, sp, #180 +; BE-NEXT: str r1, [sp, #12] @ 4-byte Spill +; BE-NEXT: vmov.32 
d8[0], r0 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: mov r0, r7 +; BE-NEXT: ldr r6, [sp, #128] +; BE-NEXT: ldr r8, [sp, #144] +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #132 +; BE-NEXT: str r1, [sp, #8] @ 4-byte Spill +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: mov r0, r6 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #148 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: mov r0, r8 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #160 +; BE-NEXT: mov r9, r0 +; BE-NEXT: mov r7, r1 +; BE-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: mov r1, r4 +; BE-NEXT: mov r2, r11 +; BE-NEXT: ldr r10, [sp, #112] +; BE-NEXT: vmov.32 d12[0], r9 +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #116 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: mov r0, r10 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #196 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #192] +; BE-NEXT: mov r5, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: vmov.32 d9[1], r0 +; BE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; BE-NEXT: vmov.32 d12[1], r7 +; BE-NEXT: vmov.32 d8[1], r0 +; BE-NEXT: vmov.32 d13[1], r4 +; BE-NEXT: vmov.32 d10[1], r6 +; BE-NEXT: vmov.32 d11[1], r8 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d1, d14 +; BE-NEXT: vrev64.32 d3, d12 +; BE-NEXT: vrev64.32 d5, d9 +; BE-NEXT: vrev64.32 d7, d8 +; BE-NEXT: vrev64.32 d0, d13 +; BE-NEXT: vrev64.32 d2, d10 +; BE-NEXT: vrev64.32 d4, d11 +; BE-NEXT: vrev64.32 d6, d16 +; BE-NEXT: add sp, sp, #16 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; 
BE-NEON-LABEL: llrint_v8i64_v8f128: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-NEON-NEXT: .pad #16 +; BE-NEON-NEXT: sub sp, sp, #16 +; BE-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill +; BE-NEON-NEXT: add r3, sp, #208 +; BE-NEON-NEXT: mov r11, r2 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: mov r5, r0 +; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: ldr r7, [sp, #176] +; BE-NEON-NEXT: add r3, sp, #180 +; BE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: mov r0, r7 +; BE-NEON-NEXT: ldr r6, [sp, #128] +; BE-NEON-NEXT: ldr r8, [sp, #144] +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #132 +; BE-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: mov r0, r6 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #148 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: mov r0, r8 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #160 +; BE-NEON-NEXT: mov r9, r0 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: mov r0, r5 +; BE-NEON-NEXT: mov r1, r4 +; BE-NEON-NEXT: mov r2, r11 +; BE-NEON-NEXT: ldr r10, [sp, #112] +; BE-NEON-NEXT: vmov.32 d12[0], r9 +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #116 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: mov r0, r10 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} 
+; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #196 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #192] +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; BE-NEON-NEXT: vmov.32 d14[1], r5 +; BE-NEON-NEXT: vmov.32 d9[1], r0 +; BE-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; BE-NEON-NEXT: vmov.32 d12[1], r7 +; BE-NEON-NEXT: vmov.32 d8[1], r0 +; BE-NEON-NEXT: vmov.32 d13[1], r4 +; BE-NEON-NEXT: vmov.32 d10[1], r6 +; BE-NEON-NEXT: vmov.32 d11[1], r8 +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: vrev64.32 d1, d14 +; BE-NEON-NEXT: vrev64.32 d3, d12 +; BE-NEON-NEXT: vrev64.32 d5, d9 +; BE-NEON-NEXT: vrev64.32 d7, d8 +; BE-NEON-NEXT: vrev64.32 d0, d13 +; BE-NEON-NEXT: vrev64.32 d2, d10 +; BE-NEON-NEXT: vrev64.32 d4, d11 +; BE-NEON-NEXT: vrev64.32 d6, d16 +; BE-NEON-NEXT: add sp, sp, #16 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>) + +define <16 x i64> @llrint_v16f128(<16 x fp128> %x) { +; LE-LABEL: llrint_v16f128: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #72 +; LE-NEXT: sub sp, sp, #72 +; LE-NEXT: mov r6, r3 +; LE-NEXT: add r3, sp, #408 +; LE-NEXT: mov r7, r2 +; LE-NEXT: mov r4, r0 +; LE-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r5, sp, #176 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: mov r0, r7 +; LE-NEXT: ldm r5, {r2, r3, r5} +; LE-NEXT: mov r1, r6 
+; LE-NEXT: ldr r8, [sp, #232] +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #188 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: mov r0, r5 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #236 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: mov r0, r8 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #252 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: ldr r0, [sp, #248] +; LE-NEXT: mov r8, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #268 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: ldr r0, [sp, #264] +; LE-NEXT: mov r6, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #284 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: ldr r0, [sp, #280] +; LE-NEXT: mov r7, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #316 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: ldr r0, [sp, #312] +; LE-NEXT: mov r5, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d15[1], r5 +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: ldr r5, [sp, #300] +; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: ldr r2, [sp, #304] +; LE-NEXT: ldr r3, [sp, #308] +; LE-NEXT: vmov.32 d11[1], r6 +; LE-NEXT: ldr r6, [sp, #200] +; LE-NEXT: ldr r7, [sp, #204] +; LE-NEXT: vmov.32 d10[1], r8 +; LE-NEXT: ldr r8, [sp, #344] +; LE-NEXT: vmov.32 d9[1], r11 +; LE-NEXT: ldr r11, [sp, #216] +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: ldr r0, [sp, #296] +; LE-NEXT: vmov.32 d8[1], r9 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vorr q5, q8, q8 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: vorr q4, q6, q6 +; LE-NEXT: vmov.32 d11[1], r1 +; LE-NEXT: mov r1, r5 +; LE-NEXT: vmov.32 d9[1], r10 +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: ldr r2, [sp, #208] +; LE-NEXT: ldr r3, 
[sp, #212] +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: mov r9, r1 +; LE-NEXT: mov r0, r6 +; LE-NEXT: mov r1, r7 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #220 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: mov r0, r11 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #348 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: mov r0, r8 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #364 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: ldr r0, [sp, #360] +; LE-NEXT: mov r8, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #380 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: ldr r0, [sp, #376] +; LE-NEXT: mov r5, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #396 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: ldr r0, [sp, #392] +; LE-NEXT: mov r6, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #332 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: ldr r0, [sp, #328] +; LE-NEXT: mov r7, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: add r0, r4, #64 +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vmov.32 d13[1], r8 +; LE-NEXT: vmov.32 d18[1], r9 +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: vmov.32 d12[1], r1 +; LE-NEXT: vmov.32 d14[1], r5 +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEXT: vmov.32 d8[1], r7 +; LE-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-NEXT: vst1.64 {d8, d9}, [r0:128] +; LE-NEXT: vmov.32 d11[1], r11 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vmov.32 d10[1], r10 +; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-NEXT: vst1.64 {d10, d11}, [r4:128]! 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #56 +; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-NEXT: add sp, sp, #72 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v16f128: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #72 +; LE-NEON-NEXT: sub sp, sp, #72 +; LE-NEON-NEXT: mov r6, r3 +; LE-NEON-NEXT: add r3, sp, #408 +; LE-NEON-NEXT: mov r7, r2 +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r5, sp, #176 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: ldm r5, {r2, r3, r5} +; LE-NEON-NEXT: mov r1, r6 +; LE-NEON-NEXT: ldr r8, [sp, #232] +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #188 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: mov r0, r5 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #236 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: mov r0, r8 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #252 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #248] +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #268 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #264] +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, 
r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #284 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #280] +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #316 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #312] +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d15[1], r5 +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: ldr r5, [sp, #300] +; LE-NEON-NEXT: vmov.32 d14[1], r7 +; LE-NEON-NEXT: ldr r2, [sp, #304] +; LE-NEON-NEXT: ldr r3, [sp, #308] +; LE-NEON-NEXT: vmov.32 d11[1], r6 +; LE-NEON-NEXT: ldr r6, [sp, #200] +; LE-NEON-NEXT: ldr r7, [sp, #204] +; LE-NEON-NEXT: vmov.32 d10[1], r8 +; LE-NEON-NEXT: ldr r8, [sp, #344] +; LE-NEON-NEXT: vmov.32 d9[1], r11 +; LE-NEON-NEXT: ldr r11, [sp, #216] +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #296] +; LE-NEON-NEXT: vmov.32 d8[1], r9 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vorr q5, q8, q8 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: vorr q4, q6, q6 +; LE-NEON-NEXT: vmov.32 d11[1], r1 +; LE-NEON-NEXT: mov r1, r5 +; LE-NEON-NEXT: vmov.32 d9[1], r10 +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: ldr r2, [sp, #208] +; LE-NEON-NEXT: ldr r3, [sp, #212] +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: mov r9, r1 +; LE-NEON-NEXT: mov r0, r6 +; LE-NEON-NEXT: mov r1, r7 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #220 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: mov r0, r11 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #348 +; LE-NEON-NEXT: mov r11, r1 +; 
LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: mov r0, r8 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #364 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #360] +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #380 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #376] +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #396 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #392] +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #332 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #328] +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: add r0, r4, #64 +; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #24 +; LE-NEON-NEXT: vmov.32 d13[1], r8 +; LE-NEON-NEXT: vmov.32 d18[1], r9 +; LE-NEON-NEXT: vmov.32 d15[1], r6 +; LE-NEON-NEXT: vmov.32 d12[1], r1 +; LE-NEON-NEXT: vmov.32 d14[1], r5 +; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEON-NEXT: vmov.32 d8[1], r7 +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128] +; LE-NEON-NEXT: vmov.32 d11[1], r11 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #40 +; LE-NEON-NEXT: vmov.32 d10[1], r10 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r4:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #56 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! 
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-NEON-NEXT: add sp, sp, #72 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v16f128: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #56 +; BE-NEXT: sub sp, sp, #56 +; BE-NEXT: mov r5, r3 +; BE-NEXT: add r3, sp, #376 +; BE-NEXT: mov r6, r2 +; BE-NEXT: mov r4, r0 +; BE-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: ldr r7, [sp, #392] +; BE-NEXT: add r3, sp, #396 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: mov r0, r7 +; BE-NEXT: ldr r11, [sp, #168] +; BE-NEXT: bl llrintl +; BE-NEXT: ldr r2, [sp, #160] +; BE-NEXT: mov r10, r1 +; BE-NEXT: ldr r3, [sp, #164] +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: mov r0, r6 +; BE-NEXT: mov r1, r5 +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #172 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: mov r0, r11 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #220 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: ldr r0, [sp, #216] +; BE-NEXT: mov r11, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #236 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: ldr r0, [sp, #232] +; BE-NEXT: mov r6, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #252 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: ldr r0, [sp, #248] +; BE-NEXT: mov r7, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #268 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #264] +; 
BE-NEXT: mov r5, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: ldr r0, [sp, #280] +; BE-NEXT: ldr r2, [sp, #288] +; BE-NEXT: vmov.32 d13[1], r7 +; BE-NEXT: ldr r7, [sp, #284] +; BE-NEXT: ldr r3, [sp, #292] +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: ldr r5, [sp, #328] +; BE-NEXT: vmov.32 d12[1], r6 +; BE-NEXT: ldr r6, [sp, #300] +; BE-NEXT: vmov.32 d10[1], r8 +; BE-NEXT: ldr r8, [sp, #184] +; BE-NEXT: vmov.32 d11[1], r11 +; BE-NEXT: vmov.32 d9[1], r10 +; BE-NEXT: vmov.32 d8[1], r9 +; BE-NEXT: vmov.32 d15[1], r1 +; BE-NEXT: mov r1, r7 +; BE-NEXT: vstr d14, [sp, #48] @ 8-byte Spill +; BE-NEXT: vstr d13, [sp, #40] @ 8-byte Spill +; BE-NEXT: vstr d12, [sp, #32] @ 8-byte Spill +; BE-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-NEXT: vstr d10, [sp, #16] @ 8-byte Spill +; BE-NEXT: vstr d9, [sp, #8] @ 8-byte Spill +; BE-NEXT: vstr d8, [sp] @ 8-byte Spill +; BE-NEXT: bl llrintl +; BE-NEXT: mov r10, r1 +; BE-NEXT: ldr r1, [sp, #296] +; BE-NEXT: ldr r2, [sp, #304] +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: ldr r3, [sp, #308] +; BE-NEXT: mov r0, r1 +; BE-NEXT: mov r1, r6 +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #332 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #188 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: mov r0, r8 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #204 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: ldr r0, [sp, #200] +; BE-NEXT: mov r8, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #348 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: ldr r0, [sp, #344] +; BE-NEXT: mov r5, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #364 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: ldr r0, [sp, #360] +; BE-NEXT: mov r9, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; 
BE-NEXT: add r3, sp, #316 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #312] +; BE-NEXT: mov r6, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: vldr d18, [sp, #48] @ 8-byte Reload +; BE-NEXT: vrev64.32 d17, d15 +; BE-NEXT: vrev64.32 d16, d18 +; BE-NEXT: vldr d18, [sp, #40] @ 8-byte Reload +; BE-NEXT: vmov.32 d24[0], r0 +; BE-NEXT: add r0, r4, #64 +; BE-NEXT: vldr d20, [sp, #32] @ 8-byte Reload +; BE-NEXT: vrev64.32 d19, d18 +; BE-NEXT: vmov.32 d9[1], r11 +; BE-NEXT: vmov.32 d10[1], r7 +; BE-NEXT: vrev64.32 d18, d20 +; BE-NEXT: vldr d20, [sp, #24] @ 8-byte Reload +; BE-NEXT: vmov.32 d8[1], r10 +; BE-NEXT: vmov.32 d14[1], r6 +; BE-NEXT: vmov.32 d24[1], r1 +; BE-NEXT: vldr d22, [sp, #16] @ 8-byte Reload +; BE-NEXT: vrev64.32 d21, d20 +; BE-NEXT: vrev64.32 d1, d9 +; BE-NEXT: vmov.32 d13[1], r9 +; BE-NEXT: vrev64.32 d31, d10 +; BE-NEXT: vrev64.32 d20, d22 +; BE-NEXT: vldr d22, [sp, #8] @ 8-byte Reload +; BE-NEXT: vrev64.32 d0, d8 +; BE-NEXT: vrev64.32 d29, d14 +; BE-NEXT: vmov.32 d12[1], r5 +; BE-NEXT: vrev64.32 d30, d24 +; BE-NEXT: vrev64.32 d27, d22 +; BE-NEXT: vldr d22, [sp] @ 8-byte Reload +; BE-NEXT: vst1.64 {d0, d1}, [r0:128]! +; BE-NEXT: vmov.32 d11[1], r8 +; BE-NEXT: vrev64.32 d28, d13 +; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEXT: vrev64.32 d26, d22 +; BE-NEXT: vrev64.32 d23, d12 +; BE-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-NEXT: vrev64.32 d22, d11 +; BE-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-NEXT: vst1.64 {d18, d19}, [r4:128]! 
+; BE-NEXT: vst1.64 {d16, d17}, [r4:128] +; BE-NEXT: add sp, sp, #56 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v16f128: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #56 +; BE-NEON-NEXT: sub sp, sp, #56 +; BE-NEON-NEXT: mov r5, r3 +; BE-NEON-NEXT: add r3, sp, #376 +; BE-NEON-NEXT: mov r6, r2 +; BE-NEON-NEXT: mov r4, r0 +; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: ldr r7, [sp, #392] +; BE-NEON-NEXT: add r3, sp, #396 +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: mov r0, r7 +; BE-NEON-NEXT: ldr r11, [sp, #168] +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: ldr r2, [sp, #160] +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: ldr r3, [sp, #164] +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: mov r0, r6 +; BE-NEON-NEXT: mov r1, r5 +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #172 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: mov r0, r11 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #220 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #216] +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #236 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #232] +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #252 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, 
#248] +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #268 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #264] +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #280] +; BE-NEON-NEXT: ldr r2, [sp, #288] +; BE-NEON-NEXT: vmov.32 d13[1], r7 +; BE-NEON-NEXT: ldr r7, [sp, #284] +; BE-NEON-NEXT: ldr r3, [sp, #292] +; BE-NEON-NEXT: vmov.32 d14[1], r5 +; BE-NEON-NEXT: ldr r5, [sp, #328] +; BE-NEON-NEXT: vmov.32 d12[1], r6 +; BE-NEON-NEXT: ldr r6, [sp, #300] +; BE-NEON-NEXT: vmov.32 d10[1], r8 +; BE-NEON-NEXT: ldr r8, [sp, #184] +; BE-NEON-NEXT: vmov.32 d11[1], r11 +; BE-NEON-NEXT: vmov.32 d9[1], r10 +; BE-NEON-NEXT: vmov.32 d8[1], r9 +; BE-NEON-NEXT: vmov.32 d15[1], r1 +; BE-NEON-NEXT: mov r1, r7 +; BE-NEON-NEXT: vstr d14, [sp, #48] @ 8-byte Spill +; BE-NEON-NEXT: vstr d13, [sp, #40] @ 8-byte Spill +; BE-NEON-NEXT: vstr d12, [sp, #32] @ 8-byte Spill +; BE-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-NEON-NEXT: vstr d10, [sp, #16] @ 8-byte Spill +; BE-NEON-NEXT: vstr d9, [sp, #8] @ 8-byte Spill +; BE-NEON-NEXT: vstr d8, [sp] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: ldr r1, [sp, #296] +; BE-NEON-NEXT: ldr r2, [sp, #304] +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: ldr r3, [sp, #308] +; BE-NEON-NEXT: mov r0, r1 +; BE-NEON-NEXT: mov r1, r6 +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #332 +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: mov r0, r5 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #188 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: mov r0, r8 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #204 +; BE-NEON-NEXT: vmov.32 d11[0], r0 
+; BE-NEON-NEXT: ldr r0, [sp, #200] +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #348 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #344] +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #364 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #360] +; BE-NEON-NEXT: mov r9, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #316 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #312] +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vldr d18, [sp, #48] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d17, d15 +; BE-NEON-NEXT: vrev64.32 d16, d18 +; BE-NEON-NEXT: vldr d18, [sp, #40] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d24[0], r0 +; BE-NEON-NEXT: add r0, r4, #64 +; BE-NEON-NEXT: vldr d20, [sp, #32] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d19, d18 +; BE-NEON-NEXT: vmov.32 d9[1], r11 +; BE-NEON-NEXT: vmov.32 d10[1], r7 +; BE-NEON-NEXT: vrev64.32 d18, d20 +; BE-NEON-NEXT: vldr d20, [sp, #24] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d8[1], r10 +; BE-NEON-NEXT: vmov.32 d14[1], r6 +; BE-NEON-NEXT: vmov.32 d24[1], r1 +; BE-NEON-NEXT: vldr d22, [sp, #16] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d21, d20 +; BE-NEON-NEXT: vrev64.32 d1, d9 +; BE-NEON-NEXT: vmov.32 d13[1], r9 +; BE-NEON-NEXT: vrev64.32 d31, d10 +; BE-NEON-NEXT: vrev64.32 d20, d22 +; BE-NEON-NEXT: vldr d22, [sp, #8] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d0, d8 +; BE-NEON-NEXT: vrev64.32 d29, d14 +; BE-NEON-NEXT: vmov.32 d12[1], r5 +; BE-NEON-NEXT: vrev64.32 d30, d24 +; BE-NEON-NEXT: vrev64.32 d27, d22 +; BE-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload +; BE-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]! 
+; BE-NEON-NEXT: vmov.32 d11[1], r8 +; BE-NEON-NEXT: vrev64.32 d28, d13 +; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 d26, d22 +; BE-NEON-NEXT: vrev64.32 d23, d12 +; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 d22, d11 +; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r4:128]! +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] +; BE-NEON-NEXT: add sp, sp, #56 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>) + +define <32 x i64> @llrint_v32f128(<32 x fp128> %x) { +; LE-LABEL: llrint_v32f128: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEXT: .pad #4 +; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #192 +; LE-NEXT: sub sp, sp, #192 +; LE-NEXT: str r3, [sp, #60] @ 4-byte Spill +; LE-NEXT: add r3, sp, #688 +; LE-NEXT: str r2, [sp, #56] @ 4-byte Spill +; LE-NEXT: mov r9, r0 +; LE-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #560 +; LE-NEXT: mov r4, r0 +; LE-NEXT: str r1, [sp, #64] @ 4-byte Spill +; LE-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: ldr r7, [sp, #544] +; LE-NEXT: ldr r6, [sp, #548] +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: ldr r2, [sp, #552] +; LE-NEXT: vmov.32 d17[1], r1 +; LE-NEXT: ldr r3, [sp, #556] +; LE-NEXT: mov r0, r7 +; LE-NEXT: mov r1, r6 +; LE-NEXT: vorr q4, q8, q8 +; LE-NEXT: ldr r5, [sp, #528] +; LE-NEXT: vmov.32 d17[0], r4 +; LE-NEXT: ldr 
r10, [sp, #304] +; LE-NEXT: ldr r8, [sp, #368] +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #532 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: mov r11, r1 +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: mov r0, r5 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #308 +; LE-NEXT: mov r5, r1 +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: mov r0, r10 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #372 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: mov r0, r8 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #404 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: ldr r0, [sp, #400] +; LE-NEXT: mov r6, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #596 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: ldr r0, [sp, #592] +; LE-NEXT: mov r7, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #676 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: ldr r0, [sp, #672] +; LE-NEXT: mov r4, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: vmov.32 d13[1], r4 +; LE-NEXT: str r1, [sp, #52] @ 4-byte Spill +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: vmov.32 d9[1], r7 +; LE-NEXT: ldr r1, [sp, #628] +; LE-NEXT: ldr r2, [sp, #632] +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: ldr r3, [sp, #636] +; LE-NEXT: ldr r7, [sp, #64] @ 4-byte Reload +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vmov.32 d11[1], r10 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, 
#176 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d18[0], r0 +; LE-NEXT: ldr r0, [sp, #624] +; LE-NEXT: vmov.32 d16[1], r11 +; LE-NEXT: vmov.32 d9[1], r5 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: vmov.32 d19[1], r7 +; LE-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #512 +; LE-NEXT: str r0, [sp, #48] @ 4-byte Spill +; LE-NEXT: str r1, [sp, #64] @ 4-byte Spill +; LE-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #768 +; LE-NEXT: mov r11, r0 +; LE-NEXT: str r1, [sp, #28] @ 4-byte Spill +; LE-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: ldr r6, [sp, #784] +; LE-NEXT: add r3, sp, #788 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: mov r0, r6 +; LE-NEXT: ldr r5, [sp, #736] +; LE-NEXT: ldr r7, [sp, #752] +; LE-NEXT: ldr r4, [sp, #720] +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #740 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: mov r0, r5 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #756 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: mov r0, r7 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #724 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: mov r0, r4 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d13[1], r7 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: ldr r2, [sp, #296] +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: ldr r3, [sp, #300] +; LE-NEXT: ldr r4, [sp, #576] +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-NEXT: ldr r10, [sp, #384] +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: ldr r6, [sp, #352] +; LE-NEXT: vmov.32 d14[1], r8 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add 
lr, sp, #32 +; LE-NEXT: vmov.32 d11[1], r1 +; LE-NEXT: ldr r1, [sp, #60] @ 4-byte Reload +; LE-NEXT: vmov.32 d8[0], r11 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: bl llrintl +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: add r3, sp, #356 +; LE-NEXT: mov r5, r1 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: vmov.32 d16[0], r0 +; LE-NEXT: mov r0, r6 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: bl llrintl +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: add r3, sp, #388 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: mov r0, r10 +; LE-NEXT: bl llrintl +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: add r3, sp, #580 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: mov r0, r4 +; LE-NEXT: bl llrintl +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: add r3, sp, #708 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: ldr r0, [sp, #704] +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d8[1], r4 +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: ldr r2, [sp, #52] @ 4-byte Reload +; LE-NEXT: vmov.32 d12[1], r6 +; LE-NEXT: ldr r6, [sp, #644] +; LE-NEXT: ldr r3, [sp, #652] +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: ldr r4, [sp, #480] +; LE-NEXT: ldr r7, [sp, #656] +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; LE-NEXT: ldr r10, [sp, #496] +; LE-NEXT: vmov.32 
d16[1], r5 +; LE-NEXT: add r5, r9, #192 +; LE-NEXT: ldr r8, [sp, #608] +; LE-NEXT: vmov.32 d10[1], r1 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vmov.32 d16[1], r0 +; LE-NEXT: ldr r0, [sp, #640] +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d16[1], r2 +; LE-NEXT: ldr r2, [sp, #648] +; LE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; LE-NEXT: vst1.64 {d10, d11}, [r5:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; LE-NEXT: ldr r1, [sp, #48] @ 4-byte Reload +; LE-NEXT: vmov.32 d9[0], r1 +; LE-NEXT: mov r1, r6 +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #660 +; LE-NEXT: mov r11, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: mov r0, r7 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #484 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: mov r0, r4 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #500 +; LE-NEXT: mov r6, r1 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: mov r0, r10 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #612 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: mov r0, r8 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: ldr r0, [sp, #64] @ 4-byte Reload +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: add r8, r9, #128 +; LE-NEXT: vmov.32 d13[1], r7 +; LE-NEXT: ldr r2, [sp, #344] +; LE-NEXT: ldr r3, [sp, #348] +; LE-NEXT: vmov.32 d12[1], r11 +; LE-NEXT: ldr r7, [sp, #452] +; LE-NEXT: ldr r10, [sp, #416] +; LE-NEXT: vmov.32 d9[1], r0 +; LE-NEXT: ldr r0, [sp, #336] +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #64 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill 
+; LE-NEXT: add lr, sp, #32 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vmov.32 d11[1], r4 +; LE-NEXT: ldr r4, [sp, #340] +; LE-NEXT: vst1.64 {d16, d17}, [r5:128] +; LE-NEXT: mov r1, r4 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: vmov.32 d10[1], r6 +; LE-NEXT: ldr r6, [sp, #448] +; LE-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-NEXT: bl llrintl +; LE-NEXT: ldr r2, [sp, #456] +; LE-NEXT: mov r11, r1 +; LE-NEXT: ldr r3, [sp, #460] +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: mov r0, r6 +; LE-NEXT: mov r1, r7 +; LE-NEXT: ldr r5, [sp, #432] +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #468 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: ldr r0, [sp, #464] +; LE-NEXT: mov r6, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #420 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: mov r0, r10 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #436 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: mov r0, r5 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #324 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: ldr r0, [sp, #320] +; LE-NEXT: mov r5, r1 +; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: bl llrintl +; LE-NEXT: add lr, sp, #64 +; LE-NEXT: vmov.32 d9[1], r5 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: vmov.32 d13[1], r7 +; LE-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #176 +; LE-NEXT: vmov.32 d8[1], r4 +; LE-NEXT: vmov.32 d12[1], r6 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: add r0, r9, #64 +; LE-NEXT: vst1.64 {d16, d17}, [r8:128] +; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #160 +; LE-NEXT: vmov.32 d15[1], r11 +; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: vmov.32 d14[1], r1 +; LE-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-NEXT: vst1.64 {d14, d15}, [r9:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r9:128] +; LE-NEXT: add sp, sp, #192 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: add sp, sp, #4 +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-NEON-LABEL: llrint_v32f128: +; LE-NEON: @ %bb.0: +; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-NEON-NEXT: .pad #4 +; LE-NEON-NEXT: sub sp, sp, #4 +; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: .pad #192 +; LE-NEON-NEXT: sub sp, sp, #192 +; LE-NEON-NEXT: str r3, [sp, #60] @ 4-byte Spill +; LE-NEON-NEXT: add r3, sp, #688 +; LE-NEON-NEXT: str r2, [sp, #56] @ 4-byte Spill +; LE-NEON-NEXT: mov r9, r0 +; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #560 +; LE-NEON-NEXT: mov r4, r0 +; LE-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill +; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: ldr r7, [sp, #544] +; LE-NEON-NEXT: ldr r6, [sp, #548] +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: ldr r2, [sp, #552] +; LE-NEON-NEXT: vmov.32 d17[1], r1 +; LE-NEON-NEXT: ldr r3, [sp, #556] +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: mov r1, r6 +; LE-NEON-NEXT: vorr q4, q8, q8 +; LE-NEON-NEXT: ldr r5, [sp, #528] +; LE-NEON-NEXT: vmov.32 d17[0], r4 +; LE-NEON-NEXT: ldr 
r10, [sp, #304] +; LE-NEON-NEXT: ldr r8, [sp, #368] +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #532 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: mov r0, r5 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #308 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vmov.32 d17[0], r0 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: mov r0, r10 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #372 +; LE-NEON-NEXT: mov r10, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: mov r0, r8 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #404 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #400] +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #596 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #592] +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #676 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #672] +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: vmov.32 d13[1], r4 +; LE-NEON-NEXT: str r1, [sp, #52] @ 4-byte Spill +; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #128 +; LE-NEON-NEXT: vmov.32 d9[1], r7 +; LE-NEON-NEXT: ldr r1, [sp, #628] +; LE-NEON-NEXT: ldr r2, [sp, #632] +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #112 +; LE-NEON-NEXT: 
vmov.32 d15[1], r6 +; LE-NEON-NEXT: ldr r3, [sp, #636] +; LE-NEON-NEXT: ldr r7, [sp, #64] @ 4-byte Reload +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vmov.32 d11[1], r10 +; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d18[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #624] +; LE-NEON-NEXT: vmov.32 d16[1], r11 +; LE-NEON-NEXT: vmov.32 d9[1], r5 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: vmov.32 d19[1], r7 +; LE-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #512 +; LE-NEON-NEXT: str r0, [sp, #48] @ 4-byte Spill +; LE-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill +; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #768 +; LE-NEON-NEXT: mov r11, r0 +; LE-NEON-NEXT: str r1, [sp, #28] @ 4-byte Spill +; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: ldr r6, [sp, #784] +; LE-NEON-NEXT: add r3, sp, #788 +; LE-NEON-NEXT: mov r8, r1 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: mov r0, r6 +; LE-NEON-NEXT: ldr r5, [sp, #736] +; LE-NEON-NEXT: ldr r7, [sp, #752] +; LE-NEON-NEXT: ldr r4, [sp, #720] +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #740 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: mov r0, r5 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #756 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #724 +; LE-NEON-NEXT: mov r7, r1 +; 
LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: mov r0, r4 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d13[1], r7 +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: ldr r2, [sp, #296] +; LE-NEON-NEXT: vmov.32 d12[1], r5 +; LE-NEON-NEXT: ldr r3, [sp, #300] +; LE-NEON-NEXT: ldr r4, [sp, #576] +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-NEON-NEXT: ldr r10, [sp, #384] +; LE-NEON-NEXT: vmov.32 d15[1], r6 +; LE-NEON-NEXT: ldr r6, [sp, #352] +; LE-NEON-NEXT: vmov.32 d14[1], r8 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #32 +; LE-NEON-NEXT: vmov.32 d11[1], r1 +; LE-NEON-NEXT: ldr r1, [sp, #60] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d8[0], r11 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: add r3, sp, #356 +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: vmov.32 d16[0], r0 +; LE-NEON-NEXT: mov r0, r6 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add lr, sp, #112 +; LE-NEON-NEXT: add r3, sp, #388 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: mov r0, r10 +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add lr, sp, #128 +; LE-NEON-NEXT: add r3, sp, #580 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: mov r0, r4 +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: add r3, sp, #708 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte 
Reload +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #704] +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d8[1], r4 +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: ldr r2, [sp, #52] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d12[1], r6 +; LE-NEON-NEXT: ldr r6, [sp, #644] +; LE-NEON-NEXT: ldr r3, [sp, #652] +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #128 +; LE-NEON-NEXT: vmov.32 d14[1], r7 +; LE-NEON-NEXT: ldr r4, [sp, #480] +; LE-NEON-NEXT: ldr r7, [sp, #656] +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #112 +; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; LE-NEON-NEXT: ldr r10, [sp, #496] +; LE-NEON-NEXT: vmov.32 d16[1], r5 +; LE-NEON-NEXT: add r5, r9, #192 +; LE-NEON-NEXT: ldr r8, [sp, #608] +; LE-NEON-NEXT: vmov.32 d10[1], r1 +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vmov.32 d16[1], r0 +; LE-NEON-NEXT: ldr r0, [sp, #640] +; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #8 +; LE-NEON-NEXT: vmov.32 d16[1], r2 +; LE-NEON-NEXT: ldr r2, [sp, #648] +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r5:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; LE-NEON-NEXT: ldr r1, [sp, #48] @ 4-byte Reload +; LE-NEON-NEXT: vmov.32 d9[0], r1 +; LE-NEON-NEXT: mov r1, r6 +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #660 +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: mov r0, r7 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #484 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: mov r0, r4 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #500 +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: vmov.32 d10[0], r0 +; LE-NEON-NEXT: mov r0, r10 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #612 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d11[0], r0 +; LE-NEON-NEXT: mov r0, r8 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #64] @ 4-byte Reload +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: add r8, r9, #128 +; LE-NEON-NEXT: vmov.32 d13[1], r7 +; LE-NEON-NEXT: ldr r2, [sp, #344] +; LE-NEON-NEXT: ldr r3, [sp, #348] +; LE-NEON-NEXT: vmov.32 d12[1], r11 +; LE-NEON-NEXT: ldr r7, [sp, #452] +; LE-NEON-NEXT: ldr r10, [sp, #416] +; LE-NEON-NEXT: vmov.32 d9[1], r0 +; LE-NEON-NEXT: ldr r0, [sp, #336] +; LE-NEON-NEXT: vmov.32 d8[1], r1 +; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #64 +; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEON-NEXT: add lr, sp, #32 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #144 +; LE-NEON-NEXT: vmov.32 d11[1], r4 +; LE-NEON-NEXT: ldr r4, [sp, #340] +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] +; LE-NEON-NEXT: mov r1, r4 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #80 +; LE-NEON-NEXT: vmov.32 d10[1], r6 +; LE-NEON-NEXT: ldr r6, [sp, #448] +; LE-NEON-NEXT: vst1.64 {d16, d17}, 
[r8:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: ldr r2, [sp, #456] +; LE-NEON-NEXT: mov r11, r1 +; LE-NEON-NEXT: ldr r3, [sp, #460] +; LE-NEON-NEXT: vmov.32 d15[0], r0 +; LE-NEON-NEXT: mov r0, r6 +; LE-NEON-NEXT: mov r1, r7 +; LE-NEON-NEXT: ldr r5, [sp, #432] +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #468 +; LE-NEON-NEXT: vmov.32 d12[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #464] +; LE-NEON-NEXT: mov r6, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #420 +; LE-NEON-NEXT: mov r7, r1 +; LE-NEON-NEXT: vmov.32 d13[0], r0 +; LE-NEON-NEXT: mov r0, r10 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #436 +; LE-NEON-NEXT: mov r4, r1 +; LE-NEON-NEXT: vmov.32 d8[0], r0 +; LE-NEON-NEXT: mov r0, r5 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add r3, sp, #324 +; LE-NEON-NEXT: vmov.32 d9[0], r0 +; LE-NEON-NEXT: ldr r0, [sp, #320] +; LE-NEON-NEXT: mov r5, r1 +; LE-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-NEON-NEXT: bl llrintl +; LE-NEON-NEXT: add lr, sp, #64 +; LE-NEON-NEXT: vmov.32 d9[1], r5 +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #96 +; LE-NEON-NEXT: vmov.32 d13[1], r7 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #176 +; LE-NEON-NEXT: vmov.32 d8[1], r4 +; LE-NEON-NEXT: vmov.32 d12[1], r6 +; LE-NEON-NEXT: vmov.32 d14[0], r0 +; LE-NEON-NEXT: add r0, r9, #64 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128] +; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! 
+; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #160 +; LE-NEON-NEXT: vmov.32 d15[1], r11 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #112 +; LE-NEON-NEXT: vmov.32 d14[1], r1 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-NEON-NEXT: vst1.64 {d14, d15}, [r9:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: add lr, sp, #128 +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] +; LE-NEON-NEXT: add sp, sp, #192 +; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEON-NEXT: add sp, sp, #4 +; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-LABEL: llrint_v32f128: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEXT: .pad #4 +; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: .pad #152 +; BE-NEXT: sub sp, sp, #152 +; BE-NEXT: str r3, [sp, #120] @ 4-byte Spill +; BE-NEXT: add r3, sp, #712 +; BE-NEXT: str r2, [sp, #112] @ 4-byte Spill +; BE-NEXT: mov r9, r0 +; BE-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: ldr r7, [sp, #648] +; BE-NEXT: add r3, sp, #652 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: mov r0, r7 +; BE-NEXT: ldr r6, [sp, #520] +; BE-NEXT: ldr r8, [sp, #632] +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #524 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: mov r0, r6 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #636 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: mov r0, r8 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: mov r6, r1 +; 
BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: ldr r0, [sp, #488] +; BE-NEXT: vmov.32 d8[1], r4 +; BE-NEXT: ldr r1, [sp, #492] +; BE-NEXT: ldr r2, [sp, #496] +; BE-NEXT: vmov.32 d10[1], r7 +; BE-NEXT: ldr r3, [sp, #500] +; BE-NEXT: vmov.32 d9[1], r5 +; BE-NEXT: vstr d8, [sp, #144] @ 8-byte Spill +; BE-NEXT: vstr d10, [sp, #136] @ 8-byte Spill +; BE-NEXT: vstr d9, [sp, #128] @ 8-byte Spill +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #680 +; BE-NEXT: str r0, [sp, #104] @ 4-byte Spill +; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: ldr r0, [sp, #728] +; BE-NEXT: ldr r2, [sp, #736] +; BE-NEXT: vmov.32 d11[1], r6 +; BE-NEXT: ldr r6, [sp, #732] +; BE-NEXT: ldr r3, [sp, #740] +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: ldr r5, [sp, #504] +; BE-NEXT: mov r1, r6 +; BE-NEXT: ldr r7, [sp, #744] +; BE-NEXT: ldr r4, [sp, #748] +; BE-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-NEXT: vstr d16, [sp, #8] @ 8-byte Spill +; BE-NEXT: bl llrintl +; BE-NEXT: ldr r2, [sp, #752] +; BE-NEXT: mov r11, r1 +; BE-NEXT: ldr r3, [sp, #756] +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: mov r0, r7 +; BE-NEXT: mov r1, r4 +; BE-NEXT: ldr r10, [sp, #552] +; BE-NEXT: ldr r6, [sp, #664] +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #508 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #540 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: ldr r0, [sp, #536] +; BE-NEXT: mov r7, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #556 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: mov r0, r10 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #668 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: mov r0, r6 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #700 +; BE-NEXT: vmov.32 d14[0], 
r0 +; BE-NEXT: ldr r0, [sp, #696] +; BE-NEXT: mov r6, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: ldr r0, [sp, #104] @ 4-byte Reload +; BE-NEXT: ldr r2, [sp, #256] +; BE-NEXT: vmov.32 d13[1], r11 +; BE-NEXT: ldr r3, [sp, #260] +; BE-NEXT: vmov.32 d14[1], r6 +; BE-NEXT: ldr r6, [sp, #264] +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: ldr r4, [sp, #344] +; BE-NEXT: vmov.32 d12[1], r5 +; BE-NEXT: ldr r5, [sp, #312] +; BE-NEXT: vmov.32 d8[1], r8 +; BE-NEXT: ldr r8, [sp, #328] +; BE-NEXT: vmov.32 d10[1], r7 +; BE-NEXT: vstr d13, [sp, #32] @ 8-byte Spill +; BE-NEXT: vmov.32 d11[1], r1 +; BE-NEXT: ldr r1, [sp, #120] @ 4-byte Reload +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; BE-NEXT: vstr d14, [sp] @ 8-byte Spill +; BE-NEXT: vstr d9, [sp, #16] @ 8-byte Spill +; BE-NEXT: vstr d12, [sp, #56] @ 8-byte Spill +; BE-NEXT: vstr d10, [sp, #64] @ 8-byte Spill +; BE-NEXT: vstr d8, [sp, #40] @ 8-byte Spill +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #268 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: mov r0, r6 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #316 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #332 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: mov r0, r8 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #348 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: mov r0, r4 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #364 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: ldr r0, [sp, #360] +; BE-NEXT: mov r4, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #476 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: ldr r0, [sp, #472] +; BE-NEXT: mov r6, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl 
llrintl +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-NEXT: ldr r2, [sp, #592] +; BE-NEXT: vldr d20, [sp, #136] @ 8-byte Reload +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: ldr r1, [sp, #588] +; BE-NEXT: ldr r3, [sp, #596] +; BE-NEXT: vldr d22, [sp, #24] @ 8-byte Reload +; BE-NEXT: vldr d18, [sp, #8] @ 8-byte Reload +; BE-NEXT: vrev64.32 d21, d20 +; BE-NEXT: vmov.32 d10[1], r6 +; BE-NEXT: ldr r6, [sp, #600] +; BE-NEXT: vmov.32 d9[1], r4 +; BE-NEXT: ldr r4, [sp, #616] +; BE-NEXT: vmov.32 d12[1], r7 +; BE-NEXT: ldr r7, [sp, #604] +; BE-NEXT: vmov.32 d8[1], r10 +; BE-NEXT: add r10, r9, #192 +; BE-NEXT: vmov.32 d14[1], r11 +; BE-NEXT: ldr r11, [sp, #440] +; BE-NEXT: vmov.32 d13[1], r0 +; BE-NEXT: ldr r0, [sp, #584] +; BE-NEXT: vmov.32 d15[1], r5 +; BE-NEXT: vstr d16, [sp, #48] @ 8-byte Spill +; BE-NEXT: vldr d16, [sp, #128] @ 8-byte Reload +; BE-NEXT: vrev64.32 d20, d22 +; BE-NEXT: vldr d22, [sp] @ 8-byte Reload +; BE-NEXT: vrev64.32 d19, d18 +; BE-NEXT: vrev64.32 d17, d16 +; BE-NEXT: vrev64.32 d18, d22 +; BE-NEXT: vstr d10, [sp, #120] @ 8-byte Spill +; BE-NEXT: vstr d9, [sp, #112] @ 8-byte Spill +; BE-NEXT: vstr d15, [sp, #104] @ 8-byte Spill +; BE-NEXT: vstr d12, [sp, #96] @ 8-byte Spill +; BE-NEXT: vstr d8, [sp, #80] @ 8-byte Spill +; BE-NEXT: vstr d14, [sp, #72] @ 8-byte Spill +; BE-NEXT: vstr d13, [sp, #88] @ 8-byte Spill +; BE-NEXT: vst1.64 {d20, d21}, [r10:128]! +; BE-NEXT: vrev64.32 d16, d11 +; BE-NEXT: vst1.64 {d18, d19}, [r10:128]! +; BE-NEXT: vst1.64 {d16, d17}, [r10:128]! 
+; BE-NEXT: bl llrintl +; BE-NEXT: ldr r2, [sp, #608] +; BE-NEXT: mov r8, r1 +; BE-NEXT: ldr r3, [sp, #612] +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: mov r0, r6 +; BE-NEXT: mov r1, r7 +; BE-NEXT: ldr r5, [sp, #456] +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #620 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: mov r0, r4 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #444 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: mov r0, r11 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #460 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #572 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: ldr r0, [sp, #568] +; BE-NEXT: mov r5, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: vldr d16, [sp, #16] @ 8-byte Reload +; BE-NEXT: vldr d18, [sp, #56] @ 8-byte Reload +; BE-NEXT: vrev64.32 d17, d16 +; BE-NEXT: ldr r2, [sp, #304] +; BE-NEXT: vrev64.32 d16, d18 +; BE-NEXT: ldr r3, [sp, #308] +; BE-NEXT: vldr d18, [sp, #144] @ 8-byte Reload +; BE-NEXT: vldr d20, [sp, #64] @ 8-byte Reload +; BE-NEXT: vrev64.32 d19, d18 +; BE-NEXT: vrev64.32 d18, d20 +; BE-NEXT: vldr d20, [sp, #40] @ 8-byte Reload +; BE-NEXT: vldr d22, [sp, #32] @ 8-byte Reload +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #296] +; BE-NEXT: vmov.32 d10[1], r7 +; BE-NEXT: ldr r7, [sp, #412] +; BE-NEXT: vmov.32 d9[1], r6 +; BE-NEXT: ldr r6, [sp, #408] +; BE-NEXT: vmov.32 d8[1], r8 +; BE-NEXT: add r8, r9, #128 +; BE-NEXT: vrev64.32 d21, d20 +; BE-NEXT: vmov.32 d13[1], r5 +; BE-NEXT: ldr r5, [sp, #300] +; BE-NEXT: vrev64.32 d20, d22 +; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: mov r1, r5 +; BE-NEXT: vstr d10, [sp, #136] @ 8-byte Spill +; BE-NEXT: vstr d9, [sp, #128] @ 8-byte Spill +; BE-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-NEXT: vst1.64 {d20, d21}, [r10:128] +; BE-NEXT: vst1.64 {d18, 
d19}, [r8:128]! +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: ldr r4, [sp, #424] +; BE-NEXT: ldr r10, [sp, #376] +; BE-NEXT: vst1.64 {d16, d17}, [r8:128]! +; BE-NEXT: bl llrintl +; BE-NEXT: ldr r2, [sp, #416] +; BE-NEXT: mov r11, r1 +; BE-NEXT: ldr r3, [sp, #420] +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: mov r0, r6 +; BE-NEXT: mov r1, r7 +; BE-NEXT: ldr r5, [sp, #392] +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #428 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: mov r0, r4 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #380 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: mov r0, r10 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #396 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: add r3, sp, #284 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: ldr r0, [sp, #280] +; BE-NEXT: mov r5, r1 +; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: bl llrintl +; BE-NEXT: vldr d16, [sp, #120] @ 8-byte Reload +; BE-NEXT: vldr d18, [sp, #112] @ 8-byte Reload +; BE-NEXT: vrev64.32 d17, d16 +; BE-NEXT: vldr d26, [sp, #136] @ 8-byte Reload +; BE-NEXT: vrev64.32 d16, d18 +; BE-NEXT: vldr d18, [sp, #104] @ 8-byte Reload +; BE-NEXT: vrev64.32 d31, d26 +; BE-NEXT: vldr d26, [sp, #128] @ 8-byte Reload +; BE-NEXT: vldr d20, [sp, #96] @ 8-byte Reload +; BE-NEXT: vrev64.32 d19, d18 +; BE-NEXT: vrev64.32 d18, d20 +; BE-NEXT: vldr d20, [sp, #80] @ 8-byte Reload +; BE-NEXT: vrev64.32 d30, d26 +; BE-NEXT: vldr d26, [sp, #24] @ 8-byte Reload +; BE-NEXT: vmov.32 d10[1], r5 +; BE-NEXT: vldr d22, [sp, #72] @ 8-byte Reload +; BE-NEXT: vrev64.32 d21, d20 +; BE-NEXT: vrev64.32 d1, d26 +; BE-NEXT: vmov.32 d9[1], r7 +; BE-NEXT: vmov.32 d12[1], r4 +; BE-NEXT: vrev64.32 d20, d22 +; BE-NEXT: vldr d22, [sp, #88] @ 8-byte Reload +; BE-NEXT: vmov.32 d8[1], r6 +; BE-NEXT: vrev64.32 d0, d14 +; BE-NEXT: vmov.32 d28[0], r0 +; 
BE-NEXT: add r0, r9, #64 +; BE-NEXT: vrev64.32 d3, d10 +; BE-NEXT: vldr d24, [sp, #48] @ 8-byte Reload +; BE-NEXT: vrev64.32 d23, d22 +; BE-NEXT: vrev64.32 d5, d9 +; BE-NEXT: vst1.64 {d0, d1}, [r8:128]! +; BE-NEXT: vrev64.32 d2, d12 +; BE-NEXT: vmov.32 d15[1], r11 +; BE-NEXT: vrev64.32 d22, d24 +; BE-NEXT: vrev64.32 d25, d13 +; BE-NEXT: vrev64.32 d4, d8 +; BE-NEXT: vst1.64 {d30, d31}, [r8:128] +; BE-NEXT: vst1.64 {d2, d3}, [r0:128]! +; BE-NEXT: vmov.32 d28[1], r1 +; BE-NEXT: vrev64.32 d24, d11 +; BE-NEXT: vst1.64 {d4, d5}, [r0:128]! +; BE-NEXT: vrev64.32 d27, d15 +; BE-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-NEXT: vrev64.32 d26, d28 +; BE-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-NEXT: vst1.64 {d20, d21}, [r9:128]! +; BE-NEXT: vst1.64 {d26, d27}, [r9:128]! +; BE-NEXT: vst1.64 {d18, d19}, [r9:128]! +; BE-NEXT: vst1.64 {d16, d17}, [r9:128] +; BE-NEXT: add sp, sp, #152 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: add sp, sp, #4 +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-NEON-LABEL: llrint_v32f128: +; BE-NEON: @ %bb.0: +; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-NEON-NEXT: .pad #4 +; BE-NEON-NEXT: sub sp, sp, #4 +; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: .pad #152 +; BE-NEON-NEXT: sub sp, sp, #152 +; BE-NEON-NEXT: str r3, [sp, #120] @ 4-byte Spill +; BE-NEON-NEXT: add r3, sp, #712 +; BE-NEON-NEXT: str r2, [sp, #112] @ 4-byte Spill +; BE-NEON-NEXT: mov r9, r0 +; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: ldr r7, [sp, #648] +; BE-NEON-NEXT: add r3, sp, #652 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: mov r0, r7 +; BE-NEON-NEXT: ldr r6, [sp, #520] +; BE-NEON-NEXT: ldr r8, [sp, #632] +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, 
sp, #524 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: mov r0, r6 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #636 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: mov r0, r8 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #488] +; BE-NEON-NEXT: vmov.32 d8[1], r4 +; BE-NEON-NEXT: ldr r1, [sp, #492] +; BE-NEON-NEXT: ldr r2, [sp, #496] +; BE-NEON-NEXT: vmov.32 d10[1], r7 +; BE-NEON-NEXT: ldr r3, [sp, #500] +; BE-NEON-NEXT: vmov.32 d9[1], r5 +; BE-NEON-NEXT: vstr d8, [sp, #144] @ 8-byte Spill +; BE-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill +; BE-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #680 +; BE-NEON-NEXT: str r0, [sp, #104] @ 4-byte Spill +; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #728] +; BE-NEON-NEXT: ldr r2, [sp, #736] +; BE-NEON-NEXT: vmov.32 d11[1], r6 +; BE-NEON-NEXT: ldr r6, [sp, #732] +; BE-NEON-NEXT: ldr r3, [sp, #740] +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: ldr r5, [sp, #504] +; BE-NEON-NEXT: mov r1, r6 +; BE-NEON-NEXT: ldr r7, [sp, #744] +; BE-NEON-NEXT: ldr r4, [sp, #748] +; BE-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-NEON-NEXT: vstr d16, [sp, #8] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: ldr r2, [sp, #752] +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: ldr r3, [sp, #756] +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: mov r0, r7 +; BE-NEON-NEXT: mov r1, r4 +; BE-NEON-NEXT: ldr r10, [sp, #552] +; BE-NEON-NEXT: ldr r6, [sp, #664] +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #508 +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: mov r0, r5 
+; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #540 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #536] +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #556 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: mov r0, r10 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #668 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: mov r0, r6 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #700 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #696] +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload +; BE-NEON-NEXT: ldr r2, [sp, #256] +; BE-NEON-NEXT: vmov.32 d13[1], r11 +; BE-NEON-NEXT: ldr r3, [sp, #260] +; BE-NEON-NEXT: vmov.32 d14[1], r6 +; BE-NEON-NEXT: ldr r6, [sp, #264] +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: ldr r4, [sp, #344] +; BE-NEON-NEXT: vmov.32 d12[1], r5 +; BE-NEON-NEXT: ldr r5, [sp, #312] +; BE-NEON-NEXT: vmov.32 d8[1], r8 +; BE-NEON-NEXT: ldr r8, [sp, #328] +; BE-NEON-NEXT: vmov.32 d10[1], r7 +; BE-NEON-NEXT: vstr d13, [sp, #32] @ 8-byte Spill +; BE-NEON-NEXT: vmov.32 d11[1], r1 +; BE-NEON-NEXT: ldr r1, [sp, #120] @ 4-byte Reload +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; BE-NEON-NEXT: vstr d14, [sp] @ 8-byte Spill +; BE-NEON-NEXT: vstr d9, [sp, #16] @ 8-byte Spill +; BE-NEON-NEXT: vstr d12, [sp, #56] @ 8-byte Spill +; BE-NEON-NEXT: vstr d10, [sp, #64] @ 8-byte Spill +; BE-NEON-NEXT: vstr d8, [sp, #40] @ 8-byte Spill +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #268 +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: mov 
r0, r6 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #316 +; BE-NEON-NEXT: mov r10, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: mov r0, r5 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #332 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: mov r0, r8 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #348 +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: mov r0, r4 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #364 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #360] +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #476 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #472] +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vmov.32 d16[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-NEON-NEXT: ldr r2, [sp, #592] +; BE-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d16[1], r1 +; BE-NEON-NEXT: ldr r1, [sp, #588] +; BE-NEON-NEXT: ldr r3, [sp, #596] +; BE-NEON-NEXT: vldr d22, [sp, #24] @ 8-byte Reload +; BE-NEON-NEXT: vldr d18, [sp, #8] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d21, d20 +; BE-NEON-NEXT: vmov.32 d10[1], r6 +; BE-NEON-NEXT: ldr r6, [sp, #600] +; BE-NEON-NEXT: vmov.32 d9[1], r4 +; BE-NEON-NEXT: ldr r4, [sp, #616] +; BE-NEON-NEXT: vmov.32 d12[1], r7 +; BE-NEON-NEXT: ldr r7, [sp, #604] +; BE-NEON-NEXT: vmov.32 d8[1], r10 +; BE-NEON-NEXT: add r10, r9, #192 +; BE-NEON-NEXT: vmov.32 d14[1], r11 +; BE-NEON-NEXT: ldr r11, [sp, #440] +; BE-NEON-NEXT: vmov.32 d13[1], r0 +; BE-NEON-NEXT: ldr r0, [sp, #584] +; BE-NEON-NEXT: vmov.32 d15[1], r5 +; BE-NEON-NEXT: vstr d16, [sp, #48] @ 8-byte Spill +; 
BE-NEON-NEXT: vldr d16, [sp, #128] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d20, d22 +; BE-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d19, d18 +; BE-NEON-NEXT: vrev64.32 d17, d16 +; BE-NEON-NEXT: vrev64.32 d18, d22 +; BE-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill +; BE-NEON-NEXT: vstr d9, [sp, #112] @ 8-byte Spill +; BE-NEON-NEXT: vstr d15, [sp, #104] @ 8-byte Spill +; BE-NEON-NEXT: vstr d12, [sp, #96] @ 8-byte Spill +; BE-NEON-NEXT: vstr d8, [sp, #80] @ 8-byte Spill +; BE-NEON-NEXT: vstr d14, [sp, #72] @ 8-byte Spill +; BE-NEON-NEXT: vstr d13, [sp, #88] @ 8-byte Spill +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]! +; BE-NEON-NEXT: vrev64.32 d16, d11 +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]! +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: ldr r2, [sp, #608] +; BE-NEON-NEXT: mov r8, r1 +; BE-NEON-NEXT: ldr r3, [sp, #612] +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: mov r0, r6 +; BE-NEON-NEXT: mov r1, r7 +; BE-NEON-NEXT: ldr r5, [sp, #456] +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #620 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: mov r0, r4 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #444 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: mov r0, r11 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #460 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d11[0], r0 +; BE-NEON-NEXT: mov r0, r5 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #572 +; BE-NEON-NEXT: vmov.32 d13[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #568] +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vldr d16, [sp, #16] @ 8-byte Reload +; BE-NEON-NEXT: vldr d18, [sp, #56] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d17, d16 +; 
BE-NEON-NEXT: ldr r2, [sp, #304] +; BE-NEON-NEXT: vrev64.32 d16, d18 +; BE-NEON-NEXT: ldr r3, [sp, #308] +; BE-NEON-NEXT: vldr d18, [sp, #144] @ 8-byte Reload +; BE-NEON-NEXT: vldr d20, [sp, #64] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d19, d18 +; BE-NEON-NEXT: vrev64.32 d18, d20 +; BE-NEON-NEXT: vldr d20, [sp, #40] @ 8-byte Reload +; BE-NEON-NEXT: vldr d22, [sp, #32] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d14[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #296] +; BE-NEON-NEXT: vmov.32 d10[1], r7 +; BE-NEON-NEXT: ldr r7, [sp, #412] +; BE-NEON-NEXT: vmov.32 d9[1], r6 +; BE-NEON-NEXT: ldr r6, [sp, #408] +; BE-NEON-NEXT: vmov.32 d8[1], r8 +; BE-NEON-NEXT: add r8, r9, #128 +; BE-NEON-NEXT: vrev64.32 d21, d20 +; BE-NEON-NEXT: vmov.32 d13[1], r5 +; BE-NEON-NEXT: ldr r5, [sp, #300] +; BE-NEON-NEXT: vrev64.32 d20, d22 +; BE-NEON-NEXT: vmov.32 d14[1], r1 +; BE-NEON-NEXT: mov r1, r5 +; BE-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill +; BE-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill +; BE-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128] +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r8:128]! +; BE-NEON-NEXT: vmov.32 d11[1], r4 +; BE-NEON-NEXT: ldr r4, [sp, #424] +; BE-NEON-NEXT: ldr r10, [sp, #376] +; BE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! 
+; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: ldr r2, [sp, #416] +; BE-NEON-NEXT: mov r11, r1 +; BE-NEON-NEXT: ldr r3, [sp, #420] +; BE-NEON-NEXT: vmov.32 d15[0], r0 +; BE-NEON-NEXT: mov r0, r6 +; BE-NEON-NEXT: mov r1, r7 +; BE-NEON-NEXT: ldr r5, [sp, #392] +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #428 +; BE-NEON-NEXT: mov r6, r1 +; BE-NEON-NEXT: vmov.32 d8[0], r0 +; BE-NEON-NEXT: mov r0, r4 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #380 +; BE-NEON-NEXT: mov r7, r1 +; BE-NEON-NEXT: vmov.32 d9[0], r0 +; BE-NEON-NEXT: mov r0, r10 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #396 +; BE-NEON-NEXT: mov r4, r1 +; BE-NEON-NEXT: vmov.32 d12[0], r0 +; BE-NEON-NEXT: mov r0, r5 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: add r3, sp, #284 +; BE-NEON-NEXT: vmov.32 d10[0], r0 +; BE-NEON-NEXT: ldr r0, [sp, #280] +; BE-NEON-NEXT: mov r5, r1 +; BE-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-NEON-NEXT: bl llrintl +; BE-NEON-NEXT: vldr d16, [sp, #120] @ 8-byte Reload +; BE-NEON-NEXT: vldr d18, [sp, #112] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d17, d16 +; BE-NEON-NEXT: vldr d26, [sp, #136] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d16, d18 +; BE-NEON-NEXT: vldr d18, [sp, #104] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d31, d26 +; BE-NEON-NEXT: vldr d26, [sp, #128] @ 8-byte Reload +; BE-NEON-NEXT: vldr d20, [sp, #96] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d19, d18 +; BE-NEON-NEXT: vrev64.32 d18, d20 +; BE-NEON-NEXT: vldr d20, [sp, #80] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d30, d26 +; BE-NEON-NEXT: vldr d26, [sp, #24] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d10[1], r5 +; BE-NEON-NEXT: vldr d22, [sp, #72] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d21, d20 +; BE-NEON-NEXT: vrev64.32 d1, d26 +; BE-NEON-NEXT: vmov.32 d9[1], r7 +; BE-NEON-NEXT: vmov.32 d12[1], r4 +; BE-NEON-NEXT: vrev64.32 d20, d22 +; BE-NEON-NEXT: vldr d22, 
[sp, #88] @ 8-byte Reload +; BE-NEON-NEXT: vmov.32 d8[1], r6 +; BE-NEON-NEXT: vrev64.32 d0, d14 +; BE-NEON-NEXT: vmov.32 d28[0], r0 +; BE-NEON-NEXT: add r0, r9, #64 +; BE-NEON-NEXT: vrev64.32 d3, d10 +; BE-NEON-NEXT: vldr d24, [sp, #48] @ 8-byte Reload +; BE-NEON-NEXT: vrev64.32 d23, d22 +; BE-NEON-NEXT: vrev64.32 d5, d9 +; BE-NEON-NEXT: vst1.64 {d0, d1}, [r8:128]! +; BE-NEON-NEXT: vrev64.32 d2, d12 +; BE-NEON-NEXT: vmov.32 d15[1], r11 +; BE-NEON-NEXT: vrev64.32 d22, d24 +; BE-NEON-NEXT: vrev64.32 d25, d13 +; BE-NEON-NEXT: vrev64.32 d4, d8 +; BE-NEON-NEXT: vst1.64 {d30, d31}, [r8:128] +; BE-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]! +; BE-NEON-NEXT: vmov.32 d28[1], r1 +; BE-NEON-NEXT: vrev64.32 d24, d11 +; BE-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 d27, d15 +; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-NEON-NEXT: vrev64.32 d26, d28 +; BE-NEON-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-NEON-NEXT: vst1.64 {d20, d21}, [r9:128]! +; BE-NEON-NEXT: vst1.64 {d26, d27}, [r9:128]! +; BE-NEON-NEXT: vst1.64 {d18, d19}, [r9:128]! 
+; BE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] +; BE-NEON-NEXT: add sp, sp, #152 +; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEON-NEXT: add sp, sp, #4 +; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <32 x i64> @llvm.llrint.v32i64.v16f128(<32 x fp128> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>) diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll new file mode 100644 index 0000000000000..50c8b9ff6d913 --- /dev/null +++ b/llvm/test/CodeGen/ARM/vector-lrint.ll @@ -0,0 +1,13251 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE-I32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE-I64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-I32-NEON +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-I64-NEON +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE-I32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE-I64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I32-NEON +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I64-NEON + +; FIXME: crash "Do not know how to soft promote this operator's operand!" 
+; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { +; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x) +; ret <1 x iXLen> %a +; } +; declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>) + +; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { +; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x) +; ret <2 x iXLen> %a +; } +; declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>) + +; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { +; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x) +; ret <4 x iXLen> %a +; } +; declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>) + +; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { +; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x) +; ret <8 x iXLen> %a +; } +; declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) + +; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { +; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x) +; ret <16 x iXLen> %a +; } +; declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) + +; define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { +; %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x) +; ret <32 x iXLen> %a +; } +; declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>) + +define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { +; LE-I32-LABEL: lrint_v1f32: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v1f32: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r11, lr} +; LE-I64-NEXT: push {r11, lr} +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEXT: vmov.32 d0[1], r1 +; LE-I64-NEXT: pop {r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v1f32: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v1f32: +; LE-I64-NEON: @ %bb.0: +; 
LE-I64-NEON-NEXT: .save {r11, lr} +; LE-I64-NEON-NEXT: push {r11, lr} +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d0[1], r1 +; LE-I64-NEON-NEXT: pop {r11, pc} +; +; BE-I32-LABEL: lrint_v1f32: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v1f32: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r11, lr} +; BE-I64-NEXT: push {r11, lr} +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: pop {r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v1f32: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v1f32: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r11, lr} +; BE-I64-NEON-NEXT: push {r11, lr} +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 d0, d16 +; BE-I64-NEON-NEXT: pop {r11, pc} + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) + +define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { +; LE-I32-LABEL: lrint_v2f32: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8, d9} +; LE-I32-NEXT: vpush {d8, d9} +; LE-I32-NEXT: vmov.f64 d8, d0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s17 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: vorr d0, d9, d9 +; LE-I32-NEXT: vpop {d8, d9} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v2f32: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, lr} +; LE-I64-NEXT: push {r4, lr} +; LE-I64-NEXT: .vsave {d10, d11} +; 
LE-I64-NEXT: vpush {d10, d11} +; LE-I64-NEXT: .vsave {d8} +; LE-I64-NEXT: vpush {d8} +; LE-I64-NEXT: vmov.f64 d8, d0 +; LE-I64-NEXT: vmov.f32 s0, s17 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s16 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEXT: vorr q0, q5, q5 +; LE-I64-NEXT: vpop {d8} +; LE-I64-NEXT: vpop {d10, d11} +; LE-I64-NEXT: pop {r4, pc} +; +; LE-I32-NEON-LABEL: lrint_v2f32: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9} +; LE-I32-NEON-NEXT: vpush {d8, d9} +; LE-I32-NEON-NEXT: vmov.f64 d8, d0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s17 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: vorr d0, d9, d9 +; LE-I32-NEON-NEXT: vpop {d8, d9} +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v2f32: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, lr} +; LE-I64-NEON-NEXT: push {r4, lr} +; LE-I64-NEON-NEXT: .vsave {d10, d11} +; LE-I64-NEON-NEXT: vpush {d10, d11} +; LE-I64-NEON-NEXT: .vsave {d8} +; LE-I64-NEON-NEXT: vpush {d8} +; LE-I64-NEON-NEXT: vmov.f64 d8, d0 +; LE-I64-NEON-NEXT: vmov.f32 s0, s17 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s16 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEON-NEXT: vorr q0, q5, q5 +; LE-I64-NEON-NEXT: vpop {d8} +; LE-I64-NEON-NEXT: vpop {d10, d11} +; LE-I64-NEON-NEXT: pop {r4, pc} +; +; BE-I32-LABEL: lrint_v2f32: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8, d9} +; BE-I32-NEXT: 
vpush {d8, d9} +; BE-I32-NEXT: vrev64.32 d8, d0 +; BE-I32-NEXT: vmov.f32 s0, s16 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s17 +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: vrev64.32 d0, d9 +; BE-I32-NEXT: vpop {d8, d9} +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v2f32: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, lr} +; BE-I64-NEXT: push {r4, lr} +; BE-I64-NEXT: .vsave {d10, d11} +; BE-I64-NEXT: vpush {d10, d11} +; BE-I64-NEXT: .vsave {d8} +; BE-I64-NEXT: vpush {d8} +; BE-I64-NEXT: vrev64.32 d8, d0 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vrev64.32 q0, q5 +; BE-I64-NEXT: vpop {d8} +; BE-I64-NEXT: vpop {d10, d11} +; BE-I64-NEXT: pop {r4, pc} +; +; BE-I32-NEON-LABEL: lrint_v2f32: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9} +; BE-I32-NEON-NEXT: vpush {d8, d9} +; BE-I32-NEON-NEXT: vrev64.32 d8, d0 +; BE-I32-NEON-NEXT: vmov.f32 s0, s16 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s17 +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 d0, d9 +; BE-I32-NEON-NEXT: vpop {d8, d9} +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v2f32: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, lr} +; BE-I64-NEON-NEXT: push {r4, lr} +; BE-I64-NEON-NEXT: .vsave {d10, d11} +; BE-I64-NEON-NEXT: vpush {d10, d11} +; BE-I64-NEON-NEXT: .vsave {d8} +; BE-I64-NEON-NEXT: vpush {d8} +; BE-I64-NEON-NEXT: vrev64.32 d8, d0 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; 
BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q0, q5 +; BE-I64-NEON-NEXT: vpop {d8} +; BE-I64-NEON-NEXT: vpop {d10, d11} +; BE-I64-NEON-NEXT: pop {r4, pc} + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) + ret <2 x iXLen> %a +} +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) + +define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { +; LE-I32-LABEL: lrint_v4f32: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11} +; LE-I32-NEXT: vpush {d8, d9, d10, d11} +; LE-I32-NEXT: vorr q4, q0, q0 +; LE-I32-NEXT: vmov.f32 s0, s18 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s16 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s19 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s17 +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vorr q0, q5, q5 +; LE-I32-NEXT: vpop {d8, d9, d10, d11} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v4f32: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, lr} +; LE-I64-NEXT: push {r4, r5, r6, lr} +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; LE-I64-NEXT: vorr q5, q0, q0 +; LE-I64-NEXT: vmov.f32 s0, s23 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s20 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s21 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s22 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d8[0], r0 +; 
LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: vmov.32 d9[1], r4 +; LE-I64-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q0, q6, q6 +; LE-I64-NEXT: vorr q1, q4, q4 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; LE-I64-NEXT: pop {r4, r5, r6, pc} +; +; LE-I32-NEON-LABEL: lrint_v4f32: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11} +; LE-I32-NEON-NEXT: vorr q4, q0, q0 +; LE-I32-NEON-NEXT: vmov.f32 s0, s18 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s16 +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s19 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s17 +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q5, q5 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11} +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v4f32: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, lr} +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; LE-I64-NEON-NEXT: vorr q5, q0, q0 +; LE-I64-NEON-NEXT: vmov.f32 s0, s23 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s20 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s21 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s22 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEON-NEXT: 
vmov.32 d9[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEON-NEXT: vorr q0, q6, q6 +; LE-I64-NEON-NEXT: vorr q1, q4, q4 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; LE-I64-NEON-NEXT: pop {r4, r5, r6, pc} +; +; BE-I32-LABEL: lrint_v4f32: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11} +; BE-I32-NEXT: vpush {d8, d9, d10, d11} +; BE-I32-NEXT: vrev64.32 q4, q0 +; BE-I32-NEXT: vmov.f32 s0, s18 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s16 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s19 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s17 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q5 +; BE-I32-NEXT: vpop {d8, d9, d10, d11} +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v4f32: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, lr} +; BE-I64-NEXT: push {r4, r5, r6, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; BE-I64-NEXT: vrev64.32 d8, d1 +; BE-I64-NEXT: vrev64.32 d9, d0 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s18 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s19 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: vmov.32 d13[1], r6 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vrev64.32 q0, q6 +; BE-I64-NEXT: vrev64.32 q1, q5 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; 
BE-I64-NEXT: pop {r4, r5, r6, pc} +; +; BE-I32-NEON-LABEL: lrint_v4f32: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11} +; BE-I32-NEON-NEXT: vrev64.32 q4, q0 +; BE-I32-NEON-NEXT: vmov.f32 s0, s18 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s16 +; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s19 +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s17 +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q0, q5 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11} +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v4f32: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, lr} +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; BE-I64-NEON-NEXT: vrev64.32 d8, d1 +; BE-I64-NEON-NEXT: vrev64.32 d9, d0 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s18 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s19 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r6 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q0, q6 +; BE-I64-NEON-NEXT: vrev64.32 q1, q5 +; BE-I64-NEON-NEXT: vpop {d8, d9, 
d10, d11, d12, d13} +; BE-I64-NEON-NEXT: pop {r4, r5, r6, pc} + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) + ret <4 x iXLen> %a +} +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) + +define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { +; LE-I32-LABEL: lrint_v8f32: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vorr q5, q1, q1 +; LE-I32-NEXT: vorr q7, q0, q0 +; LE-I32-NEXT: vmov.f32 s0, s20 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s22 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s30 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s28 +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s31 +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s29 +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s23 +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s21 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vorr q0, q6, q6 +; LE-I32-NEXT: vorr q1, q4, q4 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v8f32: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #40 +; LE-I64-NEXT: sub sp, sp, #40 +; LE-I64-NEXT: vorr q6, q1, q1 +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vorr q7, q0, q0 +; LE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-I64-NEXT: vmov.f32 s0, s27 +; 
LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s24 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s25 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vorr q6, q7, q7 +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: vmov.f32 s0, s26 +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s27 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s24 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s1 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s2 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEXT: vmov.32 d11[1], r10 +; LE-I64-NEXT: vmov.32 d9[1], r8 +; LE-I64-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEXT: vorr q0, q6, q6 +; LE-I64-NEXT: vmov.32 d10[1], r9 +; LE-I64-NEXT: vorr q1, q7, q7 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q2, q5, q5 +; LE-I64-NEXT: vorr q3, q4, q4 +; LE-I64-NEXT: add sp, sp, #40 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-I32-NEON-LABEL: lrint_v8f32: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, 
d14, d15} +; LE-I32-NEON-NEXT: vorr q5, q1, q1 +; LE-I32-NEON-NEXT: vorr q7, q0, q0 +; LE-I32-NEON-NEXT: vmov.f32 s0, s20 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s22 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s30 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s28 +; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s31 +; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s29 +; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s23 +; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s21 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q6, q6 +; LE-I32-NEON-NEXT: vorr q1, q4, q4 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v8f32: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #40 +; LE-I64-NEON-NEXT: sub sp, sp, #40 +; LE-I64-NEON-NEXT: vorr q6, q1, q1 +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vorr q7, q0, q0 +; LE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-I64-NEON-NEXT: vmov.f32 s0, s27 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s24 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s25 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; 
LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vorr q6, q7, q7 +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: vmov.f32 s0, s26 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s27 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s24 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s1 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s2 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r10 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r8 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEON-NEXT: vorr q0, q6, q6 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r9 +; LE-I64-NEON-NEXT: vorr q1, q7, q7 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEON-NEXT: vorr q2, q5, q5 +; LE-I64-NEON-NEXT: vorr q3, q4, q4 +; LE-I64-NEON-NEXT: add sp, sp, #40 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-I32-LABEL: lrint_v8f32: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vrev64.32 q4, q1 +; 
BE-I32-NEXT: vrev64.32 q5, q0 +; BE-I32-NEXT: vmov.f32 s0, s16 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s20 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s18 +; BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s22 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s19 +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s23 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s21 +; BE-I32-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s17 +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q7 +; BE-I32-NEXT: vrev64.32 q1, q6 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v8f32: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #32 +; BE-I64-NEXT: sub sp, sp, #32 +; BE-I64-NEXT: vorr q4, q1, q1 +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vorr q5, q0, q0 +; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I64-NEXT: vrev64.32 d12, d8 +; BE-I64-NEXT: vmov.f32 s0, s25 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s24 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vrev64.32 d0, d11 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vrev64.32 d8, d9 +; BE-I64-NEXT: vorr d9, d0, d0 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: bl 
lrintf +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vmov.f32 s0, s19 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d16 +; BE-I64-NEXT: vstr d8, [sp, #8] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vldr d0, [sp, #8] @ 8-byte Reload +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: vmov.f32 s0, s1 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vldr d0, [sp, #24] @ 8-byte Reload +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: vmov.32 d9[1], r6 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vmov.32 d15[1], r8 +; BE-I64-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEXT: vmov.32 d8[1], r5 +; BE-I64-NEXT: vmov.32 d10[1], r10 +; BE-I64-NEXT: vmov.32 d14[1], r9 +; BE-I64-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEXT: vrev64.32 q0, q4 +; BE-I64-NEXT: vrev64.32 q1, q5 +; BE-I64-NEXT: vrev64.32 q2, q7 +; BE-I64-NEXT: vrev64.32 q3, q6 +; BE-I64-NEXT: add sp, sp, #32 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-I32-NEON-LABEL: lrint_v8f32: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vrev64.32 q4, q1 +; BE-I32-NEON-NEXT: vrev64.32 q5, q0 +; BE-I32-NEON-NEXT: vmov.f32 s0, s16 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s20 +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s18 +; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEON-NEXT: bl 
lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s22 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s19 +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s23 +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s21 +; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s17 +; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q0, q7 +; BE-I32-NEON-NEXT: vrev64.32 q1, q6 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v8f32: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: .pad #32 +; BE-I64-NEON-NEXT: sub sp, sp, #32 +; BE-I64-NEON-NEXT: vorr q4, q1, q1 +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vorr q5, q0, q0 +; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I64-NEON-NEXT: vrev64.32 d12, d8 +; BE-I64-NEON-NEXT: vmov.f32 s0, s25 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s24 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vrev64.32 d0, d11 +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vrev64.32 d8, d9 +; BE-I64-NEON-NEXT: vorr d9, d0, d0 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; 
BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vmov.f32 s0, s19 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d16 +; BE-I64-NEON-NEXT: vstr d8, [sp, #8] @ 8-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vldr d0, [sp, #8] @ 8-byte Reload +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: vmov.f32 s0, s1 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vldr d0, [sp, #24] @ 8-byte Reload +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r6 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r8 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r5 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r10 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r9 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q0, q4 +; BE-I64-NEON-NEXT: vrev64.32 q1, q5 +; BE-I64-NEON-NEXT: vrev64.32 q2, q7 +; BE-I64-NEON-NEXT: vrev64.32 q3, q6 +; BE-I64-NEON-NEXT: add sp, sp, #32 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) + ret <8 x iXLen> %a +} +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) + +define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { +; LE-I32-LABEL: lrint_v16f32: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; 
LE-I32-NEXT: .pad #80 +; LE-I32-NEXT: sub sp, sp, #80 +; LE-I32-NEXT: vorr q5, q3, q3 +; LE-I32-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vorr q6, q2, q2 +; LE-I32-NEXT: vorr q7, q1, q1 +; LE-I32-NEXT: vmov.f32 s0, s20 +; LE-I32-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s22 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s24 +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s26 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: vorr q4, q7, q7 +; LE-I32-NEXT: vmov.f32 s0, s16 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s18 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s26 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s24 +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s27 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s25 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s19 +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s17 +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s27 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s25 +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vldmia lr, {d8, d9} @ 
16-byte Reload +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s19 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s17 +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: vorr q0, q7, q7 +; LE-I32-NEXT: vldmia lr, {d4, d5} @ 16-byte Reload +; LE-I32-NEXT: vorr q1, q5, q5 +; LE-I32-NEXT: vorr q3, q6, q6 +; LE-I32-NEXT: add sp, sp, #80 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v16f32: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #160 +; LE-I64-NEXT: sub sp, sp, #160 +; LE-I64-NEXT: add lr, sp, #112 +; LE-I64-NEXT: vorr q5, q3, q3 +; LE-I64-NEXT: vorr q6, q0, q0 +; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #48 +; LE-I64-NEXT: vorr q7, q1, q1 +; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEXT: vmov.f32 s0, s23 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s24 +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s25 +; LE-I64-NEXT: str r1, [sp, #84] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: bl lrintf +; 
LE-I64-NEXT: vmov.f32 s0, s28 +; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s29 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s30 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s31 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #112 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s29 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s22 +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vmov.f32 s0, s21 +; LE-I64-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEXT: str r1, [sp, #40] @ 4-byte Spill +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d16[0], r0 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s20 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vmov.32 d9[1], r6 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s31 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d8[1], r9 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #64 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; 
LE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #48 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s27 +; LE-I64-NEXT: vmov.32 d11[1], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s26 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d10[1], r0 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d17[1], r0 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #112 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s20 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov.f32 s0, s22 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d16[0], r0 +; LE-I64-NEXT: vmov.32 d17[1], r11 +; LE-I64-NEXT: vorr q6, q8, q8 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: vmov.32 d9[1], r9 +; LE-I64-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEXT: vmov.32 d19[1], r10 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vmov.32 d16[1], r0 +; LE-I64-NEXT: add r0, r4, #64 +; LE-I64-NEXT: vmov.32 d18[1], r8 +; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! 
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: vmov.32 d15[1], r7 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #64 +; LE-I64-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-I64-NEXT: vst1.64 {d14, d15}, [r4:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-I64-NEXT: add sp, sp, #160 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v16f32: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: .pad #80 +; LE-I32-NEON-NEXT: sub sp, sp, #80 +; LE-I32-NEON-NEXT: vorr q5, q3, q3 +; LE-I32-NEON-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vorr q6, q2, q2 +; LE-I32-NEON-NEXT: vorr q7, q1, q1 +; LE-I32-NEON-NEXT: vmov.f32 s0, s20 +; LE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s22 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s24 +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s26 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: vstmia lr, 
{d8, d9} @ 16-byte Spill +; LE-I32-NEON-NEXT: vorr q4, q7, q7 +; LE-I32-NEON-NEXT: vmov.f32 s0, s16 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s18 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s26 +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s24 +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s27 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s25 +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s19 +; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s17 +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s27 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s25 +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s19 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s17 +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q7, q7 +; 
LE-I32-NEON-NEXT: vldmia lr, {d4, d5} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr q1, q5, q5 +; LE-I32-NEON-NEXT: vorr q3, q6, q6 +; LE-I32-NEON-NEXT: add sp, sp, #80 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v16f32: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: .pad #4 +; LE-I64-NEON-NEXT: sub sp, sp, #4 +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #160 +; LE-I64-NEON-NEXT: sub sp, sp, #160 +; LE-I64-NEON-NEXT: add lr, sp, #112 +; LE-I64-NEON-NEXT: vorr q5, q3, q3 +; LE-I64-NEON-NEXT: vorr q6, q0, q0 +; LE-I64-NEON-NEXT: mov r4, r0 +; LE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #48 +; LE-I64-NEON-NEXT: vorr q7, q1, q1 +; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEON-NEXT: vmov.f32 s0, s23 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s24 +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s25 +; LE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s28 +; LE-I64-NEON-NEXT: add lr, sp, #128 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s29 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s30 +; 
LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s31 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #112 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s29 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s22 +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: vmov.f32 s0, s21 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s20 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r6 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s31 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r9 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #64 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #128 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; 
LE-I64-NEON-NEXT: add lr, sp, #48 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s27 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s26 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #128 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d17[1], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #112 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s20 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vmov.f32 s0, s22 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d17[1], r11 +; LE-I64-NEON-NEXT: vorr q6, q8, q8 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #128 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r9 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEON-NEXT: vmov.32 d19[1], r10 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEON-NEXT: vmov.32 d16[1], r0 +; LE-I64-NEON-NEXT: add r0, r4, #64 +; LE-I64-NEON-NEXT: vmov.32 d18[1], r8 +; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! 
+; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEON-NEXT: vmov.32 d15[1], r7 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #64 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r4:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-I64-NEON-NEXT: add sp, sp, #160 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: add sp, sp, #4 +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v16f32: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #96 +; BE-I32-NEXT: sub sp, sp, #96 +; BE-I32-NEXT: vrev64.32 q3, q3 +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vrev64.32 q4, q0 +; BE-I32-NEXT: vmov.f32 s0, s12 +; BE-I32-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vrev64.32 q5, q1 +; BE-I32-NEXT: vrev64.32 q7, q2 +; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s16 +; BE-I32-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s18 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s20 +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: add lr, 
sp, #16 +; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s22 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s28 +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: vstmia sp, {d8, d9} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s22 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s30 +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s23 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s31 +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s29 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s19 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s17 +; BE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s19 +; BE-I32-NEXT: vorr q7, q5, q5 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s17 +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s1 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: 
vrev64.32 q0, q5 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vrev64.32 q1, q7 +; BE-I32-NEXT: vmov.32 d16[1], r0 +; BE-I32-NEXT: vrev64.32 q2, q6 +; BE-I32-NEXT: vrev64.32 q3, q8 +; BE-I32-NEXT: add sp, sp, #96 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v16f32: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #144 +; BE-I64-NEXT: sub sp, sp, #144 +; BE-I64-NEXT: vorr q6, q3, q3 +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vorr q7, q0, q0 +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: vrev64.32 d8, d13 +; BE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vrev64.32 d8, d14 +; BE-I64-NEXT: add lr, sp, #128 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vrev64.32 d9, d12 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: vstr d9, [sp, #64] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s19 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: vrev64.32 d9, d15 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s18 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: bl 
lrintf +; BE-I64-NEXT: vmov.f32 s0, s19 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vldr d0, [sp, #64] @ 8-byte Reload +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #40 +; BE-I64-NEXT: str r1, [sp, #60] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d15[1], r7 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d16 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vmov.32 d13[1], r6 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d17 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d12[1], r9 +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #128 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d16 +; BE-I64-NEXT: vmov.32 d11[1], r0 +; 
BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-I64-NEXT: add lr, sp, #128 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d10[1], r0 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #40 +; BE-I64-NEXT: vrev64.32 d8, d17 +; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: vmov.32 d13[1], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #60] @ 4-byte Reload +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d12[1], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add r0, r4, #64 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vmov.32 d17[1], r10 +; BE-I64-NEXT: vmov.32 d16[1], r11 +; BE-I64-NEXT: vorr q12, q8, q8 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #128 +; BE-I64-NEXT: vmov.32 d15[1], r7 +; BE-I64-NEXT: vmov.32 d11[1], r6 +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: vmov.32 d16[1], r9 +; BE-I64-NEXT: vrev64.32 q14, q7 +; BE-I64-NEXT: vorr q13, q8, q8 +; BE-I64-NEXT: vrev64.32 q15, q5 +; BE-I64-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 q8, q6 +; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]! 
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEXT: vrev64.32 q9, q9 +; BE-I64-NEXT: vrev64.32 q10, q10 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vrev64.32 q11, q11 +; BE-I64-NEXT: vrev64.32 q12, q12 +; BE-I64-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-I64-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-I64-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-I64-NEXT: vrev64.32 q13, q13 +; BE-I64-NEXT: vst1.64 {d24, d25}, [r4:128]! +; BE-I64-NEXT: vst1.64 {d26, d27}, [r4:128] +; BE-I64-NEXT: add sp, sp, #144 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v16f32: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: .pad #96 +; BE-I32-NEON-NEXT: sub sp, sp, #96 +; BE-I32-NEON-NEXT: vrev64.32 q3, q3 +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vrev64.32 q4, q0 +; BE-I32-NEON-NEXT: vmov.f32 s0, s12 +; BE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vrev64.32 q5, q1 +; BE-I32-NEON-NEXT: vrev64.32 q7, q2 +; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s16 +; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s18 +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s20 +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte 
Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s22 +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s28 +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: vstmia sp, {d8, d9} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s22 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s30 +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s23 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s31 +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s29 +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s19 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s17 +; BE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s19 +; BE-I32-NEON-NEXT: vorr q7, q5, q5 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s17 +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #64 +; 
BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s1 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vrev64.32 q0, q5 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vrev64.32 q1, q7 +; BE-I32-NEON-NEXT: vmov.32 d16[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q2, q6 +; BE-I32-NEON-NEXT: vrev64.32 q3, q8 +; BE-I32-NEON-NEXT: add sp, sp, #96 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v16f32: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: .pad #4 +; BE-I64-NEON-NEXT: sub sp, sp, #4 +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: .pad #144 +; BE-I64-NEON-NEXT: sub sp, sp, #144 +; BE-I64-NEON-NEXT: vorr q6, q3, q3 +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: vorr q7, q0, q0 +; BE-I64-NEON-NEXT: mov r4, r0 +; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: vrev64.32 d8, d13 +; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vrev64.32 d8, d14 +; BE-I64-NEON-NEXT: add lr, sp, #128 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vrev64.32 d9, d12 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: vstr d9, [sp, #64] @ 8-byte Spill +; BE-I64-NEON-NEXT: 
bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s19 +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: vrev64.32 d9, d15 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s18 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s19 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vldr d0, [sp, #64] @ 8-byte Reload +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #40 +; BE-I64-NEON-NEXT: str r1, [sp, #60] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d15[1], r7 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d16 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r6 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d17 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; 
BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r9 +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #128 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d16 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #128 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #40 +; BE-I64-NEON-NEXT: vrev64.32 d8, d17 +; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #60] @ 4-byte Reload +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add r0, r4, #64 +; 
BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r10 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r11 +; BE-I64-NEON-NEXT: vorr q12, q8, q8 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #128 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r7 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r6 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r9 +; BE-I64-NEON-NEXT: vrev64.32 q14, q7 +; BE-I64-NEON-NEXT: vorr q13, q8, q8 +; BE-I64-NEON-NEXT: vrev64.32 q15, q5 +; BE-I64-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 q8, q6 +; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEON-NEXT: vrev64.32 q9, q9 +; BE-I64-NEON-NEXT: vrev64.32 q10, q10 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vrev64.32 q11, q11 +; BE-I64-NEON-NEXT: vrev64.32 q12, q12 +; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-I64-NEON-NEXT: vrev64.32 q13, q13 +; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r4:128]! 
+; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r4:128] +; BE-I64-NEON-NEXT: add sp, sp, #144 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: add sp, sp, #4 +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) + ret <16 x iXLen> %a +} +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) + +define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { +; LE-I32-LABEL: lrint_v32f32: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, lr} +; LE-I32-NEXT: push {r4, r5, r6, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: .pad #144 +; LE-I32-NEXT: sub sp, sp, #144 +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: add r0, sp, #224 +; LE-I32-NEXT: vorr q4, q0, q0 +; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vorr q6, q3, q3 +; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I32-NEXT: vmov.f32 s0, s4 +; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: vorr q5, q1, q1 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #272 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #240 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s18 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s22 +; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vmov.32 d17[0], r0 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: add lr, sp, #128 +; LE-I32-NEXT: vmov.32 d15[0], 
r0 +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s20 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s22 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: vorr q7, q5, q5 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s26 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s24 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s27 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s25 +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s31 +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s29 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vldmia sp, {d14, d15} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s31 +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: add lr, sp, #128 +; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s23 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s29 +; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s20 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: add lr, sp, #128 +; LE-I32-NEXT: add r0, sp, #256 +; LE-I32-NEXT: vld1.64 {d14, d15}, [r0] +; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s21 +; LE-I32-NEXT: vorr q4, q6, q6 +; LE-I32-NEXT: vmov.32 
d8[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vorr q6, q7, q7 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: vmov.f32 s0, s24 +; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s18 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s16 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s19 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s26 +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s17 +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s20 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEXT: vmov.f32 s0, s26 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s24 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s27 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s22 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s25 +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s23 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s21 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte 
Reload +; LE-I32-NEXT: vmov.f32 s0, s27 +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.f32 s0, s25 +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #128 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I32-NEXT: add r0, r4, #64 +; LE-I32-NEXT: vst1.32 {d8, d9}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]! 
+; LE-I32-NEXT: vst1.64 {d14, d15}, [r0:128] +; LE-I32-NEXT: add sp, sp, #144 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: pop {r4, r5, r6, pc} +; +; LE-I64-LABEL: lrint_v32f32: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #184 +; LE-I64-NEXT: sub sp, sp, #184 +; LE-I64-NEXT: add lr, sp, #152 +; LE-I64-NEXT: vorr q7, q3, q3 +; LE-I64-NEXT: vorr q4, q2, q2 +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEXT: vmov.f32 s0, s3 +; LE-I64-NEXT: str r0, [sp, #68] @ 4-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s18 +; LE-I64-NEXT: add lr, sp, #168 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: str r1, [sp, #16] @ 4-byte Spill +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s16 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s17 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s19 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s31 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s30 +; LE-I64-NEXT: str r1, [sp, #8] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: vmov.32 d11[1], r7 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s29 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: str r1, [sp, #12] @ 4-byte Spill +; 
LE-I64-NEXT: vmov.32 d13[1], r4 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: add r0, sp, #320 +; LE-I64-NEXT: add lr, sp, #120 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vld1.64 {d0, d1}, [r0] +; LE-I64-NEXT: add r0, sp, #304 +; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: vld1.64 {d0, d1}, [r0] +; LE-I64-NEXT: add r0, sp, #336 +; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #32 +; LE-I64-NEXT: vld1.64 {d0, d1}, [r0] +; LE-I64-NEXT: add r0, sp, #288 +; LE-I64-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #48 +; LE-I64-NEXT: vld1.64 {d0, d1}, [r0] +; LE-I64-NEXT: vmov.32 d10[1], r8 +; LE-I64-NEXT: add r8, r5, #64 +; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #152 +; LE-I64-NEXT: vst1.64 {d12, d13}, [r8:128]! +; LE-I64-NEXT: vst1.64 {d10, d11}, [r8:128]! 
+; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s27 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s28 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s26 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: add lr, sp, #136 +; LE-I64-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #168 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s26 +; LE-I64-NEXT: vmov.32 d11[1], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s25 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: add lr, sp, #168 +; LE-I64-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vorr q5, q6, q6 +; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d15[1], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s20 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d14[1], r0 +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add lr, sp, #152 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vorr q7, q6, q6 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d9[1], r11 +; LE-I64-NEXT: vmov.f32 s0, s25 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s24 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: vmov.32 d8[1], r9 +; LE-I64-NEXT: bl lrintf +; 
LE-I64-NEXT: add lr, sp, #136 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d16[1], r10 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #120 +; LE-I64-NEXT: vst1.64 {d8, d9}, [r8:128]! +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s1 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #152 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128] +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s19 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #168 +; LE-I64-NEXT: vmov.f32 s0, s18 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d16[1], r7 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s17 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s16 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vmov.32 d14[1], r6 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d11[1], r5 +; LE-I64-NEXT: vmov.32 d10[1], r11 +; LE-I64-NEXT: ldr r11, [sp, #68] @ 4-byte Reload +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #16 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #32 +; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]! 
+; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s23 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #152 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #120 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; LE-I64-NEXT: vmov.32 d13[1], r10 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s22 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add lr, sp, #152 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d15[1], r8 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s21 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s20 +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d13[1], r9 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: add lr, sp, #32 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #120 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s19 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s18 +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d13[1], r4 +; 
LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #152 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d16[1], r5 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #168 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #48 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s21 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s20 +; LE-I64-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s23 +; LE-I64-NEXT: add lr, sp, #32 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #48 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: vmov.f32 s0, s2 +; LE-I64-NEXT: vmov.32 d12[1], r9 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #16 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #136 +; LE-I64-NEXT: vmov.32 d11[1], r7 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #152 +; LE-I64-NEXT: vmov.32 d15[1], r10 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-I64-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEXT: ldr r1, [sp, #68] @ 4-byte Reload +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add r0, r1, #192 +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEXT: vmov.32 d9[1], r5 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vmov.32 d8[1], r6 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: add r0, r1, #128 +; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: add sp, sp, #184 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v32f32: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr} +; LE-I32-NEON-NEXT: push {r4, r5, r6, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: .pad #144 +; LE-I32-NEON-NEXT: sub sp, sp, #144 +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: add r0, sp, #224 +; LE-I32-NEON-NEXT: vorr q4, q0, q0 +; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vorr q6, q3, q3 +; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I32-NEON-NEXT: vmov.f32 s0, s4 +; LE-I32-NEON-NEXT: add lr, sp, #80 +; LE-I32-NEON-NEXT: vorr q5, q1, q1 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #272 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #240 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #32 +; 
LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s18 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s22 +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vmov.32 d17[0], r0 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: add lr, sp, #128 +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s20 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s22 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: vorr q7, q5, q5 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s26 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s24 +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s27 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s25 +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s31 +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s29 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vldmia sp, {d14, d15} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s31 +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: add lr, sp, 
#128 +; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s23 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s29 +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s20 +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #128 +; LE-I32-NEON-NEXT: add r0, sp, #256 +; LE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0] +; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s21 +; LE-I32-NEON-NEXT: vorr q4, q6, q6 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vorr q6, q7, q7 +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: vmov.f32 s0, s24 +; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s18 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s16 +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s19 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s26 +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s17 +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; 
LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s20 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: add lr, sp, #80 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s26 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s24 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s27 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s22 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s25 +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s23 +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s21 +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.f32 s0, s27 +; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: vmov.f32 s0, s25 +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: bl lrintf +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #128 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! 
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I32-NEON-NEXT: add r0, r4, #64 +; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r0:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]! +; LE-I32-NEON-NEXT: vst1.64 {d14, d15}, [r0:128] +; LE-I32-NEON-NEXT: add sp, sp, #144 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc} +; +; LE-I64-NEON-LABEL: lrint_v32f32: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: .pad #4 +; LE-I64-NEON-NEXT: sub sp, sp, #4 +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #184 +; LE-I64-NEON-NEXT: sub sp, sp, #184 +; LE-I64-NEON-NEXT: add lr, sp, #152 +; LE-I64-NEON-NEXT: vorr q7, q3, q3 +; LE-I64-NEON-NEXT: vorr q4, q2, q2 +; LE-I64-NEON-NEXT: mov r5, r0 +; LE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEON-NEXT: vmov.f32 s0, s3 +; LE-I64-NEON-NEXT: str r0, [sp, #68] @ 4-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s18 +; LE-I64-NEON-NEXT: add lr, sp, #168 +; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEON-NEXT: str r1, [sp, #16] @ 4-byte Spill +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s16 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; 
LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s17 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s19 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s31 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s30 +; LE-I64-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r7 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s29 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d13[1], r4 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: add r0, sp, #320 +; LE-I64-NEON-NEXT: add lr, sp, #120 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #304 +; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #336 +; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #32 +; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #288 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #48 +; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0] +; LE-I64-NEON-NEXT: vmov.32 d10[1], r8 +; LE-I64-NEON-NEXT: add r8, r5, #64 +; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #152 +; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r8:128]! +; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r8:128]! 
+; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s27 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s28 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s26 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #136 +; LE-I64-NEON-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #168 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s26 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s25 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #168 +; LE-I64-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vorr q5, q6, q6 +; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s20 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #152 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vorr q7, q6, q6 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; 
LE-I64-NEON-NEXT: vmov.32 d9[1], r11 +; LE-I64-NEON-NEXT: vmov.f32 s0, s25 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s24 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r9 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #136 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d16[1], r10 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #120 +; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r8:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s1 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #152 +; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128] +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s19 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #168 +; LE-I64-NEON-NEXT: vmov.f32 s0, s18 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d16[1], r7 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s17 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s16 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: 
vmov.32 d14[1], r6 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r5 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r11 +; LE-I64-NEON-NEXT: ldr r11, [sp, #68] @ 4-byte Reload +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #16 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #32 +; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s23 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #152 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #120 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: @ kill: def $s0 killed $s0 killed $q0 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r10 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s22 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #152 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d15[1], r8 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s21 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s20 +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d12, 
d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d13[1], r9 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #32 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #120 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s19 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s18 +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d13[1], r4 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #152 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d16[1], r5 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #168 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #48 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s21 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s20 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: vmov.f32 s0, s23 +; LE-I64-NEON-NEXT: add lr, sp, #32 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #48 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.f32 s0, s2 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r9 +; LE-I64-NEON-NEXT: bl lrintf +; LE-I64-NEON-NEXT: add lr, sp, #16 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #136 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r7 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #152 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r10 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEON-NEXT: ldr r1, [sp, #68] @ 4-byte Reload +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add r0, r1, #192 +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! 
+; LE-I64-NEON-NEXT: vmov.32 d9[1], r5 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r6 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEON-NEXT: add r0, r1, #128 +; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEON-NEXT: add sp, sp, #184 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: add sp, sp, #4 +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v32f32: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, lr} +; BE-I32-NEXT: push {r4, r5, r6, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #144 +; BE-I32-NEXT: sub sp, sp, #144 +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: add r0, sp, #256 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: add r0, sp, #272 +; BE-I32-NEXT: vrev64.32 q4, q3 +; BE-I32-NEXT: vrev64.32 q7, q1 +; BE-I32-NEXT: vrev64.32 q8, q8 +; BE-I32-NEXT: vld1.64 {d18, d19}, [r0] +; BE-I32-NEXT: add r0, sp, #224 +; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vrev64.32 q5, q0 +; BE-I32-NEXT: vmov.f32 s0, s28 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vrev64.32 q8, q9 +; BE-I32-NEXT: vld1.64 {d20, d21}, [r0] +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vrev64.32 q8, q10 +; BE-I32-NEXT: vrev64.32 q6, q2 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; 
BE-I32-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s18 +; BE-I32-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEXT: add lr, sp, #128 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s20 +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vmov.32 d17[0], r0 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s22 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s30 +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s26 +; BE-I32-NEXT: add lr, sp, #128 +; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s24 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s27 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s25 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s27 +; BE-I32-NEXT: add lr, sp, #96 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s23 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s21 +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s23 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s25 +; BE-I32-NEXT: 
add lr, sp, #112 +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s20 +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: add r0, sp, #240 +; BE-I32-NEXT: add lr, sp, #128 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: vrev64.32 q6, q8 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s21 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s24 +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vorr q7, q6, q6 +; BE-I32-NEXT: vstmia sp, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s18 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s16 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s19 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s30 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s17 +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s20 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s26 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s24 +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s27 +; 
BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s22 +; BE-I32-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s25 +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s23 +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.f32 s0, s21 +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload +; BE-I32-NEXT: vmov.f32 s0, s27 +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vmov.f32 s0, s25 +; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: vrev64.32 q8, q8 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: mov r0, r4 +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #128 +; BE-I32-NEXT: vrev64.32 q8, q4 +; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #96 +; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-I32-NEXT: add r0, r4, #64 +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]! +; BE-I32-NEXT: vst1.32 {d14, d15}, [r0:128]! 
+; BE-I32-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I32-NEXT: add sp, sp, #144 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: pop {r4, r5, r6, pc} +; +; BE-I64-LABEL: lrint_v32f32: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #256 +; BE-I64-NEXT: sub sp, sp, #256 +; BE-I64-NEXT: add lr, sp, #208 +; BE-I64-NEXT: str r0, [sp, #156] @ 4-byte Spill +; BE-I64-NEXT: add r0, sp, #408 +; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #120 +; BE-I64-NEXT: vld1.64 {d10, d11}, [r0] +; BE-I64-NEXT: add r0, sp, #392 +; BE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #160 +; BE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #176 +; BE-I64-NEXT: vrev64.32 d8, d10 +; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: vld1.64 {d12, d13}, [r0] +; BE-I64-NEXT: add r0, sp, #360 +; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #192 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #376 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #40 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vrev64.32 d9, d11 +; BE-I64-NEXT: add lr, sp, #240 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: str r1, [sp, #104] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s0, s18 +; BE-I64-NEXT: vrev64.32 d8, d13 +; 
BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s19 +; BE-I64-NEXT: add lr, sp, #192 +; BE-I64-NEXT: str r1, [sp, #72] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d10, d16 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s20 +; BE-I64-NEXT: add lr, sp, #224 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s21 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d15[1], r6 +; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #192 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d17 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d14[1], r7 +; BE-I64-NEXT: add lr, sp, #56 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #192 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #40 +; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #224 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d12 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 
s0, s16 +; BE-I64-NEXT: vmov.32 d10[1], r5 +; BE-I64-NEXT: add lr, sp, #224 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vrev64.32 d8, d13 +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #240 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d11[1], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #104] @ 4-byte Reload +; BE-I64-NEXT: add lr, sp, #240 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d10[1], r0 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d16 +; BE-I64-NEXT: vmov.32 d13[1], r0 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: vmov.32 d12[1], r9 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: add lr, sp, #192 +; BE-I64-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vmov.32 d17[1], r10 +; BE-I64-NEXT: vmov.32 d16[1], r11 +; BE-I64-NEXT: vorr q9, q8, q8 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #192 +; BE-I64-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEXT: vmov.32 d16[1], r5 +; BE-I64-NEXT: vorr q10, q8, q8 +; BE-I64-NEXT: vrev64.32 q8, q6 +; BE-I64-NEXT: 
vmov.32 d14[1], r6 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #240 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: vrev64.32 q8, q8 +; BE-I64-NEXT: vmov.32 d11[1], r7 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #224 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q8 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #56 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: vrev64.32 q8, q8 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #104 +; BE-I64-NEXT: vrev64.32 q8, q9 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #88 +; BE-I64-NEXT: vrev64.32 q8, q10 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #72 +; BE-I64-NEXT: vrev64.32 q8, q7 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #208 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #56 +; BE-I64-NEXT: vrev64.32 d8, d17 +; BE-I64-NEXT: vrev64.32 q8, q5 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: add lr, sp, #120 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEXT: vrev64.32 d8, d10 +; BE-I64-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: vrev64.32 q6, q6 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 
+; BE-I64-NEXT: ldr r6, [sp, #156] @ 4-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d11 +; BE-I64-NEXT: add r5, r6, #64 +; BE-I64-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: vrev64.32 q8, q7 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: add lr, sp, #208 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEXT: vrev64.32 d8, d18 +; BE-I64-NEXT: vrev64.32 q8, q7 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: add lr, sp, #160 +; BE-I64-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q7 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d11 +; BE-I64-NEXT: vst1.64 {d12, d13}, [r5:128] +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: add lr, sp, #208 +; BE-I64-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q6 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #176 +; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 d8, d12 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: mov r5, r6 +; BE-I64-NEXT: vrev64.32 d8, d13 +; BE-I64-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: vrev64.32 q8, q7 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: vrev64.32 d8, d10 +; BE-I64-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEXT: vmov.f32 s0, s17 +; BE-I64-NEXT: vrev64.32 q8, q7 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: add lr, sp, #208 +; BE-I64-NEXT: add r0, r6, #192 +; BE-I64-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q7 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #56 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128] +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #192 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #240 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #224 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I64-NEXT: add r0, r6, #128 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #104 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #88 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #72 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I64-NEXT: add sp, sp, #256 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v32f32: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr} +; BE-I32-NEON-NEXT: push {r4, r5, r6, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: .pad #144 +; BE-I32-NEON-NEXT: sub sp, sp, #144 +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: add r0, sp, #256 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: add r0, sp, #272 +; BE-I32-NEON-NEXT: vrev64.32 q4, q3 +; BE-I32-NEON-NEXT: vrev64.32 q7, q1 +; BE-I32-NEON-NEXT: vrev64.32 q8, q8 +; BE-I32-NEON-NEXT: vld1.64 {d18, d19}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #224 +; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vrev64.32 q5, q0 +; BE-I32-NEON-NEXT: vmov.f32 s0, s28 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vrev64.32 q8, q9 +; BE-I32-NEON-NEXT: vld1.64 {d20, d21}, [r0] +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vrev64.32 q8, q10 +; BE-I32-NEON-NEXT: vrev64.32 q6, q2 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s18 +; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEON-NEXT: add lr, sp, #128 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s20 +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: 
vmov.32 d17[0], r0 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s22 +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s30 +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s26 +; BE-I32-NEON-NEXT: add lr, sp, #128 +; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s24 +; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s27 +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s25 +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s27 +; BE-I32-NEON-NEXT: add lr, sp, #96 +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s23 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s21 +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s23 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s25 +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.32 
d9[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s20 +; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEON-NEXT: add r0, sp, #240 +; BE-I32-NEON-NEXT: add lr, sp, #128 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEON-NEXT: vrev64.32 q6, q8 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s21 +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s24 +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vorr q7, q6, q6 +; BE-I32-NEON-NEXT: vstmia sp, {d12, d13} @ 16-byte Spill +; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s18 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s16 +; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s19 +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s30 +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s17 +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s20 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s26 +; 
BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s24 +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s27 +; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s22 +; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s25 +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s23 +; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vmov.f32 s0, s21 +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.f32 s0, s27 +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vmov.f32 s0, s25 +; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q8, q8 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintf +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: mov r0, r4 +; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #128 +; BE-I32-NEON-NEXT: vrev64.32 q8, q4 +; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #96 +; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! 
+; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-I32-NEON-NEXT: add r0, r4, #64 +; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]! +; BE-I32-NEON-NEXT: vst1.32 {d14, d15}, [r0:128]! +; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I32-NEON-NEXT: add sp, sp, #144 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc} +; +; BE-I64-NEON-LABEL: lrint_v32f32: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: .pad #4 +; BE-I64-NEON-NEXT: sub sp, sp, #4 +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: .pad #256 +; BE-I64-NEON-NEXT: sub sp, sp, #256 +; BE-I64-NEON-NEXT: add lr, sp, #208 +; BE-I64-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill +; BE-I64-NEON-NEXT: add r0, sp, #408 +; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #120 +; BE-I64-NEON-NEXT: vld1.64 {d10, d11}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #392 +; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #160 +; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #176 +; BE-I64-NEON-NEXT: vrev64.32 d8, d10 +; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #136 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: vld1.64 {d12, d13}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #360 +; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #192 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #376 +; 
BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #40 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vrev64.32 d9, d11 +; BE-I64-NEON-NEXT: add lr, sp, #240 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: str r1, [sp, #104] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.f32 s0, s18 +; BE-I64-NEON-NEXT: vrev64.32 d8, d13 +; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s19 +; BE-I64-NEON-NEXT: add lr, sp, #192 +; BE-I64-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d10, d16 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s20 +; BE-I64-NEON-NEXT: add lr, sp, #224 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s21 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r6 +; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #192 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload 
+; BE-I64-NEON-NEXT: vrev64.32 d8, d17 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r7 +; BE-I64-NEON-NEXT: add lr, sp, #56 +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #192 +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #40 +; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #224 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d12 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r5 +; BE-I64-NEON-NEXT: add lr, sp, #224 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vrev64.32 d8, d13 +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #240 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #240 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: 
add lr, sp, #136 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d16 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r9 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: add lr, sp, #192 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r10 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r11 +; BE-I64-NEON-NEXT: vorr q9, q8, q8 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #192 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r5 +; BE-I64-NEON-NEXT: vorr q10, q8, q8 +; BE-I64-NEON-NEXT: vrev64.32 q8, q6 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #240 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: vrev64.32 q8, q8 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r7 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #224 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q8 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #56 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #136 +; BE-I64-NEON-NEXT: vrev64.32 q8, q8 +; BE-I64-NEON-NEXT: vstmia lr, {d16, 
d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #104 +; BE-I64-NEON-NEXT: vrev64.32 q8, q9 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #88 +; BE-I64-NEON-NEXT: vrev64.32 q8, q10 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #72 +; BE-I64-NEON-NEXT: vrev64.32 q8, q7 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #208 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #56 +; BE-I64-NEON-NEXT: vrev64.32 d8, d17 +; BE-I64-NEON-NEXT: vrev64.32 q8, q5 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: add lr, sp, #120 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEON-NEXT: vrev64.32 d8, d10 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: vrev64.32 q6, q6 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r6, [sp, #156] @ 4-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d11 +; BE-I64-NEON-NEXT: add r5, r6, #64 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: vrev64.32 q8, q7 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: add lr, sp, #208 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 d8, d18 +; BE-I64-NEON-NEXT: vrev64.32 q8, q7 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #160 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q7 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d11 +; BE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r5:128] +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #208 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q6 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #176 +; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d8, d12 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: mov r5, r6 +; 
BE-I64-NEON-NEXT: vrev64.32 d8, d13 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: vrev64.32 q8, q7 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: vrev64.32 d8, d10 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEON-NEXT: vmov.f32 s0, s17 +; BE-I64-NEON-NEXT: vrev64.32 q8, q7 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.f32 s0, s16 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: bl lrintf +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #208 +; BE-I64-NEON-NEXT: add r0, r6, #192 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q7 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #56 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #192 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #240 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #224 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #136 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I64-NEON-NEXT: add r0, r6, #128 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #104 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #88 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #72 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I64-NEON-NEXT: add sp, sp, #256 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: add sp, sp, #4 +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x) + ret <32 x iXLen> %a +} +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>) + +define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { +; LE-I32-LABEL: lrint_v1f64: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v1f64: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r11, lr} +; LE-I64-NEXT: push {r11, lr} +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEXT: vmov.32 d0[1], r1 +; LE-I64-NEXT: pop {r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v1f64: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v1f64: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r11, lr} +; LE-I64-NEON-NEXT: push {r11, lr} +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEON-NEXT: vmov.32 
d0[1], r1 +; LE-I64-NEON-NEXT: pop {r11, pc} +; +; BE-I32-LABEL: lrint_v1f64: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v1f64: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r11, lr} +; BE-I64-NEXT: push {r11, lr} +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: pop {r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v1f64: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v1f64: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r11, lr} +; BE-I64-NEON-NEXT: push {r11, lr} +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 d0, d16 +; BE-I64-NEON-NEXT: pop {r11, pc} + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) + +define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { +; LE-I32-LABEL: lrint_v2f64: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10} +; LE-I32-NEXT: vpush {d8, d9, d10} +; LE-I32-NEXT: vorr q4, q0, q0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d9, d9 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vpop {d8, d9, d10} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v2f64: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, lr} +; LE-I64-NEXT: push {r4, lr} +; LE-I64-NEXT: .vsave {d8, d9, d10, d11} +; LE-I64-NEXT: vpush {d8, d9, d10, d11} +; LE-I64-NEXT: vorr q4, q0, q0 +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, 
d8, d8 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEXT: vorr q0, q5, q5 +; LE-I64-NEXT: vpop {d8, d9, d10, d11} +; LE-I64-NEXT: pop {r4, pc} +; +; LE-I32-NEON-LABEL: lrint_v2f64: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10} +; LE-I32-NEON-NEXT: vorr q4, q0, q0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d9, d9 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10} +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v2f64: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, lr} +; LE-I64-NEON-NEXT: push {r4, lr} +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11} +; LE-I64-NEON-NEXT: vorr q4, q0, q0 +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEON-NEXT: vorr q0, q5, q5 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11} +; LE-I64-NEON-NEXT: pop {r4, pc} +; +; BE-I32-LABEL: lrint_v2f64: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10} +; BE-I32-NEXT: vpush {d8, d9, d10} +; BE-I32-NEXT: vorr q4, q0, q0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d9, d9 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vrev64.32 d0, d10 +; BE-I32-NEXT: vpop {d8, d9, d10} +; 
BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v2f64: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, lr} +; BE-I64-NEXT: push {r4, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10, d11} +; BE-I64-NEXT: vpush {d8, d9, d10, d11} +; BE-I64-NEXT: vorr q4, q0, q0 +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vrev64.32 q0, q5 +; BE-I64-NEXT: vpop {d8, d9, d10, d11} +; BE-I64-NEXT: pop {r4, pc} +; +; BE-I32-NEON-LABEL: lrint_v2f64: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10} +; BE-I32-NEON-NEXT: vorr q4, q0, q0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d9, d9 +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 d0, d10 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10} +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v2f64: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, lr} +; BE-I64-NEON-NEXT: push {r4, lr} +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11} +; BE-I64-NEON-NEXT: vorr q4, q0, q0 +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q0, q5 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11} +; BE-I64-NEON-NEXT: pop {r4, pc} + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x) + ret <2 x iXLen> %a +} +declare <2 x 
iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) + +define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { +; LE-I32-LABEL: lrint_v4f64: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; LE-I32-NEXT: vorr q4, q1, q1 +; LE-I32-NEXT: vorr q5, q0, q0 +; LE-I32-NEXT: vorr d0, d8, d8 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d9, d9 +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d11, d11 +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: vorr q0, q6, q6 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v4f64: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, lr} +; LE-I64-NEXT: push {r4, r5, r6, lr} +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vorr q5, q1, q1 +; LE-I64-NEXT: vorr q6, q0, q0 +; LE-I64-NEXT: vorr d0, d11, d11 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d12, d12 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d13, d13 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: vmov.32 d9[1], r4 +; LE-I64-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q0, q7, q7 +; LE-I64-NEXT: vorr q1, q4, q4 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: pop {r4, r5, r6, pc} +; +; LE-I32-NEON-LABEL: lrint_v4f64: +; LE-I32-NEON: @ %bb.0: 
+; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; LE-I32-NEON-NEXT: vorr q4, q1, q1 +; LE-I32-NEON-NEXT: vorr q5, q0, q0 +; LE-I32-NEON-NEXT: vorr d0, d8, d8 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d9, d9 +; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d11, d11 +; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q6, q6 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v4f64: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, lr} +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vorr q5, q1, q1 +; LE-I64-NEON-NEXT: vorr q6, q0, q0 +; LE-I64-NEON-NEXT: vorr d0, d11, d11 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d12, d12 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d13, d13 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d10, d10 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEON-NEXT: vorr q0, q7, q7 +; LE-I64-NEON-NEXT: vorr q1, q4, q4 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, 
d15} +; LE-I64-NEON-NEXT: pop {r4, r5, r6, pc} +; +; BE-I32-LABEL: lrint_v4f64: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; BE-I32-NEXT: vorr q4, q1, q1 +; BE-I32-NEXT: vorr q5, q0, q0 +; BE-I32-NEXT: vorr d0, d8, d8 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d10, d10 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d9, d9 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d11, d11 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q6 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v4f64: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, lr} +; BE-I64-NEXT: push {r4, r5, r6, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vorr q4, q1, q1 +; BE-I64-NEXT: vorr q5, q0, q0 +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d10, d10 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d11, d11 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: vmov.32 d15[1], r6 +; BE-I64-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEXT: vrev64.32 q0, q7 +; BE-I64-NEXT: vrev64.32 q1, q6 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: pop {r4, r5, r6, pc} +; +; BE-I32-NEON-LABEL: lrint_v4f64: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; 
BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; BE-I32-NEON-NEXT: vorr q4, q1, q1 +; BE-I32-NEON-NEXT: vorr q5, q0, q0 +; BE-I32-NEON-NEXT: vorr d0, d8, d8 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d10, d10 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d9, d9 +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d11, d11 +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q0, q6 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v4f64: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, lr} +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vorr q4, q1, q1 +; BE-I64-NEON-NEXT: vorr q5, q0, q0 +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d10, d10 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d11, d11 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r6 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q0, q7 +; BE-I64-NEON-NEXT: vrev64.32 q1, q6 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: pop {r4, r5, 
r6, pc} + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x) + ret <4 x iXLen> %a +} +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) + +define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { +; LE-I32-LABEL: lrint_v8f64: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: .pad #32 +; LE-I32-NEXT: sub sp, sp, #32 +; LE-I32-NEXT: vorr q5, q0, q0 +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vorr d0, d4, d4 +; LE-I32-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill +; LE-I32-NEXT: vorr q7, q3, q3 +; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEXT: vorr q6, q1, q1 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d14, d14 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d13, d13 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d11, d11 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vorr q0, q7, q7 +; LE-I32-NEXT: vorr q1, q4, q4 +; LE-I32-NEXT: add sp, sp, #32 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v8f64: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEXT: .vsave 
{d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #40 +; LE-I64-NEXT: sub sp, sp, #40 +; LE-I64-NEXT: vorr q4, q0, q0 +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vorr d0, d7, d7 +; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I64-NEXT: vorr q7, q2, q2 +; LE-I64-NEXT: vorr q6, q1, q1 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d14, d14 +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d15, d15 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d12, d12 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d13, d13 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEXT: vmov.32 d11[1], r10 +; LE-I64-NEXT: vmov.32 d6[0], r0 +; LE-I64-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEXT: vorr q0, q6, q6 +; LE-I64-NEXT: vmov.32 d10[1], r9 +; LE-I64-NEXT: vorr q1, q7, q7 +; LE-I64-NEXT: vmov.32 d7[1], r8 +; LE-I64-NEXT: vorr q2, q5, q5 +; LE-I64-NEXT: vmov.32 d6[1], r1 +; LE-I64-NEXT: add sp, sp, #40 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, 
d13, d14, d15} +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-I32-NEON-LABEL: lrint_v8f64: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: .pad #32 +; LE-I32-NEON-NEXT: sub sp, sp, #32 +; LE-I32-NEON-NEXT: vorr q5, q0, q0 +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vorr d0, d4, d4 +; LE-I32-NEON-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill +; LE-I32-NEON-NEXT: vorr q7, q3, q3 +; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEON-NEXT: vorr q6, q1, q1 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d14, d14 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d12, d12 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d13, d13 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d11, d11 +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q7, q7 +; LE-I32-NEON-NEXT: vorr q1, q4, q4 +; LE-I32-NEON-NEXT: add sp, sp, #32 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v8f64: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, 
r6, r7, r8, r9, r10, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #40 +; LE-I64-NEON-NEXT: sub sp, sp, #40 +; LE-I64-NEON-NEXT: vorr q4, q0, q0 +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vorr d0, d7, d7 +; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I64-NEON-NEXT: vorr q7, q2, q2 +; LE-I64-NEON-NEXT: vorr q6, q1, q1 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d14, d14 +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d15, d15 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d12, d12 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d13, d13 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEON-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r10 +; LE-I64-NEON-NEXT: vmov.32 
d6[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEON-NEXT: vorr q0, q6, q6 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r9 +; LE-I64-NEON-NEXT: vorr q1, q7, q7 +; LE-I64-NEON-NEXT: vmov.32 d7[1], r8 +; LE-I64-NEON-NEXT: vorr q2, q5, q5 +; LE-I64-NEON-NEXT: vmov.32 d6[1], r1 +; LE-I64-NEON-NEXT: add sp, sp, #40 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-I32-LABEL: lrint_v8f64: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #32 +; BE-I32-NEXT: sub sp, sp, #32 +; BE-I32-NEXT: vorr q5, q0, q0 +; BE-I32-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill +; BE-I32-NEXT: vorr d0, d4, d4 +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vorr q7, q3, q3 +; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I32-NEXT: vorr q6, q1, q1 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d10, d10 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d12, d12 +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d15, d15 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d13, d13 +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q5 +; BE-I32-NEXT: vrev64.32 q1, q4 +; BE-I32-NEXT: add sp, sp, #32 +; 
BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v8f64: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #40 +; BE-I64-NEXT: sub sp, sp, #40 +; BE-I64-NEXT: vorr q4, q0, q0 +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vorr d0, d7, d7 +; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I64-NEXT: vorr q7, q2, q2 +; BE-I64-NEXT: vorr q6, q1, q1 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d14, d14 +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vmov.32 d17[0], r0 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d15, d15 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d12, d12 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d13, d13 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vmov.32 d13[1], r6 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d11[1], r10 +; BE-I64-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEXT: vmov.32 d12[1], r5 
+; BE-I64-NEXT: vmov.32 d14[1], r7 +; BE-I64-NEXT: vmov.32 d10[1], r9 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 q0, q6 +; BE-I64-NEXT: vrev64.32 q1, q7 +; BE-I64-NEXT: vrev64.32 q2, q5 +; BE-I64-NEXT: vrev64.32 q3, q8 +; BE-I64-NEXT: add sp, sp, #40 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-I32-NEON-LABEL: lrint_v8f64: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: .pad #32 +; BE-I32-NEON-NEXT: sub sp, sp, #32 +; BE-I32-NEON-NEXT: vorr q5, q0, q0 +; BE-I32-NEON-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill +; BE-I32-NEON-NEXT: vorr d0, d4, d4 +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vorr q7, q3, q3 +; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I32-NEON-NEXT: vorr q6, q1, q1 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d10, d10 +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d14, d14 +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d12, d12 +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d15, d15 +; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d13, d13 +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vmov.32 
d8[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q0, q5 +; BE-I32-NEON-NEXT: vrev64.32 q1, q4 +; BE-I32-NEON-NEXT: add sp, sp, #32 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v8f64: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: .pad #40 +; BE-I64-NEON-NEXT: sub sp, sp, #40 +; BE-I64-NEON-NEXT: vorr q4, q0, q0 +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: vorr d0, d7, d7 +; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I64-NEON-NEXT: vorr q7, q2, q2 +; BE-I64-NEON-NEXT: vorr q6, q1, q1 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d14, d14 +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d15, d15 +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d12, d12 +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d13, d13 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; 
BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r6 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r10 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r7 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r9 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q0, q6 +; BE-I64-NEON-NEXT: vrev64.32 q1, q7 +; BE-I64-NEON-NEXT: vrev64.32 q2, q5 +; BE-I64-NEON-NEXT: vrev64.32 q3, q8 +; BE-I64-NEON-NEXT: add sp, sp, #40 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x) + ret <8 x iXLen> %a +} +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) + +define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { +; LE-I32-LABEL: lrint_v16f64: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, lr} +; LE-I32-NEXT: push {r4, r5, r6, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: .pad #128 +; LE-I32-NEXT: sub sp, sp, #128 +; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: add r0, sp, #240 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #208 +; LE-I32-NEXT: vorr q6, q0, q0 +; LE-I32-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vorr q5, q1, q1 +; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vorr d0, d4, d4 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; 
LE-I32-NEXT: add r0, sp, #224 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #256 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vld1.64 {d14, d15}, [r0] +; LE-I32-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d14, d14 +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEXT: vmov.32 d15[0], r4 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte 
Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: vorr q0, q6, q6 +; LE-I32-NEXT: vorr q1, q4, q4 +; LE-I32-NEXT: vorr q2, q5, q5 +; LE-I32-NEXT: vorr q3, q7, q7 +; LE-I32-NEXT: add sp, sp, #128 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: pop {r4, r5, r6, pc} +; +; LE-I64-LABEL: lrint_v16f64: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #176 +; LE-I64-NEXT: sub sp, sp, #176 +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: str r0, [sp, #140] @ 4-byte Spill +; LE-I64-NEXT: add r0, sp, #312 +; LE-I64-NEXT: vorr q6, q2, q2 +; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: vorr q7, q1, q1 +; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; 
LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vorr d0, d1, d1 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #280 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #296 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #120 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #328 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d14, d14 +; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d15, d15 +; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d12, d12 +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: str r1, [sp, #72] @ 4-byte Spill +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d13, d13 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d13[1], r5 +; LE-I64-NEXT: vstmia 
lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: vmov.32 d12[1], r7 +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d17, d17 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d14[1], r6 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d11, d11 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d8[1], r0 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #120 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d11, d11 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-I64-NEXT: 
vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEXT: vmov.32 d8[1], r10 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vmov.32 d20[0], r0 +; LE-I64-NEXT: vmov.32 d21[1], r8 +; LE-I64-NEXT: vmov.32 d20[1], r1 +; LE-I64-NEXT: ldr r1, [sp, #140] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d13[1], r5 +; LE-I64-NEXT: mov r0, r1 +; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: vmov.32 d12[1], r7 +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d17[1], r9 +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128] +; LE-I64-NEXT: add r0, r1, #64 +; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEXT: vmov.32 d16[1], r11 +; LE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]! 
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: add sp, sp, #176 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v16f64: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr} +; LE-I32-NEON-NEXT: push {r4, r5, r6, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: .pad #128 +; LE-I32-NEON-NEXT: sub sp, sp, #128 +; LE-I32-NEON-NEXT: add lr, sp, #80 +; LE-I32-NEON-NEXT: add r0, sp, #240 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #208 +; LE-I32-NEON-NEXT: vorr q6, q0, q0 +; LE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vorr q5, q1, q1 +; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vorr d0, d4, d4 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #224 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #256 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0] +; LE-I32-NEON-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill +; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d12, d12 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEON-NEXT: bl lrint 
+; LE-I32-NEON-NEXT: vorr d0, d14, d14 +; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #80 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEON-NEXT: vmov.32 d15[0], r4 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #80 +; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 
+; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q6, q6 +; LE-I32-NEON-NEXT: vorr q1, q4, q4 +; LE-I32-NEON-NEXT: vorr q2, q5, q5 +; LE-I32-NEON-NEXT: vorr q3, q7, q7 +; LE-I32-NEON-NEXT: add sp, sp, #128 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc} +; +; LE-I64-NEON-LABEL: lrint_v16f64: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: .pad #4 +; LE-I64-NEON-NEXT: sub sp, sp, #4 +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #176 +; LE-I64-NEON-NEXT: sub sp, sp, #176 +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: str r0, [sp, #140] @ 4-byte Spill +; LE-I64-NEON-NEXT: add r0, sp, #312 +; LE-I64-NEON-NEXT: vorr q6, q2, q2 +; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: vorr q7, q1, q1 +; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: vorr d0, d1, d1 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #280 +; LE-I64-NEON-NEXT: vstmia lr, {d16, 
d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #80 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #296 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #120 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #328 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d14, d14 +; LE-I64-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d15, d15 +; LE-I64-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d12, d12 +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d13, d13 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #40 +; 
LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r5 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r7 +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d17, d17 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r6 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #80 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d11, d11 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vorr d0, d10, d10 +; LE-I64-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte 
Spill +; LE-I64-NEON-NEXT: add lr, sp, #120 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d11, d11 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: vorr d0, d10, d10 +; LE-I64-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r10 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vmov.32 d20[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d21[1], r8 +; LE-I64-NEON-NEXT: vmov.32 d20[1], r1 +; LE-I64-NEON-NEXT: ldr r1, [sp, #140] @ 4-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d13[1], r5 +; LE-I64-NEON-NEXT: mov r0, r1 +; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r7 +; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d17[1], r9 +; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; LE-I64-NEON-NEXT: add r0, r1, #64 +; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! 
+; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEON-NEXT: vmov.32 d16[1], r11 +; LE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEON-NEXT: add sp, sp, #176 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: add sp, sp, #4 +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v16f64: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, lr} +; BE-I32-NEXT: push {r4, r5, r6, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #128 +; BE-I32-NEXT: sub sp, sp, #128 +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: add r0, sp, #240 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #224 +; BE-I32-NEXT: vorr q6, q3, q3 +; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vorr q5, q1, q1 +; BE-I32-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #256 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #96 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #208 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vld1.64 {d14, d15}, [r0] +; BE-I32-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill +; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d10, d10 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d12, d12 +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: 
vorr d0, d14, d14 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEXT: vmov.32 d14[0], r4 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #96 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #96 +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #48 +; 
BE-I32-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q4 +; BE-I32-NEXT: vrev64.32 q1, q5 +; BE-I32-NEXT: vrev64.32 q2, q7 +; BE-I32-NEXT: vrev64.32 q3, q6 +; BE-I32-NEXT: add sp, sp, #128 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: pop {r4, r5, r6, pc} +; +; BE-I64-LABEL: lrint_v16f64: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #168 +; BE-I64-NEXT: sub sp, sp, #168 +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: str r0, [sp, #132] @ 4-byte Spill +; BE-I64-NEXT: add r0, sp, #304 +; BE-I64-NEXT: vorr q4, q3, q3 +; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vorr d0, d1, d1 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #320 +; BE-I64-NEXT: vorr q6, q2, q2 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #88 +; BE-I64-NEXT: vorr q7, q1, q1 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #272 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #288 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d14, d14 
+; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: vmov.32 d17[0], r0 +; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d15, d15 +; BE-I64-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d12, d12 +; BE-I64-NEXT: add lr, sp, #152 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d13, d13 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d13[1], r5 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #48 +; 
BE-I64-NEXT: vorr q6, q5, q5 +; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: vmov.32 d12[1], r6 +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #152 +; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #88 +; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d13, d13 +; BE-I64-NEXT: vmov.32 d9[1], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-I64-NEXT: vorr d0, d12, d12 +; BE-I64-NEXT: add lr, sp, #152 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d8[1], r0 +; BE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: vmov.32 d11[1], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vmov.32 d10[1], r9 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vmov.32 d17[1], r10 +; BE-I64-NEXT: vmov.32 d16[1], r11 +; BE-I64-NEXT: vorr q12, q8, q8 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add 
lr, sp, #152 +; BE-I64-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEXT: vmov.32 d16[1], r6 +; BE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: vorr q13, q8, q8 +; BE-I64-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEXT: ldr r1, [sp, #132] @ 4-byte Reload +; BE-I64-NEXT: vrev64.32 q8, q5 +; BE-I64-NEXT: mov r0, r1 +; BE-I64-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 q9, q9 +; BE-I64-NEXT: vrev64.32 q10, q10 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; BE-I64-NEXT: vrev64.32 q11, q11 +; BE-I64-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]! +; BE-I64-NEXT: vrev64.32 q15, q6 +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: vrev64.32 q12, q12 +; BE-I64-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-I64-NEXT: add r0, r1, #64 +; BE-I64-NEXT: vrev64.32 q13, q13 +; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-I64-NEXT: vrev64.32 q14, q7 +; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]! 
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128] +; BE-I64-NEXT: add sp, sp, #168 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v16f64: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr} +; BE-I32-NEON-NEXT: push {r4, r5, r6, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: .pad #128 +; BE-I32-NEON-NEXT: sub sp, sp, #128 +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: add r0, sp, #240 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #224 +; BE-I32-NEON-NEXT: vorr q6, q3, q3 +; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vorr q5, q1, q1 +; BE-I32-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #256 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #96 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #208 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0] +; BE-I32-NEON-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill +; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d10, d10 +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d12, d12 +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; 
BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d14, d14 +; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEON-NEXT: vmov.32 d14[0], r4 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #96 +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #96 +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; 
BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q0, q4 +; BE-I32-NEON-NEXT: vrev64.32 q1, q5 +; BE-I32-NEON-NEXT: vrev64.32 q2, q7 +; BE-I32-NEON-NEXT: vrev64.32 q3, q6 +; BE-I32-NEON-NEXT: add sp, sp, #128 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc} +; +; BE-I64-NEON-LABEL: lrint_v16f64: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: .pad #4 +; BE-I64-NEON-NEXT: sub sp, sp, #4 +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: .pad #168 +; BE-I64-NEON-NEXT: sub sp, sp, #168 +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: str r0, [sp, #132] @ 4-byte Spill +; BE-I64-NEON-NEXT: add r0, sp, #304 +; BE-I64-NEON-NEXT: vorr q4, q3, q3 +; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: vorr d0, d1, d1 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #320 +; BE-I64-NEON-NEXT: vorr q6, q2, q2 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #88 
+; BE-I64-NEON-NEXT: vorr q7, q1, q1 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #272 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #288 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d14, d14 +; BE-I64-NEON-NEXT: add lr, sp, #136 +; BE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; BE-I64-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d15, d15 +; BE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d12, d12 +; BE-I64-NEON-NEXT: add lr, sp, #152 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d13, d13 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #136 +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[1], 
r5 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: vorr q6, q5, q5 +; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r6 +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #152 +; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #88 +; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d13, d13 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d12, d12 +; BE-I64-NEON-NEXT: add lr, sp, #152 +; BE-I64-NEON-NEXT: 
mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #136 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r9 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r10 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r11 +; BE-I64-NEON-NEXT: vorr q12, q8, q8 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #152 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r6 +; BE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: vorr q13, q8, q8 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEON-NEXT: ldr r1, [sp, #132] @ 4-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 q8, q5 +; BE-I64-NEON-NEXT: mov r0, r1 +; BE-I64-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 q9, q9 +; BE-I64-NEON-NEXT: vrev64.32 q10, q10 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! 
+; BE-I64-NEON-NEXT: vrev64.32 q11, q11 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! +; BE-I64-NEON-NEXT: vrev64.32 q15, q6 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEON-NEXT: vrev64.32 q12, q12 +; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-I64-NEON-NEXT: add r0, r1, #64 +; BE-I64-NEON-NEXT: vrev64.32 q13, q13 +; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-I64-NEON-NEXT: vrev64.32 q14, q7 +; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]! +; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128] +; BE-I64-NEON-NEXT: add sp, sp, #168 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: add sp, sp, #4 +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x) + ret <16 x iXLen> %a +} +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) + +define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { +; LE-I32-LABEL: lrint_v32f64: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, lr} +; LE-I32-NEXT: push {r4, r5, r6, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: .pad #160 +; LE-I32-NEXT: sub sp, sp, #160 +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: add r0, sp, #304 +; LE-I32-NEXT: vorr q6, q3, q3 +; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vorr q5, q1, q1 +; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vorr d0, d4, d4 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #352 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #272 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; 
LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #288 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #336 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #144 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #256 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #128 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #320 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d13, d13 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d11, d11 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vorr q5, q4, q4 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: add r0, sp, #416 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d8, d8 +; LE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I32-NEXT: vorr q6, q5, q5 +; LE-I32-NEXT: vorr d0, d14, d14 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: 
vorr d0, d9, d9 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d15, d15 +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: add r0, sp, #400 +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vorr q6, q5, q5 +; LE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d8, d8 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d11, d11 +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d9, d9 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: add r0, sp, #384 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d8, d8 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d11, d11 +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d9, d9 +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr q7, q6, q6 +; LE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: add r0, sp, #368 +; 
LE-I32-NEXT: vld1.64 {d12, d13}, [r0] +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #144 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d11, d11 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #144 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: add r0, sp, #240 +; LE-I32-NEXT: vorr d0, d13, d13 +; LE-I32-NEXT: add lr, sp, #144 +; LE-I32-NEXT: vld1.64 {d10, d11}, [r0] +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: vstmia sp, {d10, d11} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d14, d14 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d13, d13 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #128 +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d15, d15 +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d13, d13 +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; 
LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vst1.32 {d8, d9}, [r0:128]! +; LE-I32-NEXT: vst1.64 {d10, d11}, [r0:128] +; LE-I32-NEXT: add r0, r4, #64 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #144 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I32-NEXT: add sp, sp, #160 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: pop {r4, r5, r6, pc} +; +; LE-I64-LABEL: lrint_v32f64: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #208 +; LE-I64-NEXT: sub sp, sp, #208 +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: str r0, [sp, #156] @ 4-byte Spill +; LE-I64-NEXT: add r0, sp, #456 +; LE-I64-NEXT: vorr q4, q0, q0 +; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vorr d0, d7, d7 +; LE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vorr q5, q2, q2 +; 
LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #344 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #192 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #376 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #360 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #136 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #440 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: str r1, [sp, #120] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d11, d11 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d11, d11 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEXT: bl lrint +; 
LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d9[1], r7 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d17, d17 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d8[1], r4 +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEXT: vmov.32 d10[1], r9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #120] @ 4-byte Reload +; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov.32 d19[1], r0 +; LE-I64-NEXT: add r0, sp, #408 +; LE-I64-NEXT: ldr r2, [sp, #156] @ 4-byte Reload +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEXT: mov r0, r2 +; LE-I64-NEXT: vmov.32 d12[1], r1 +; LE-I64-NEXT: add r1, sp, #488 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r1] +; LE-I64-NEXT: add r1, sp, #472 +; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! 
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vmov.32 d21[1], r11 +; LE-I64-NEXT: vmov.32 d20[1], r10 +; LE-I64-NEXT: add r10, r2, #192 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEXT: vld1.64 {d16, d17}, [r1] +; LE-I64-NEXT: add r1, sp, #392 +; LE-I64-NEXT: vmov.32 d18[1], r5 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]! +; LE-I64-NEXT: vld1.64 {d16, d17}, [r1] +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128] +; LE-I64-NEXT: add r0, sp, #312 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #328 +; LE-I64-NEXT: vmov.32 d15[1], r8 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #120 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: add r0, sp, #424 +; LE-I64-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEXT: vst1.64 {d12, d13}, [r10:128]! +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEXT: vst1.64 {d14, d15}, [r10:128]! 
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #192 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d17, d17 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #136 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d11, d11 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d11, d11 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #192 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: add lr, sp, #192 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: vmov.32 d14[1], r6 +; LE-I64-NEXT: add lr, sp, #136 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d13[1], r5 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr 
d0, d8, d8 +; LE-I64-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #192 +; LE-I64-NEXT: str r1, [sp, #24] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d11, d11 +; LE-I64-NEXT: vmov.32 d9[1], r9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: vmov.32 d8[1], r11 +; LE-I64-NEXT: add lr, sp, #192 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: str r1, [sp, #40] @ 4-byte Spill +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: vmov.32 d10[1], r7 +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: mov r9, r1 +; 
LE-I64-NEXT: vmov.32 d14[1], r0 +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d12[1], r0 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #120 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: vmov.32 d13[1], r8 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vmov.32 d12[1], r11 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #72 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vmov.32 d17[1], r9 +; LE-I64-NEXT: vmov.32 d16[1], r7 +; LE-I64-NEXT: vst1.64 {d12, d13}, [r10:128]! 
+; LE-I64-NEXT: vorr q9, q8, q8 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #136 +; LE-I64-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r10:128] +; LE-I64-NEXT: vmov.32 d14[1], r1 +; LE-I64-NEXT: ldr r1, [sp, #156] @ 4-byte Reload +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add r0, r1, #128 +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vmov.32 d10[1], r4 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #192 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: add r0, r1, #64 +; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #88 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: add sp, sp, #208 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v32f64: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr} +; LE-I32-NEON-NEXT: push {r4, r5, r6, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: .pad #160 +; LE-I32-NEON-NEXT: sub sp, sp, #160 +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: add r0, sp, #304 +; LE-I32-NEON-NEXT: vorr q6, q3, q3 +; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vorr q5, q1, q1 +; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vorr d0, d4, d4 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #352 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #272 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #288 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #80 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #336 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #144 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: add r0, sp, #256 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #128 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; 
LE-I32-NEON-NEXT: add r0, sp, #320 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d12, d12 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d13, d13 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d11, d11 +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: vorr q5, q4, q4 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: add r0, sp, #416 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d8, d8 +; LE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr q6, q5, q5 +; LE-I32-NEON-NEXT: vorr d0, d14, d14 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d9, d9 +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d15, d15 +; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; 
LE-I32-NEON-NEXT: add r0, sp, #400 +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vorr q6, q5, q5 +; LE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d8, d8 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d11, d11 +; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d9, d9 +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: add r0, sp, #384 +; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d8, d8 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d11, d11 +; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d9, d9 +; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr q7, q6, q6 +; LE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: add r0, sp, #368 +; LE-I32-NEON-NEXT: vld1.64 {d12, d13}, [r0] +; 
LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #144 +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d11, d11 +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d12, d12 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #144 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEON-NEXT: add r0, sp, #240 +; LE-I32-NEON-NEXT: vorr d0, d13, d13 +; LE-I32-NEON-NEXT: add lr, sp, #144 +; LE-I32-NEON-NEXT: vld1.64 {d10, d11}, [r0] +; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: vstmia sp, {d10, d11} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d10, d10 +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #80 +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d12, d12 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #112 +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d14, d14 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d13, d13 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #128 +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d12, d12 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d15, d15 +; 
LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vorr d0, d13, d13 +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vorr d0, d17, d17 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: bl lrint +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #96 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r0:128]! +; LE-I32-NEON-NEXT: vst1.64 {d10, d11}, [r0:128] +; LE-I32-NEON-NEXT: add r0, r4, #64 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #144 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! 
+; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I32-NEON-NEXT: add sp, sp, #160 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc} +; +; LE-I64-NEON-LABEL: lrint_v32f64: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: .pad #4 +; LE-I64-NEON-NEXT: sub sp, sp, #4 +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #208 +; LE-I64-NEON-NEXT: sub sp, sp, #208 +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill +; LE-I64-NEON-NEXT: add r0, sp, #456 +; LE-I64-NEON-NEXT: vorr q4, q0, q0 +; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vorr d0, d7, d7 +; LE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vorr q5, q2, q2 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #344 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #192 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #376 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #360 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #136 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #440 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl 
lrint +; LE-I64-NEON-NEXT: vorr d0, d10, d10 +; LE-I64-NEON-NEXT: str r1, [sp, #120] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d11, d11 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d10, d10 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d11, d11 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r7 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d17, d17 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d8[1], r4 +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; 
LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r9 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #120] @ 4-byte Reload +; LE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vmov.32 d19[1], r0 +; LE-I64-NEON-NEXT: add r0, sp, #408 +; LE-I64-NEON-NEXT: ldr r2, [sp, #156] @ 4-byte Reload +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEON-NEXT: mov r0, r2 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r1 +; LE-I64-NEON-NEXT: add r1, sp, #488 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1] +; LE-I64-NEON-NEXT: add r1, sp, #472 +; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vmov.32 d21[1], r11 +; LE-I64-NEON-NEXT: vmov.32 d20[1], r10 +; LE-I64-NEON-NEXT: add r10, r2, #192 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1] +; LE-I64-NEON-NEXT: add r1, sp, #392 +; LE-I64-NEON-NEXT: vmov.32 d18[1], r5 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1] +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; LE-I64-NEON-NEXT: add r0, sp, #312 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #328 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r8 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #120 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: add r0, sp, #424 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]! +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r10:128]! 
+; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #192 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d17, d17 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #136 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d10, d10 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d11, d11 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d10, d10 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d11, d11 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #192 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #192 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r6 +; LE-I64-NEON-NEXT: add lr, sp, #136 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: mov r7, r1 
+; LE-I64-NEON-NEXT: vmov.32 d13[1], r5 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #192 +; LE-I64-NEON-NEXT: str r1, [sp, #24] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d11, d11 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r9 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d10, d10 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r11 +; LE-I64-NEON-NEXT: add lr, sp, #192 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r7 +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: 
vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #104 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #120 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: vorr d0, d9, d9 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r8 +; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: vorr d0, d8, d8 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r11 
+; LE-I64-NEON-NEXT: bl lrint +; LE-I64-NEON-NEXT: add lr, sp, #72 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vmov.32 d17[1], r9 +; LE-I64-NEON-NEXT: vmov.32 d16[1], r7 +; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]! +; LE-I64-NEON-NEXT: vorr q9, q8, q8 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #136 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128] +; LE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; LE-I64-NEON-NEXT: ldr r1, [sp, #156] @ 4-byte Reload +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add r0, r1, #128 +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r4 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #192 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEON-NEXT: add r0, r1, #64 +; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #88 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEON-NEXT: add sp, sp, #208 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: add sp, sp, #4 +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v32f64: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, lr} +; BE-I32-NEXT: push {r4, r5, r6, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #176 +; BE-I32-NEXT: sub sp, sp, #176 +; BE-I32-NEXT: add lr, sp, #128 +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: add r0, sp, #336 +; BE-I32-NEXT: vorr q6, q3, q3 +; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vorr q5, q1, q1 +; BE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vorr d0, d4, d4 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #320 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #160 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #432 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #288 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #96 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #368 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #416 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #144 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #400 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; 
BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d12, d12 +; BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d10, d10 +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d13, d13 +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d11, d11 +; BE-I32-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #128 +; BE-I32-NEXT: vorr q5, q4, q4 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: add lr, sp, #128 +; BE-I32-NEXT: add r0, sp, #384 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d8, d8 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d9, d9 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d15, d15 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: add r0, sp, #272 +; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d10, d10 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d8, 
d8 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d11, d11 +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d9, d9 +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEXT: add r0, sp, #256 +; BE-I32-NEXT: vorr d0, d10, d10 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #160 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d8, d8 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d11, d11 +; BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d9, d9 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: add r0, sp, #304 +; BE-I32-NEXT: vld1.64 {d10, d11}, [r0] +; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vorr q4, q6, q6 +; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d12, d12 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d15, d15 +; BE-I32-NEXT: add lr, sp, #160 +; BE-I32-NEXT: vmov.32 d17[0], r0 +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d10, d10 +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d13, d13 +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add 
lr, sp, #160 +; BE-I32-NEXT: vorr d0, d11, d11 +; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: add r0, sp, #352 +; BE-I32-NEXT: vld1.64 {d14, d15}, [r0] +; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: add lr, sp, #160 +; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #96 +; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d12, d12 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d13, d13 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #144 +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d12, d12 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d15, d15 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d13, d13 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #48 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: add lr, sp, #160 +; BE-I32-NEXT: vrev64.32 q9, q4 +; BE-I32-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #128 +; BE-I32-NEXT: vmov.32 d22[1], r0 +; BE-I32-NEXT: mov r0, r4 +; BE-I32-NEXT: vst1.32 {d20, d21}, [r0:128]! 
+; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #16 +; BE-I32-NEXT: vrev64.32 q8, q5 +; BE-I32-NEXT: vst1.32 {d20, d21}, [r0:128]! +; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: vst1.32 {d20, d21}, [r0:128]! +; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-I32-NEXT: add r0, r4, #64 +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEXT: vst1.32 {d22, d23}, [r0:128]! +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I32-NEXT: add sp, sp, #176 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: pop {r4, r5, r6, pc} +; +; BE-I64-LABEL: lrint_v32f64: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #232 +; BE-I64-NEXT: sub sp, sp, #232 +; BE-I64-NEXT: add lr, sp, #184 +; BE-I64-NEXT: str r0, [sp, #148] @ 4-byte Spill +; BE-I64-NEXT: add r0, sp, #416 +; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #168 +; BE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #152 +; BE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #128 +; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #200 +; BE-I64-NEXT: vld1.64 {d18, d19}, [r0] +; BE-I64-NEXT: add r0, sp, #448 +; BE-I64-NEXT: vorr d0, d19, d19 +; BE-I64-NEXT: vld1.64 {d14, d15}, [r0] +; BE-I64-NEXT: add r0, sp, #336 +; BE-I64-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: vld1.64 {d16, 
d17}, [r0] +; BE-I64-NEXT: add r0, sp, #400 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #352 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #368 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #384 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #512 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEXT: add r0, sp, #432 +; BE-I64-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: str r1, [sp, #80] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d14, d14 +; BE-I64-NEXT: add lr, sp, #216 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d15, d15 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d10, d10 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d11, d11 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #200 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; 
BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: add lr, sp, #200 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d15[1], r7 +; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d11, d11 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d10, d10 +; BE-I64-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d15, d15 +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d14, d14 +; BE-I64-NEXT: vmov.32 d8[1], r8 +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #216 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: vmov.32 d11[1], r9 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: add lr, sp, #216 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d10[1], r0 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: ldr r0, 
[sp, #80] @ 4-byte Reload +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #200 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: vmov.32 d11[1], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: vmov.32 d10[1], r5 +; BE-I64-NEXT: add lr, sp, #200 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vorr q4, q6, q6 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d13, d13 +; BE-I64-NEXT: vmov.32 d9[1], r10 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d12, d12 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: vmov.32 d8[1], r11 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vmov.32 d17[1], r0 +; BE-I64-NEXT: vmov.32 d16[1], r8 +; BE-I64-NEXT: vorr q9, q8, q8 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vmov.32 d17[1], r9 +; BE-I64-NEXT: vmov.32 d16[1], r6 +; BE-I64-NEXT: vorr q10, q8, q8 +; BE-I64-NEXT: vrev64.32 q8, q4 +; BE-I64-NEXT: vmov.32 d15[1], r7 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #200 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d11[1], r5 +; BE-I64-NEXT: vrev64.32 q8, q8 +; BE-I64-NEXT: vmov.32 d14[1], r4 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #216 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; 
BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q8 +; BE-I64-NEXT: vrev64.32 q6, q7 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: vrev64.32 q7, q5 +; BE-I64-NEXT: vrev64.32 q8, q8 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #80 +; BE-I64-NEXT: vrev64.32 q8, q8 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: vrev64.32 q8, q9 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vrev64.32 q8, q10 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #128 +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d11, d11 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d10, d10 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: ldr r6, [sp, #148] @ 4-byte Reload +; BE-I64-NEXT: add lr, sp, #152 +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: mov r5, r6 +; BE-I64-NEXT: vmov.32 d8[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q4 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d11, d11 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d10, d10 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: add lr, sp, #168 +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: vmov.32 d8[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q4 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d11, d11 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d10, d10 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: add lr, sp, #184 +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: vmov.32 d8[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q4 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d11, d11 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d10, d10 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: add r0, sp, #464 +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: vmov.32 d8[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q4 +; BE-I64-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128] +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add r0, sp, #480 +; BE-I64-NEXT: add r5, r6, #192 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: vrev64.32 q8, q5 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add r0, sp, #496 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEXT: vorr d0, d9, d9 +; BE-I64-NEXT: vrev64.32 q8, q5 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vorr d0, d8, d8 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: add r0, r6, #128 +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEXT: vrev64.32 q8, q5 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEXT: vst1.64 {d14, d15}, [r5:128] +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #200 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #216 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #80 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I64-NEXT: add r0, r6, #64 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; BE-I64-NEXT: vst1.64 {d12, d13}, [r0:128] +; BE-I64-NEXT: add sp, sp, #232 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v32f64: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr} +; BE-I32-NEON-NEXT: push {r4, r5, r6, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: .pad #176 +; BE-I32-NEON-NEXT: sub sp, sp, #176 +; BE-I32-NEON-NEXT: add lr, sp, #128 +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: add r0, sp, #336 +; BE-I32-NEON-NEXT: vorr q6, q3, q3 +; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vorr q5, q1, q1 +; BE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vorr d0, d4, d4 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #320 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #160 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #432 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #288 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #96 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #368 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #416 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #144 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: add r0, sp, #400 +; BE-I32-NEON-NEXT: 
vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d12, d12 +; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d10, d10 +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d13, d13 +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d11, d11 +; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #128 +; BE-I32-NEON-NEXT: vorr q5, q4, q4 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #128 +; BE-I32-NEON-NEXT: add r0, sp, #384 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d8, d8 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d14, d14 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d9, d9 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d15, d15 +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: add r0, sp, 
#272 +; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d10, d10 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d8, d8 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d11, d11 +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d9, d9 +; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEON-NEXT: add r0, sp, #256 +; BE-I32-NEON-NEXT: vorr d0, d10, d10 +; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #160 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d8, d8 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d11, d11 +; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d9, d9 +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: add r0, sp, #304 +; BE-I32-NEON-NEXT: vld1.64 {d10, d11}, [r0] +; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d14, d14 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vorr q4, q6, q6 +; 
BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d12, d12 +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d15, d15 +; BE-I32-NEON-NEXT: add lr, sp, #160 +; BE-I32-NEON-NEXT: vmov.32 d17[0], r0 +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d10, d10 +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d13, d13 +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #160 +; BE-I32-NEON-NEXT: vorr d0, d11, d11 +; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: add r0, sp, #352 +; BE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0] +; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d14, d14 +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: add lr, sp, #160 +; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #96 +; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d12, d12 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #112 +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d14, d14 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d13, d13 +; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #144 +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d12, d12 +; BE-I32-NEON-NEXT: bl lrint 
+; BE-I32-NEON-NEXT: vorr d0, d15, d15 +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: vorr d0, d13, d13 +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #48 +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: vorr d0, d17, d17 +; BE-I32-NEON-NEXT: bl lrint +; BE-I32-NEON-NEXT: add lr, sp, #160 +; BE-I32-NEON-NEXT: vrev64.32 q9, q4 +; BE-I32-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #80 +; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #128 +; BE-I32-NEON-NEXT: vmov.32 d22[1], r0 +; BE-I32-NEON-NEXT: mov r0, r4 +; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]! +; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #16 +; BE-I32-NEON-NEXT: vrev64.32 q8, q5 +; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]! +; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #32 +; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]! +; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-I32-NEON-NEXT: add r0, r4, #64 +; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #64 +; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! +; BE-I32-NEON-NEXT: vst1.32 {d22, d23}, [r0:128]! +; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! 
+; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I32-NEON-NEXT: add sp, sp, #176 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc} +; +; BE-I64-NEON-LABEL: lrint_v32f64: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: .pad #4 +; BE-I64-NEON-NEXT: sub sp, sp, #4 +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: .pad #232 +; BE-I64-NEON-NEXT: sub sp, sp, #232 +; BE-I64-NEON-NEXT: add lr, sp, #184 +; BE-I64-NEON-NEXT: str r0, [sp, #148] @ 4-byte Spill +; BE-I64-NEON-NEXT: add r0, sp, #416 +; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #168 +; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #152 +; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #128 +; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #200 +; BE-I64-NEON-NEXT: vld1.64 {d18, d19}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #448 +; BE-I64-NEON-NEXT: vorr d0, d19, d19 +; BE-I64-NEON-NEXT: vld1.64 {d14, d15}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #336 +; BE-I64-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #400 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #352 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #368 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add 
lr, sp, #48 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #384 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #512 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I64-NEON-NEXT: add r0, sp, #432 +; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: str r1, [sp, #80] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d14, d14 +; BE-I64-NEON-NEXT: add lr, sp, #216 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d15, d15 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d10, d10 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d11, d11 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #200 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, 
#200 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r7 +; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d11, d11 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d10, d10 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d15, d15 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d14, d14 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r8 +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #216 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r9 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: add lr, sp, #216 +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; 
BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: ldr r0, [sp, #80] @ 4-byte Reload +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #200 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r5 +; BE-I64-NEON-NEXT: add lr, sp, #200 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: vorr q4, q6, q6 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d13, d13 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r10 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d12, d12 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r11 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #24 +; BE-I64-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r0 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r8 +; BE-I64-NEON-NEXT: vorr q9, q8, q8 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: vmov.32 d17[1], r9 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r6 +; BE-I64-NEON-NEXT: vorr q10, q8, q8 +; BE-I64-NEON-NEXT: vrev64.32 q8, q4 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r7 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #200 +; 
BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d11[1], r5 +; BE-I64-NEON-NEXT: vrev64.32 q8, q8 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r4 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #216 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q8 +; BE-I64-NEON-NEXT: vrev64.32 q6, q7 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #8 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: vrev64.32 q7, q5 +; BE-I64-NEON-NEXT: vrev64.32 q8, q8 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #80 +; BE-I64-NEON-NEXT: vrev64.32 q8, q8 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: vrev64.32 q8, q9 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: vrev64.32 q8, q10 +; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEON-NEXT: add lr, sp, #128 +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d11, d11 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d10, d10 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: ldr r6, [sp, #148] @ 4-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #152 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEON-NEXT: mov r5, r6 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q4 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d11, d11 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d10, d10 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #168 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q4 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d11, d11 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d10, d10 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #184 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q4 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-I64-NEON-NEXT: vorr d0, d11, d11 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d10, d10 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: add r0, sp, #464 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q4 +; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add r0, sp, #480 +; BE-I64-NEON-NEXT: add r5, r6, #192 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: vrev64.32 q8, q5 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add r0, sp, #496 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEON-NEXT: vorr d0, d9, d9 +; BE-I64-NEON-NEXT: vrev64.32 q8, q5 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vorr d0, d8, d8 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: bl lrint +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: add lr, sp, #112 +; BE-I64-NEON-NEXT: add r0, r6, #128 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 q8, q5 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r5:128] +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #200 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #216 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #96 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #80 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; BE-I64-NEON-NEXT: add r0, r6, #64 +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #64 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: add lr, sp, #48 +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; BE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128] +; BE-I64-NEON-NEXT: add sp, sp, #232 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: add sp, sp, #4 +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16f64(<32 x double> %x) + ret <32 x iXLen> %a +} +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) + +define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { +; LE-I32-LABEL: lrint_v1fp128: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v1fp128: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r11, lr} +; LE-I64-NEXT: push {r11, lr} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEXT: vmov.32 d0[1], r1 +; LE-I64-NEXT: pop {r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v1fp128: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r11, lr} +; LE-I32-NEON-NEXT: push {r11, lr} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: pop {r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v1fp128: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r11, lr} +; LE-I64-NEON-NEXT: push {r11, lr} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d0[1], r1 +; LE-I64-NEON-NEXT: pop {r11, pc} +; +; BE-I32-LABEL: lrint_v1fp128: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v1fp128: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r11, lr} +; BE-I64-NEXT: push {r11, lr} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: pop {r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v1fp128: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r11, lr} +; BE-I32-NEON-NEXT: push {r11, lr} +; BE-I32-NEON-NEXT: bl lrintl +; 
BE-I32-NEON-NEXT: pop {r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v1fp128: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r11, lr} +; BE-I64-NEON-NEXT: push {r11, lr} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 d0, d16 +; BE-I64-NEON-NEXT: pop {r11, pc} + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>) + +define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { +; LE-I32-LABEL: lrint_v2fp128: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, lr} +; LE-I32-NEXT: mov r8, r3 +; LE-I32-NEXT: add r3, sp, #24 +; LE-I32-NEXT: mov r5, r2 +; LE-I32-NEXT: mov r6, r1 +; LE-I32-NEXT: mov r7, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: mov r1, r6 +; LE-I32-NEXT: mov r2, r5 +; LE-I32-NEXT: mov r3, r8 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d0[0], r0 +; LE-I32-NEXT: vmov.32 d0[1], r4 +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; LE-I64-LABEL: lrint_v2fp128: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, lr} +; LE-I64-NEXT: .vsave {d8, d9} +; LE-I64-NEXT: vpush {d8, d9} +; LE-I64-NEXT: mov r8, r3 +; LE-I64-NEXT: add r3, sp, #40 +; LE-I64-NEXT: mov r5, r2 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: mov r1, r6 +; LE-I64-NEXT: mov r2, r5 +; LE-I64-NEXT: mov r3, r8 +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d9[1], r4 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q0, q4, q4 +; LE-I64-NEXT: vpop {d8, d9} +; LE-I64-NEXT: pop {r4, r5, r6, 
r7, r8, pc} +; +; LE-I32-NEON-LABEL: lrint_v2fp128: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; LE-I32-NEON-NEXT: mov r8, r3 +; LE-I32-NEON-NEXT: add r3, sp, #24 +; LE-I32-NEON-NEXT: mov r5, r2 +; LE-I32-NEON-NEXT: mov r6, r1 +; LE-I32-NEON-NEXT: mov r7, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: mov r1, r6 +; LE-I32-NEON-NEXT: mov r2, r5 +; LE-I32-NEON-NEXT: mov r3, r8 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d0[0], r0 +; LE-I32-NEON-NEXT: vmov.32 d0[1], r4 +; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; LE-I64-NEON-LABEL: lrint_v2fp128: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; LE-I64-NEON-NEXT: .vsave {d8, d9} +; LE-I64-NEON-NEXT: vpush {d8, d9} +; LE-I64-NEON-NEXT: mov r8, r3 +; LE-I64-NEON-NEXT: add r3, sp, #40 +; LE-I64-NEON-NEXT: mov r5, r2 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: mov r7, r0 +; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: mov r0, r7 +; LE-I64-NEON-NEXT: mov r1, r6 +; LE-I64-NEON-NEXT: mov r2, r5 +; LE-I64-NEON-NEXT: mov r3, r8 +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEON-NEXT: vorr q0, q4, q4 +; LE-I64-NEON-NEXT: vpop {d8, d9} +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; BE-I32-LABEL: lrint_v2fp128: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, lr} +; BE-I32-NEXT: push {r4, r5, r6, r7, r8, lr} +; BE-I32-NEXT: mov r8, r3 +; BE-I32-NEXT: add r3, sp, #24 +; BE-I32-NEXT: mov r5, r2 +; BE-I32-NEXT: mov r6, r1 +; BE-I32-NEXT: mov r7, r0 +; BE-I32-NEXT: 
ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: mov r1, r6 +; BE-I32-NEXT: mov r2, r5 +; BE-I32-NEXT: mov r3, r8 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEXT: vmov.32 d16[1], r4 +; BE-I32-NEXT: vrev64.32 d0, d16 +; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; BE-I64-LABEL: lrint_v2fp128: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, lr} +; BE-I64-NEXT: .vsave {d8} +; BE-I64-NEXT: vpush {d8} +; BE-I64-NEXT: mov r8, r3 +; BE-I64-NEXT: add r3, sp, #32 +; BE-I64-NEXT: mov r5, r2 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: mov r7, r0 +; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: mov r0, r7 +; BE-I64-NEXT: mov r1, r6 +; BE-I64-NEXT: mov r2, r5 +; BE-I64-NEXT: mov r3, r8 +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d8[1], r4 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d8 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: vpop {d8} +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; BE-I32-NEON-LABEL: lrint_v2fp128: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; BE-I32-NEON-NEXT: mov r8, r3 +; BE-I32-NEON-NEXT: add r3, sp, #24 +; BE-I32-NEON-NEXT: mov r5, r2 +; BE-I32-NEON-NEXT: mov r6, r1 +; BE-I32-NEON-NEXT: mov r7, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: mov r1, r6 +; BE-I32-NEON-NEXT: mov r2, r5 +; BE-I32-NEON-NEXT: mov r3, r8 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEON-NEXT: vmov.32 d16[1], r4 +; BE-I32-NEON-NEXT: vrev64.32 d0, d16 +; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; 
BE-I64-NEON-LABEL: lrint_v2fp128: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; BE-I64-NEON-NEXT: .vsave {d8} +; BE-I64-NEON-NEXT: vpush {d8} +; BE-I64-NEON-NEXT: mov r8, r3 +; BE-I64-NEON-NEXT: add r3, sp, #32 +; BE-I64-NEON-NEXT: mov r5, r2 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: mov r7, r0 +; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: mov r0, r7 +; BE-I64-NEON-NEXT: mov r1, r6 +; BE-I64-NEON-NEXT: mov r2, r5 +; BE-I64-NEON-NEXT: mov r3, r8 +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 d1, d8 +; BE-I64-NEON-NEXT: vrev64.32 d0, d16 +; BE-I64-NEON-NEXT: vpop {d8} +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x) + ret <2 x iXLen> %a +} +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>) + +define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { +; LE-I32-LABEL: lrint_v4fp128: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, lr} +; LE-I32-NEXT: push {r4, lr} +; LE-I32-NEXT: .vsave {d8, d9} +; LE-I32-NEXT: vpush {d8, d9} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #60 +; LE-I32-NEXT: ldr r12, [sp, #56] +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r12 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #40 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #28 +; LE-I32-NEXT: ldr r12, [sp, #24] +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r12 +; LE-I32-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vorr q0, q4, q4 +; 
LE-I32-NEXT: vpop {d8, d9} +; LE-I32-NEXT: pop {r4, pc} +; +; LE-I64-LABEL: lrint_v4fp128: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEXT: .vsave {d8, d9, d10, d11} +; LE-I64-NEXT: vpush {d8, d9, d10, d11} +; LE-I64-NEXT: mov r5, r3 +; LE-I64-NEXT: add r3, sp, #96 +; LE-I64-NEXT: mov r7, r2 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: mov r0, r4 +; LE-I64-NEXT: mov r1, r6 +; LE-I64-NEXT: mov r2, r7 +; LE-I64-NEXT: mov r3, r5 +; LE-I64-NEXT: ldr r8, [sp, #80] +; LE-I64-NEXT: ldr r10, [sp, #64] +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #68 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #84 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEXT: vmov.32 d9[1], r9 +; LE-I64-NEXT: vmov.32 d10[1], r5 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q0, q5, q5 +; LE-I64-NEXT: vorr q1, q4, q4 +; LE-I64-NEXT: vpop {d8, d9, d10, d11} +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-I32-NEON-LABEL: lrint_v4fp128: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r4, lr} +; LE-I32-NEON-NEXT: push {r4, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9} +; LE-I32-NEON-NEXT: vpush {d8, d9} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #60 +; LE-I32-NEON-NEXT: ldr r12, [sp, #56] +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: mov r0, r12 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #40 +; LE-I32-NEON-NEXT: mov 
r4, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #28 +; LE-I32-NEON-NEXT: ldr r12, [sp, #24] +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: mov r0, r12 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q4, q4 +; LE-I32-NEON-NEXT: vpop {d8, d9} +; LE-I32-NEON-NEXT: pop {r4, pc} +; +; LE-I64-NEON-LABEL: lrint_v4fp128: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11} +; LE-I64-NEON-NEXT: mov r5, r3 +; LE-I64-NEON-NEXT: add r3, sp, #96 +; LE-I64-NEON-NEXT: mov r7, r2 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: mov r4, r0 +; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: mov r0, r4 +; LE-I64-NEON-NEXT: mov r1, r6 +; LE-I64-NEON-NEXT: mov r2, r7 +; LE-I64-NEON-NEXT: mov r3, r5 +; LE-I64-NEON-NEXT: ldr r8, [sp, #80] +; LE-I64-NEON-NEXT: ldr r10, [sp, #64] +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #68 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: mov r0, r10 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #84 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: mov r0, r8 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r9 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r5 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEON-NEXT: vorr q0, q5, q5 +; 
LE-I64-NEON-NEXT: vorr q1, q4, q4 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11} +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-I32-LABEL: lrint_v4fp128: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, lr} +; BE-I32-NEXT: push {r4, lr} +; BE-I32-NEXT: .vsave {d8, d9} +; BE-I32-NEXT: vpush {d8, d9} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #60 +; BE-I32-NEXT: ldr r12, [sp, #56] +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r0, r12 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #40 +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #28 +; BE-I32-NEXT: ldr r12, [sp, #24] +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r0, r12 +; BE-I32-NEXT: vmov.32 d9[1], r4 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q4 +; BE-I32-NEXT: vpop {d8, d9} +; BE-I32-NEXT: pop {r4, pc} +; +; BE-I64-LABEL: lrint_v4fp128: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10} +; BE-I64-NEXT: vpush {d8, d9, d10} +; BE-I64-NEXT: mov r5, r3 +; BE-I64-NEXT: add r3, sp, #88 +; BE-I64-NEXT: mov r7, r2 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: mov r0, r4 +; BE-I64-NEXT: mov r1, r6 +; BE-I64-NEXT: mov r2, r7 +; BE-I64-NEXT: mov r3, r5 +; BE-I64-NEXT: ldr r8, [sp, #72] +; BE-I64-NEXT: ldr r10, [sp, #56] +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #60 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: mov r0, r10 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #76 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d10[0], 
r0 +; BE-I64-NEXT: mov r0, r8 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d10[1], r4 +; BE-I64-NEXT: vmov.32 d8[1], r9 +; BE-I64-NEXT: vmov.32 d9[1], r5 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d10 +; BE-I64-NEXT: vrev64.32 d3, d8 +; BE-I64-NEXT: vrev64.32 d0, d9 +; BE-I64-NEXT: vrev64.32 d2, d16 +; BE-I64-NEXT: vpop {d8, d9, d10} +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-I32-NEON-LABEL: lrint_v4fp128: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r4, lr} +; BE-I32-NEON-NEXT: push {r4, lr} +; BE-I32-NEON-NEXT: .vsave {d8, d9} +; BE-I32-NEON-NEXT: vpush {d8, d9} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #60 +; BE-I32-NEON-NEXT: ldr r12, [sp, #56] +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: mov r0, r12 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #40 +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #28 +; BE-I32-NEON-NEXT: ldr r12, [sp, #24] +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: mov r0, r12 +; BE-I32-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q0, q4 +; BE-I32-NEON-NEXT: vpop {d8, d9} +; BE-I32-NEON-NEXT: pop {r4, pc} +; +; BE-I64-NEON-LABEL: lrint_v4fp128: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10} +; BE-I64-NEON-NEXT: mov r5, r3 +; BE-I64-NEON-NEXT: add r3, sp, #88 +; BE-I64-NEON-NEXT: mov r7, r2 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: mov r4, r0 +; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; 
BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: mov r0, r4 +; BE-I64-NEON-NEXT: mov r1, r6 +; BE-I64-NEON-NEXT: mov r2, r7 +; BE-I64-NEON-NEXT: mov r3, r5 +; BE-I64-NEON-NEXT: ldr r8, [sp, #72] +; BE-I64-NEON-NEXT: ldr r10, [sp, #56] +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #60 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: mov r0, r10 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #76 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: mov r0, r8 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r9 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r5 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 d1, d10 +; BE-I64-NEON-NEXT: vrev64.32 d3, d8 +; BE-I64-NEON-NEXT: vrev64.32 d0, d9 +; BE-I64-NEON-NEXT: vrev64.32 d2, d16 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10} +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x) + ret <4 x iXLen> %a +} +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) + +define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { +; LE-I32-LABEL: lrint_v8fp128: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11} +; LE-I32-NEXT: vpush {d8, d9, d10, d11} +; LE-I32-NEXT: mov r6, r3 +; LE-I32-NEXT: add r3, sp, #112 +; LE-I32-NEXT: mov r7, r2 +; LE-I32-NEXT: mov r4, r1 +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: mov r0, r5 +; LE-I32-NEXT: mov r1, r4 +; LE-I32-NEXT: mov r2, r7 +; 
LE-I32-NEXT: mov r3, r6 +; LE-I32-NEXT: ldr r8, [sp, #160] +; LE-I32-NEXT: ldr r9, [sp, #64] +; LE-I32-NEXT: ldr r10, [sp, #80] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #84 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: mov r0, r10 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r6, [sp, #96] +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #100] +; LE-I32-NEXT: ldr r2, [sp, #104] +; LE-I32-NEXT: ldr r3, [sp, #108] +; LE-I32-NEXT: mov r0, r6 +; LE-I32-NEXT: ldr r4, [sp, #68] +; LE-I32-NEXT: ldr r5, [sp, #72] +; LE-I32-NEXT: ldr r10, [sp, #164] +; LE-I32-NEXT: ldr r7, [sp, #168] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #76] +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: mov r0, r9 +; LE-I32-NEXT: mov r1, r4 +; LE-I32-NEXT: mov r2, r5 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #172] +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: mov r0, r8 +; LE-I32-NEXT: mov r1, r10 +; LE-I32-NEXT: mov r2, r7 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #144 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #132 +; LE-I32-NEXT: ldr r7, [sp, #128] +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vorr q0, q5, q5 +; LE-I32-NEXT: vorr q1, q4, q4 +; LE-I32-NEXT: vpop {d8, d9, d10, d11} +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-I64-LABEL: lrint_v8fp128: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #8 +; LE-I64-NEXT: sub sp, sp, #8 +; 
LE-I64-NEXT: mov r11, r3 +; LE-I64-NEXT: add r3, sp, #208 +; LE-I64-NEXT: mov r10, r2 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r7, sp, #164 +; LE-I64-NEXT: ldr r6, [sp, #160] +; LE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: ldm r7, {r1, r2, r3, r7} +; LE-I64-NEXT: mov r0, r6 +; LE-I64-NEXT: ldr r8, [sp, #128] +; LE-I64-NEXT: ldr r9, [sp, #144] +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #180 +; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #132 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #148 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: mov r0, r9 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: mov r0, r5 +; LE-I64-NEXT: mov r1, r4 +; LE-I64-NEXT: mov r2, r10 +; LE-I64-NEXT: mov r3, r11 +; LE-I64-NEXT: ldr r6, [sp, #112] +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #116 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: mov r0, r6 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #196 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #192] +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d11[1], r7 +; LE-I64-NEXT: vmov.32 d10[1], r0 +; LE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEXT: vorr q2, q5, q5 +; LE-I64-NEXT: vmov.32 
d13[1], r9 +; LE-I64-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEXT: vorr q0, q7, q7 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q1, q6, q6 +; LE-I64-NEXT: vorr q3, q4, q4 +; LE-I64-NEXT: add sp, sp, #8 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v8fp128: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11} +; LE-I32-NEON-NEXT: mov r6, r3 +; LE-I32-NEON-NEXT: add r3, sp, #112 +; LE-I32-NEON-NEXT: mov r7, r2 +; LE-I32-NEON-NEXT: mov r4, r1 +; LE-I32-NEON-NEXT: mov r5, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: mov r0, r5 +; LE-I32-NEON-NEXT: mov r1, r4 +; LE-I32-NEON-NEXT: mov r2, r7 +; LE-I32-NEON-NEXT: mov r3, r6 +; LE-I32-NEON-NEXT: ldr r8, [sp, #160] +; LE-I32-NEON-NEXT: ldr r9, [sp, #64] +; LE-I32-NEON-NEXT: ldr r10, [sp, #80] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #84 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: mov r0, r10 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r6, [sp, #96] +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #100] +; LE-I32-NEON-NEXT: ldr r2, [sp, #104] +; LE-I32-NEON-NEXT: ldr r3, [sp, #108] +; LE-I32-NEON-NEXT: mov r0, r6 +; LE-I32-NEON-NEXT: ldr r4, [sp, #68] +; LE-I32-NEON-NEXT: ldr r5, [sp, #72] +; LE-I32-NEON-NEXT: ldr r10, [sp, #164] +; LE-I32-NEON-NEXT: ldr r7, [sp, #168] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r3, [sp, #76] +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: mov r0, r9 +; 
LE-I32-NEON-NEXT: mov r1, r4 +; LE-I32-NEON-NEXT: mov r2, r5 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r3, [sp, #172] +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: mov r0, r8 +; LE-I32-NEON-NEXT: mov r1, r10 +; LE-I32-NEON-NEXT: mov r2, r7 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #144 +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #132 +; LE-I32-NEON-NEXT: ldr r7, [sp, #128] +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q5, q5 +; LE-I32-NEON-NEXT: vorr q1, q4, q4 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11} +; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-I64-NEON-LABEL: lrint_v8fp128: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: .pad #4 +; LE-I64-NEON-NEXT: sub sp, sp, #4 +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #8 +; LE-I64-NEON-NEXT: sub sp, sp, #8 +; LE-I64-NEON-NEXT: mov r11, r3 +; LE-I64-NEON-NEXT: add r3, sp, #208 +; LE-I64-NEON-NEXT: mov r10, r2 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: mov r5, r0 +; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r7, sp, #164 +; LE-I64-NEON-NEXT: ldr r6, [sp, #160] +; LE-I64-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: ldm r7, {r1, r2, r3, r7} +; LE-I64-NEON-NEXT: mov r0, r6 +; LE-I64-NEON-NEXT: ldr r8, [sp, #128] +; LE-I64-NEON-NEXT: ldr r9, [sp, #144] +; LE-I64-NEON-NEXT: bl lrintl +; 
LE-I64-NEON-NEXT: add r3, sp, #180 +; LE-I64-NEON-NEXT: str r1, [sp] @ 4-byte Spill +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: mov r0, r7 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #132 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: mov r0, r8 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #148 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: mov r0, r9 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: mov r0, r5 +; LE-I64-NEON-NEXT: mov r1, r4 +; LE-I64-NEON-NEXT: mov r2, r10 +; LE-I64-NEON-NEXT: mov r3, r11 +; LE-I64-NEON-NEXT: ldr r6, [sp, #112] +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #116 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: mov r0, r6 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #196 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #192] +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d11[1], r7 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEON-NEXT: vorr q2, q5, q5 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r9 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEON-NEXT: vorr q0, q7, q7 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEON-NEXT: vorr q1, q6, q6 +; LE-I64-NEON-NEXT: vorr q3, q4, q4 +; LE-I64-NEON-NEXT: 
add sp, sp, #8 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: add sp, sp, #4 +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v8fp128: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEXT: .pad #4 +; BE-I32-NEXT: sub sp, sp, #4 +; BE-I32-NEXT: .vsave {d8, d9, d10, d11} +; BE-I32-NEXT: vpush {d8, d9, d10, d11} +; BE-I32-NEXT: .pad #8 +; BE-I32-NEXT: sub sp, sp, #8 +; BE-I32-NEXT: str r3, [sp, #4] @ 4-byte Spill +; BE-I32-NEXT: add r3, sp, #128 +; BE-I32-NEXT: mov r11, r2 +; BE-I32-NEXT: mov r6, r1 +; BE-I32-NEXT: mov r7, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #100 +; BE-I32-NEXT: ldr r5, [sp, #96] +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: ldr r4, [sp, #160] +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r0, r5 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #164 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: mov r0, r4 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r4, [sp, #176] +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #180] +; BE-I32-NEXT: ldr r2, [sp, #184] +; BE-I32-NEXT: ldr r3, [sp, #188] +; BE-I32-NEXT: mov r0, r4 +; BE-I32-NEXT: ldr r5, [sp, #116] +; BE-I32-NEXT: ldr r8, [sp, #120] +; BE-I32-NEXT: ldr r10, [sp, #84] +; BE-I32-NEXT: ldr r9, [sp, #88] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: ldr r3, [sp, #124] +; BE-I32-NEXT: ldr r0, [sp, #112] +; BE-I32-NEXT: mov r1, r5 +; BE-I32-NEXT: mov r2, r8 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: ldr r3, [sp, #92] +; BE-I32-NEXT: ldr r0, [sp, #80] +; BE-I32-NEXT: mov r1, r10 +; BE-I32-NEXT: mov r2, r9 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: mov r0, r7 
+; BE-I32-NEXT: mov r1, r6 +; BE-I32-NEXT: mov r2, r11 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #148 +; BE-I32-NEXT: ldr r7, [sp, #144] +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: vmov.32 d10[1], r4 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q5 +; BE-I32-NEXT: vrev64.32 q1, q4 +; BE-I32-NEXT: add sp, sp, #8 +; BE-I32-NEXT: vpop {d8, d9, d10, d11} +; BE-I32-NEXT: add sp, sp, #4 +; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I64-LABEL: lrint_v8fp128: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: .pad #16 +; BE-I64-NEXT: sub sp, sp, #16 +; BE-I64-NEXT: str r3, [sp, #4] @ 4-byte Spill +; BE-I64-NEXT: add r3, sp, #208 +; BE-I64-NEXT: mov r11, r2 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: ldr r7, [sp, #176] +; BE-I64-NEXT: add r3, sp, #180 +; BE-I64-NEXT: str r1, [sp, #12] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: mov r0, r7 +; BE-I64-NEXT: ldr r6, [sp, #128] +; BE-I64-NEXT: ldr r8, [sp, #144] +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #132 +; BE-I64-NEXT: str r1, [sp, #8] @ 4-byte Spill +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #148 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: mov r0, r8 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #160 +; BE-I64-NEXT: mov r9, r0 +; BE-I64-NEXT: mov r7, r1 +; 
BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: mov r1, r4 +; BE-I64-NEXT: mov r2, r11 +; BE-I64-NEXT: ldr r10, [sp, #112] +; BE-I64-NEXT: vmov.32 d12[0], r9 +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #116 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: mov r0, r10 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #196 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #192] +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: vmov.32 d9[1], r0 +; BE-I64-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; BE-I64-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEXT: vmov.32 d8[1], r0 +; BE-I64-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEXT: vmov.32 d10[1], r6 +; BE-I64-NEXT: vmov.32 d11[1], r8 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d14 +; BE-I64-NEXT: vrev64.32 d3, d12 +; BE-I64-NEXT: vrev64.32 d5, d9 +; BE-I64-NEXT: vrev64.32 d7, d8 +; BE-I64-NEXT: vrev64.32 d0, d13 +; BE-I64-NEXT: vrev64.32 d2, d10 +; BE-I64-NEXT: vrev64.32 d4, d11 +; BE-I64-NEXT: vrev64.32 d6, d16 +; BE-I64-NEXT: add sp, sp, #16 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v8fp128: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEON-NEXT: .pad #4 +; BE-I32-NEON-NEXT: sub sp, sp, #4 +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11} +; BE-I32-NEON-NEXT: .pad #8 +; BE-I32-NEON-NEXT: sub 
sp, sp, #8 +; BE-I32-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill +; BE-I32-NEON-NEXT: add r3, sp, #128 +; BE-I32-NEON-NEXT: mov r11, r2 +; BE-I32-NEON-NEXT: mov r6, r1 +; BE-I32-NEON-NEXT: mov r7, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #100 +; BE-I32-NEON-NEXT: ldr r5, [sp, #96] +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: ldr r4, [sp, #160] +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: mov r0, r5 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #164 +; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEON-NEXT: mov r0, r4 +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r4, [sp, #176] +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: ldr r1, [sp, #180] +; BE-I32-NEON-NEXT: ldr r2, [sp, #184] +; BE-I32-NEON-NEXT: ldr r3, [sp, #188] +; BE-I32-NEON-NEXT: mov r0, r4 +; BE-I32-NEON-NEXT: ldr r5, [sp, #116] +; BE-I32-NEON-NEXT: ldr r8, [sp, #120] +; BE-I32-NEON-NEXT: ldr r10, [sp, #84] +; BE-I32-NEON-NEXT: ldr r9, [sp, #88] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: ldr r3, [sp, #124] +; BE-I32-NEON-NEXT: ldr r0, [sp, #112] +; BE-I32-NEON-NEXT: mov r1, r5 +; BE-I32-NEON-NEXT: mov r2, r8 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: ldr r3, [sp, #92] +; BE-I32-NEON-NEXT: ldr r0, [sp, #80] +; BE-I32-NEON-NEXT: mov r1, r10 +; BE-I32-NEON-NEXT: mov r2, r9 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: mov r1, r6 +; BE-I32-NEON-NEXT: mov r2, r11 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #148 +; BE-I32-NEON-NEXT: ldr r7, [sp, #144] +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: 
vmov.32 d10[1], r4 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q0, q5 +; BE-I32-NEON-NEXT: vrev64.32 q1, q4 +; BE-I32-NEON-NEXT: add sp, sp, #8 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11} +; BE-I32-NEON-NEXT: add sp, sp, #4 +; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v8fp128: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: .pad #4 +; BE-I64-NEON-NEXT: sub sp, sp, #4 +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEON-NEXT: .pad #16 +; BE-I64-NEON-NEXT: sub sp, sp, #16 +; BE-I64-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill +; BE-I64-NEON-NEXT: add r3, sp, #208 +; BE-I64-NEON-NEXT: mov r11, r2 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: mov r5, r0 +; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: ldr r7, [sp, #176] +; BE-I64-NEON-NEXT: add r3, sp, #180 +; BE-I64-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: mov r0, r7 +; BE-I64-NEON-NEXT: ldr r6, [sp, #128] +; BE-I64-NEON-NEXT: ldr r8, [sp, #144] +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #132 +; BE-I64-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: mov r0, r6 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #148 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: mov r0, r8 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #160 +; BE-I64-NEON-NEXT: mov r9, r0 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: ldm r3, 
{r0, r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: mov r0, r5 +; BE-I64-NEON-NEXT: mov r1, r4 +; BE-I64-NEON-NEXT: mov r2, r11 +; BE-I64-NEON-NEXT: ldr r10, [sp, #112] +; BE-I64-NEON-NEXT: vmov.32 d12[0], r9 +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #116 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: mov r0, r10 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #196 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #192] +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r6 +; BE-I64-NEON-NEXT: vmov.32 d11[1], r8 +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 d1, d14 +; BE-I64-NEON-NEXT: vrev64.32 d3, d12 +; BE-I64-NEON-NEXT: vrev64.32 d5, d9 +; BE-I64-NEON-NEXT: vrev64.32 d7, d8 +; BE-I64-NEON-NEXT: vrev64.32 d0, d13 +; BE-I64-NEON-NEXT: vrev64.32 d2, d10 +; BE-I64-NEON-NEXT: vrev64.32 d4, d11 +; BE-I64-NEON-NEXT: vrev64.32 d6, d16 +; BE-I64-NEON-NEXT: add sp, sp, #16 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEON-NEXT: add sp, sp, #4 +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x) + ret <8 x iXLen> %a +} +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) + +define <16 x iXLen> @lrint_v16fp128(<16 x 
fp128> %x) { +; LE-I32-LABEL: lrint_v16fp128: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I32-NEXT: .pad #4 +; LE-I32-NEXT: sub sp, sp, #4 +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: mov r8, r3 +; LE-I32-NEXT: add r3, sp, #280 +; LE-I32-NEXT: mov r9, r2 +; LE-I32-NEXT: mov r10, r1 +; LE-I32-NEXT: mov r6, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r4, [sp, #216] +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #220] +; LE-I32-NEXT: ldr r2, [sp, #224] +; LE-I32-NEXT: ldr r3, [sp, #228] +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: ldr r7, [sp, #152] +; LE-I32-NEXT: ldr r11, [sp, #104] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #156 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r7, [sp, #184] +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #188] +; LE-I32-NEXT: ldr r2, [sp, #192] +; LE-I32-NEXT: ldr r3, [sp, #196] +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: ldr r4, [sp, #120] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #124 +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r5, [sp, #136] +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #140] +; LE-I32-NEXT: ldr r2, [sp, #144] +; LE-I32-NEXT: ldr r3, [sp, #148] +; LE-I32-NEXT: mov r0, r5 +; LE-I32-NEXT: ldr r4, [sp, #108] +; LE-I32-NEXT: ldr r7, [sp, #112] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #116] +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: mov r0, r11 +; LE-I32-NEXT: mov r1, r4 +; LE-I32-NEXT: mov r2, r7 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: mov r0, r6 +; LE-I32-NEXT: 
mov r1, r10 +; LE-I32-NEXT: mov r2, r9 +; LE-I32-NEXT: mov r3, r8 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r7, [sp, #200] +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #204] +; LE-I32-NEXT: ldr r2, [sp, #208] +; LE-I32-NEXT: ldr r3, [sp, #212] +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: ldr r5, [sp, #172] +; LE-I32-NEXT: vmov.32 d14[1], r4 +; LE-I32-NEXT: ldr r6, [sp, #176] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: ldr r3, [sp, #180] +; LE-I32-NEXT: ldr r0, [sp, #168] +; LE-I32-NEXT: mov r1, r5 +; LE-I32-NEXT: mov r2, r6 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #248 +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r4, [sp, #264] +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #268] +; LE-I32-NEXT: ldr r2, [sp, #272] +; LE-I32-NEXT: vmov.32 d12[1], r5 +; LE-I32-NEXT: ldr r3, [sp, #276] +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: ldr r6, [sp, #236] +; LE-I32-NEXT: ldr r7, [sp, #240] +; LE-I32-NEXT: ldr r8, [sp, #332] +; LE-I32-NEXT: ldr r5, [sp, #336] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: ldr r3, [sp, #244] +; LE-I32-NEXT: ldr r0, [sp, #232] +; LE-I32-NEXT: mov r1, r6 +; LE-I32-NEXT: mov r2, r7 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: ldr r3, [sp, #340] +; LE-I32-NEXT: ldr r0, [sp, #328] +; LE-I32-NEXT: mov r1, r8 +; LE-I32-NEXT: mov r2, r5 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #312 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #300 +; LE-I32-NEXT: ldr r7, [sp, #296] +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vorr q0, q7, q7 +; LE-I32-NEXT: vorr q1, q6, q6 +; LE-I32-NEXT: vorr q2, q5, q5 +; 
LE-I32-NEXT: vorr q3, q4, q4 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: add sp, sp, #4 +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I64-LABEL: lrint_v16fp128: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #72 +; LE-I64-NEXT: sub sp, sp, #72 +; LE-I64-NEXT: mov r6, r3 +; LE-I64-NEXT: add r3, sp, #408 +; LE-I64-NEXT: mov r7, r2 +; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r5, sp, #176 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: ldm r5, {r2, r3, r5} +; LE-I64-NEXT: mov r1, r6 +; LE-I64-NEXT: ldr r8, [sp, #232] +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #188 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: mov r0, r5 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #236 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #252 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #248] +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #268 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #264] +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #284 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #280] +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, 
#316 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #312] +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: ldr r5, [sp, #300] +; LE-I64-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEXT: ldr r2, [sp, #304] +; LE-I64-NEXT: ldr r3, [sp, #308] +; LE-I64-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEXT: ldr r6, [sp, #200] +; LE-I64-NEXT: ldr r7, [sp, #204] +; LE-I64-NEXT: vmov.32 d10[1], r8 +; LE-I64-NEXT: ldr r8, [sp, #344] +; LE-I64-NEXT: vmov.32 d9[1], r11 +; LE-I64-NEXT: ldr r11, [sp, #216] +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #296] +; LE-I64-NEXT: vmov.32 d8[1], r9 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vorr q5, q8, q8 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: vorr q4, q6, q6 +; LE-I64-NEXT: vmov.32 d11[1], r1 +; LE-I64-NEXT: mov r1, r5 +; LE-I64-NEXT: vmov.32 d9[1], r10 +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: ldr r2, [sp, #208] +; LE-I64-NEXT: ldr r3, [sp, #212] +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: mov r0, r6 +; LE-I64-NEXT: mov r1, r7 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #220 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: mov r0, r11 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #348 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #364 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #360] +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; 
LE-I64-NEXT: add r3, sp, #380 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #376] +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #396 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #392] +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #332 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #328] +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add r0, r4, #64 +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov.32 d13[1], r8 +; LE-I64-NEXT: vmov.32 d18[1], r9 +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: vmov.32 d12[1], r1 +; LE-I64-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEXT: vmov.32 d8[1], r7 +; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128] +; LE-I64-NEXT: vmov.32 d11[1], r11 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vmov.32 d10[1], r10 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-I64-NEXT: vst1.64 {d10, d11}, [r4:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]! 
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-I64-NEXT: add sp, sp, #72 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v16fp128: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I32-NEON-NEXT: .pad #4 +; LE-I32-NEON-NEXT: sub sp, sp, #4 +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: mov r8, r3 +; LE-I32-NEON-NEXT: add r3, sp, #280 +; LE-I32-NEON-NEXT: mov r9, r2 +; LE-I32-NEON-NEXT: mov r10, r1 +; LE-I32-NEON-NEXT: mov r6, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r4, [sp, #216] +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #220] +; LE-I32-NEON-NEXT: ldr r2, [sp, #224] +; LE-I32-NEON-NEXT: ldr r3, [sp, #228] +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: ldr r7, [sp, #152] +; LE-I32-NEON-NEXT: ldr r11, [sp, #104] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #156 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r7, [sp, #184] +; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #188] +; LE-I32-NEON-NEXT: ldr r2, [sp, #192] +; LE-I32-NEON-NEXT: ldr r3, [sp, #196] +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: ldr r4, [sp, #120] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #124 +; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r5, [sp, #136] +; LE-I32-NEON-NEXT: 
vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #140] +; LE-I32-NEON-NEXT: ldr r2, [sp, #144] +; LE-I32-NEON-NEXT: ldr r3, [sp, #148] +; LE-I32-NEON-NEXT: mov r0, r5 +; LE-I32-NEON-NEXT: ldr r4, [sp, #108] +; LE-I32-NEON-NEXT: ldr r7, [sp, #112] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r3, [sp, #116] +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: mov r0, r11 +; LE-I32-NEON-NEXT: mov r1, r4 +; LE-I32-NEON-NEXT: mov r2, r7 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: mov r0, r6 +; LE-I32-NEON-NEXT: mov r1, r10 +; LE-I32-NEON-NEXT: mov r2, r9 +; LE-I32-NEON-NEXT: mov r3, r8 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r7, [sp, #200] +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #204] +; LE-I32-NEON-NEXT: ldr r2, [sp, #208] +; LE-I32-NEON-NEXT: ldr r3, [sp, #212] +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: ldr r5, [sp, #172] +; LE-I32-NEON-NEXT: vmov.32 d14[1], r4 +; LE-I32-NEON-NEXT: ldr r6, [sp, #176] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEON-NEXT: ldr r3, [sp, #180] +; LE-I32-NEON-NEXT: ldr r0, [sp, #168] +; LE-I32-NEON-NEXT: mov r1, r5 +; LE-I32-NEON-NEXT: mov r2, r6 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #248 +; LE-I32-NEON-NEXT: mov r5, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r4, [sp, #264] +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #268] +; LE-I32-NEON-NEXT: ldr r2, [sp, #272] +; LE-I32-NEON-NEXT: vmov.32 d12[1], r5 +; LE-I32-NEON-NEXT: ldr r3, [sp, #276] +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: ldr r6, [sp, #236] +; LE-I32-NEON-NEXT: ldr r7, [sp, #240] +; LE-I32-NEON-NEXT: ldr r8, [sp, #332] +; LE-I32-NEON-NEXT: ldr r5, [sp, #336] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: ldr r3, [sp, #244] +; LE-I32-NEON-NEXT: ldr r0, 
[sp, #232] +; LE-I32-NEON-NEXT: mov r1, r6 +; LE-I32-NEON-NEXT: mov r2, r7 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: ldr r3, [sp, #340] +; LE-I32-NEON-NEXT: ldr r0, [sp, #328] +; LE-I32-NEON-NEXT: mov r1, r8 +; LE-I32-NEON-NEXT: mov r2, r5 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #312 +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #300 +; LE-I32-NEON-NEXT: ldr r7, [sp, #296] +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEON-NEXT: vorr q0, q7, q7 +; LE-I32-NEON-NEXT: vorr q1, q6, q6 +; LE-I32-NEON-NEXT: vorr q2, q5, q5 +; LE-I32-NEON-NEXT: vorr q3, q4, q4 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: add sp, sp, #4 +; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v16fp128: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: .pad #4 +; LE-I64-NEON-NEXT: sub sp, sp, #4 +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #72 +; LE-I64-NEON-NEXT: sub sp, sp, #72 +; LE-I64-NEON-NEXT: mov r6, r3 +; LE-I64-NEON-NEXT: add r3, sp, #408 +; LE-I64-NEON-NEXT: mov r7, r2 +; LE-I64-NEON-NEXT: mov r4, r0 +; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r5, sp, #176 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: mov r0, r7 +; LE-I64-NEON-NEXT: ldm r5, {r2, r3, r5} +; LE-I64-NEON-NEXT: mov r1, r6 +; LE-I64-NEON-NEXT: ldr r8, 
[sp, #232] +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #188 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: mov r0, r5 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #236 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: mov r0, r8 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #252 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #248] +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #268 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #264] +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #284 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #280] +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #316 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #312] +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: ldr r5, [sp, #300] +; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEON-NEXT: ldr r2, [sp, #304] +; LE-I64-NEON-NEXT: ldr r3, [sp, #308] +; LE-I64-NEON-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEON-NEXT: ldr r6, [sp, #200] +; LE-I64-NEON-NEXT: ldr r7, [sp, #204] +; LE-I64-NEON-NEXT: vmov.32 d10[1], r8 +; LE-I64-NEON-NEXT: ldr r8, [sp, #344] +; LE-I64-NEON-NEXT: vmov.32 d9[1], r11 +; LE-I64-NEON-NEXT: ldr r11, [sp, #216] +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: 
vmov.32 d17[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #296] +; LE-I64-NEON-NEXT: vmov.32 d8[1], r9 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vorr q5, q8, q8 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: vorr q4, q6, q6 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r1 +; LE-I64-NEON-NEXT: mov r1, r5 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r10 +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: ldr r2, [sp, #208] +; LE-I64-NEON-NEXT: ldr r3, [sp, #212] +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: mov r9, r1 +; LE-I64-NEON-NEXT: mov r0, r6 +; LE-I64-NEON-NEXT: mov r1, r7 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #220 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: mov r0, r11 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #348 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: mov r0, r8 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #364 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #360] +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #380 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #376] +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #396 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #392] +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #332 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; 
LE-I64-NEON-NEXT: ldr r0, [sp, #328] +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: add r0, r4, #64 +; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #24 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r8 +; LE-I64-NEON-NEXT: vmov.32 d18[1], r9 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r1 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEON-NEXT: vmov.32 d8[1], r7 +; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128] +; LE-I64-NEON-NEXT: vmov.32 d11[1], r11 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #40 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r10 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r4:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #56 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! 
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-I64-NEON-NEXT: add sp, sp, #72 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: add sp, sp, #4 +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v16fp128: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEXT: .pad #4 +; BE-I32-NEXT: sub sp, sp, #4 +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #16 +; BE-I32-NEXT: sub sp, sp, #16 +; BE-I32-NEXT: stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill +; BE-I32-NEXT: add r3, sp, #264 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #332 +; BE-I32-NEXT: ldr r7, [sp, #328] +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: ldr r10, [sp, #280] +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: ldr r8, [sp, #168] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r5, [sp, #344] +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #348] +; BE-I32-NEXT: ldr r2, [sp, #352] +; BE-I32-NEXT: ldr r3, [sp, #356] +; BE-I32-NEXT: mov r0, r5 +; BE-I32-NEXT: ldr r7, [sp, #284] +; BE-I32-NEXT: ldr r4, [sp, #288] +; BE-I32-NEXT: ldr r6, [sp, #172] +; BE-I32-NEXT: ldr r9, [sp, #176] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #292] +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: mov r0, r10 +; BE-I32-NEXT: mov r1, r7 +; BE-I32-NEXT: mov r2, r4 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #180] +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: mov r0, r8 +; BE-I32-NEXT: mov r1, r6 +; BE-I32-NEXT: mov r2, r9 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #232 +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl 
+; BE-I32-NEXT: add r3, sp, #136 +; BE-I32-NEXT: mov r6, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r5, [sp, #296] +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #300] +; BE-I32-NEXT: ldr r2, [sp, #304] +; BE-I32-NEXT: ldr r3, [sp, #308] +; BE-I32-NEXT: mov r0, r5 +; BE-I32-NEXT: ldr r10, [sp, #216] +; BE-I32-NEXT: ldr r8, [sp, #220] +; BE-I32-NEXT: ldr r9, [sp, #152] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r7, [sp, #248] +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #252] +; BE-I32-NEXT: ldr r2, [sp, #256] +; BE-I32-NEXT: vmov.32 d8[0], r6 +; BE-I32-NEXT: ldr r3, [sp, #260] +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: ldr r5, [sp, #224] +; BE-I32-NEXT: ldr r11, [sp, #120] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #228] +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: mov r0, r10 +; BE-I32-NEXT: mov r1, r8 +; BE-I32-NEXT: mov r2, r5 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #200 +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: ldr r0, [sp, #184] +; BE-I32-NEXT: ldr r1, [sp, #188] +; BE-I32-NEXT: ldr r2, [sp, #192] +; BE-I32-NEXT: vmov.32 d14[0], r4 +; BE-I32-NEXT: ldr r3, [sp, #196] +; BE-I32-NEXT: vmov.32 d15[1], r5 +; BE-I32-NEXT: ldr r7, [sp, #156] +; BE-I32-NEXT: ldr r6, [sp, #160] +; BE-I32-NEXT: ldr r4, [sp, #124] +; BE-I32-NEXT: ldr r5, [sp, #128] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #164] +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: mov r0, r9 +; BE-I32-NEXT: mov r1, r7 +; BE-I32-NEXT: mov r2, r6 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #132] +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: mov r0, r11 +; BE-I32-NEXT: mov r1, r4 +; BE-I32-NEXT: mov r2, r5 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: 
add r3, sp, #316 +; BE-I32-NEXT: ldr r7, [sp, #312] +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: vmov.32 d12[1], r4 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q6 +; BE-I32-NEXT: vrev64.32 q1, q7 +; BE-I32-NEXT: vrev64.32 q2, q4 +; BE-I32-NEXT: vrev64.32 q3, q5 +; BE-I32-NEXT: add sp, sp, #16 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: add sp, sp, #4 +; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I64-LABEL: lrint_v16fp128: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #56 +; BE-I64-NEXT: sub sp, sp, #56 +; BE-I64-NEXT: mov r5, r3 +; BE-I64-NEXT: add r3, sp, #376 +; BE-I64-NEXT: mov r6, r2 +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: ldr r7, [sp, #392] +; BE-I64-NEXT: add r3, sp, #396 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: mov r0, r7 +; BE-I64-NEXT: ldr r11, [sp, #168] +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: ldr r2, [sp, #160] +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: ldr r3, [sp, #164] +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: mov r1, r5 +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #172 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: mov r0, r11 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #220 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #216] +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl 
lrintl +; BE-I64-NEXT: add r3, sp, #236 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #232] +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #252 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #248] +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #268 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #264] +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #280] +; BE-I64-NEXT: ldr r2, [sp, #288] +; BE-I64-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEXT: ldr r7, [sp, #284] +; BE-I64-NEXT: ldr r3, [sp, #292] +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: ldr r5, [sp, #328] +; BE-I64-NEXT: vmov.32 d12[1], r6 +; BE-I64-NEXT: ldr r6, [sp, #300] +; BE-I64-NEXT: vmov.32 d10[1], r8 +; BE-I64-NEXT: ldr r8, [sp, #184] +; BE-I64-NEXT: vmov.32 d11[1], r11 +; BE-I64-NEXT: vmov.32 d9[1], r10 +; BE-I64-NEXT: vmov.32 d8[1], r9 +; BE-I64-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEXT: mov r1, r7 +; BE-I64-NEXT: vstr d14, [sp, #48] @ 8-byte Spill +; BE-I64-NEXT: vstr d13, [sp, #40] @ 8-byte Spill +; BE-I64-NEXT: vstr d12, [sp, #32] @ 8-byte Spill +; BE-I64-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-I64-NEXT: vstr d10, [sp, #16] @ 8-byte Spill +; BE-I64-NEXT: vstr d9, [sp, #8] @ 8-byte Spill +; BE-I64-NEXT: vstr d8, [sp] @ 8-byte Spill +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: ldr r1, [sp, #296] +; BE-I64-NEXT: ldr r2, [sp, #304] +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: ldr r3, [sp, #308] +; BE-I64-NEXT: mov r0, r1 +; BE-I64-NEXT: mov r1, r6 +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #332 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add 
r3, sp, #188 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: mov r0, r8 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #204 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #200] +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #348 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #344] +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #364 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #360] +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #316 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #312] +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vldr d18, [sp, #48] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d17, d15 +; BE-I64-NEXT: vrev64.32 d16, d18 +; BE-I64-NEXT: vldr d18, [sp, #40] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d24[0], r0 +; BE-I64-NEXT: add r0, r4, #64 +; BE-I64-NEXT: vldr d20, [sp, #32] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d19, d18 +; BE-I64-NEXT: vmov.32 d9[1], r11 +; BE-I64-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEXT: vrev64.32 d18, d20 +; BE-I64-NEXT: vldr d20, [sp, #24] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d8[1], r10 +; BE-I64-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEXT: vmov.32 d24[1], r1 +; BE-I64-NEXT: vldr d22, [sp, #16] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d21, d20 +; BE-I64-NEXT: vrev64.32 d1, d9 +; BE-I64-NEXT: vmov.32 d13[1], r9 +; BE-I64-NEXT: vrev64.32 d31, d10 +; BE-I64-NEXT: vrev64.32 d20, d22 +; BE-I64-NEXT: vldr d22, [sp, #8] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d0, d8 +; BE-I64-NEXT: vrev64.32 d29, d14 +; BE-I64-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEXT: vrev64.32 d30, d24 +; BE-I64-NEXT: vrev64.32 d27, d22 +; BE-I64-NEXT: vldr 
d22, [sp] @ 8-byte Reload +; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]! +; BE-I64-NEXT: vmov.32 d11[1], r8 +; BE-I64-NEXT: vrev64.32 d28, d13 +; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEXT: vrev64.32 d26, d22 +; BE-I64-NEXT: vrev64.32 d23, d12 +; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-I64-NEXT: vrev64.32 d22, d11 +; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-I64-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-I64-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-I64-NEXT: vst1.64 {d18, d19}, [r4:128]! +; BE-I64-NEXT: vst1.64 {d16, d17}, [r4:128] +; BE-I64-NEXT: add sp, sp, #56 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v16fp128: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEON-NEXT: .pad #4 +; BE-I32-NEON-NEXT: sub sp, sp, #4 +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: .pad #16 +; BE-I32-NEON-NEXT: sub sp, sp, #16 +; BE-I32-NEON-NEXT: stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill +; BE-I32-NEON-NEXT: add r3, sp, #264 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #332 +; BE-I32-NEON-NEXT: ldr r7, [sp, #328] +; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEON-NEXT: ldr r10, [sp, #280] +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: ldr r8, [sp, #168] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r5, [sp, #344] +; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEON-NEXT: ldr r1, [sp, #348] +; BE-I32-NEON-NEXT: ldr r2, [sp, #352] +; BE-I32-NEON-NEXT: ldr r3, [sp, #356] +; BE-I32-NEON-NEXT: mov r0, r5 +; BE-I32-NEON-NEXT: ldr r7, [sp, #284] +; BE-I32-NEON-NEXT: ldr r4, 
[sp, #288] +; BE-I32-NEON-NEXT: ldr r6, [sp, #172] +; BE-I32-NEON-NEXT: ldr r9, [sp, #176] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r3, [sp, #292] +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: mov r0, r10 +; BE-I32-NEON-NEXT: mov r1, r7 +; BE-I32-NEON-NEXT: mov r2, r4 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r3, [sp, #180] +; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEON-NEXT: mov r0, r8 +; BE-I32-NEON-NEXT: mov r1, r6 +; BE-I32-NEON-NEXT: mov r2, r9 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #232 +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #136 +; BE-I32-NEON-NEXT: mov r6, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r5, [sp, #296] +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: ldr r1, [sp, #300] +; BE-I32-NEON-NEXT: ldr r2, [sp, #304] +; BE-I32-NEON-NEXT: ldr r3, [sp, #308] +; BE-I32-NEON-NEXT: mov r0, r5 +; BE-I32-NEON-NEXT: ldr r10, [sp, #216] +; BE-I32-NEON-NEXT: ldr r8, [sp, #220] +; BE-I32-NEON-NEXT: ldr r9, [sp, #152] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r7, [sp, #248] +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: ldr r1, [sp, #252] +; BE-I32-NEON-NEXT: ldr r2, [sp, #256] +; BE-I32-NEON-NEXT: vmov.32 d8[0], r6 +; BE-I32-NEON-NEXT: ldr r3, [sp, #260] +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: ldr r5, [sp, #224] +; BE-I32-NEON-NEXT: ldr r11, [sp, #120] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r3, [sp, #228] +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: mov r0, r10 +; BE-I32-NEON-NEXT: mov r1, r8 +; BE-I32-NEON-NEXT: mov r2, r5 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #200 +; BE-I32-NEON-NEXT: mov r5, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; 
BE-I32-NEON-NEXT: ldr r0, [sp, #184] +; BE-I32-NEON-NEXT: ldr r1, [sp, #188] +; BE-I32-NEON-NEXT: ldr r2, [sp, #192] +; BE-I32-NEON-NEXT: vmov.32 d14[0], r4 +; BE-I32-NEON-NEXT: ldr r3, [sp, #196] +; BE-I32-NEON-NEXT: vmov.32 d15[1], r5 +; BE-I32-NEON-NEXT: ldr r7, [sp, #156] +; BE-I32-NEON-NEXT: ldr r6, [sp, #160] +; BE-I32-NEON-NEXT: ldr r4, [sp, #124] +; BE-I32-NEON-NEXT: ldr r5, [sp, #128] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r3, [sp, #164] +; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEON-NEXT: mov r0, r9 +; BE-I32-NEON-NEXT: mov r1, r7 +; BE-I32-NEON-NEXT: mov r2, r6 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r3, [sp, #132] +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: mov r0, r11 +; BE-I32-NEON-NEXT: mov r1, r4 +; BE-I32-NEON-NEXT: mov r2, r5 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #316 +; BE-I32-NEON-NEXT: ldr r7, [sp, #312] +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: vmov.32 d12[1], r4 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: vrev64.32 q0, q6 +; BE-I32-NEON-NEXT: vrev64.32 q1, q7 +; BE-I32-NEON-NEXT: vrev64.32 q2, q4 +; BE-I32-NEON-NEXT: vrev64.32 q3, q5 +; BE-I32-NEON-NEXT: add sp, sp, #16 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: add sp, sp, #4 +; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v16fp128: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: .pad #4 +; BE-I64-NEON-NEXT: sub sp, sp, #4 +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, 
d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: .pad #56 +; BE-I64-NEON-NEXT: sub sp, sp, #56 +; BE-I64-NEON-NEXT: mov r5, r3 +; BE-I64-NEON-NEXT: add r3, sp, #376 +; BE-I64-NEON-NEXT: mov r6, r2 +; BE-I64-NEON-NEXT: mov r4, r0 +; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: ldr r7, [sp, #392] +; BE-I64-NEON-NEXT: add r3, sp, #396 +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: mov r0, r7 +; BE-I64-NEON-NEXT: ldr r11, [sp, #168] +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: ldr r2, [sp, #160] +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: ldr r3, [sp, #164] +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: mov r0, r6 +; BE-I64-NEON-NEXT: mov r1, r5 +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #172 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: mov r0, r11 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #220 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #216] +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #236 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #232] +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #252 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #248] +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #268 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #264] +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 
+; BE-I64-NEON-NEXT: ldr r0, [sp, #280] +; BE-I64-NEON-NEXT: ldr r2, [sp, #288] +; BE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEON-NEXT: ldr r7, [sp, #284] +; BE-I64-NEON-NEXT: ldr r3, [sp, #292] +; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEON-NEXT: ldr r5, [sp, #328] +; BE-I64-NEON-NEXT: vmov.32 d12[1], r6 +; BE-I64-NEON-NEXT: ldr r6, [sp, #300] +; BE-I64-NEON-NEXT: vmov.32 d10[1], r8 +; BE-I64-NEON-NEXT: ldr r8, [sp, #184] +; BE-I64-NEON-NEXT: vmov.32 d11[1], r11 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r10 +; BE-I64-NEON-NEXT: vmov.32 d8[1], r9 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEON-NEXT: mov r1, r7 +; BE-I64-NEON-NEXT: vstr d14, [sp, #48] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d13, [sp, #40] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d12, [sp, #32] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d10, [sp, #16] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d9, [sp, #8] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d8, [sp] @ 8-byte Spill +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: ldr r1, [sp, #296] +; BE-I64-NEON-NEXT: ldr r2, [sp, #304] +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: ldr r3, [sp, #308] +; BE-I64-NEON-NEXT: mov r0, r1 +; BE-I64-NEON-NEXT: mov r1, r6 +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #332 +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: mov r0, r5 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #188 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: mov r0, r8 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #204 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #200] +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; 
BE-I64-NEON-NEXT: add r3, sp, #348 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #344] +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #364 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #360] +; BE-I64-NEON-NEXT: mov r9, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #316 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #312] +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vldr d18, [sp, #48] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d17, d15 +; BE-I64-NEON-NEXT: vrev64.32 d16, d18 +; BE-I64-NEON-NEXT: vldr d18, [sp, #40] @ 8-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d24[0], r0 +; BE-I64-NEON-NEXT: add r0, r4, #64 +; BE-I64-NEON-NEXT: vldr d20, [sp, #32] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d19, d18 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r11 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEON-NEXT: vrev64.32 d18, d20 +; BE-I64-NEON-NEXT: vldr d20, [sp, #24] @ 8-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d8[1], r10 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEON-NEXT: vmov.32 d24[1], r1 +; BE-I64-NEON-NEXT: vldr d22, [sp, #16] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d21, d20 +; BE-I64-NEON-NEXT: vrev64.32 d1, d9 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r9 +; BE-I64-NEON-NEXT: vrev64.32 d31, d10 +; BE-I64-NEON-NEXT: vrev64.32 d20, d22 +; BE-I64-NEON-NEXT: vldr d22, [sp, #8] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d0, d8 +; BE-I64-NEON-NEXT: vrev64.32 d29, d14 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEON-NEXT: vrev64.32 d30, d24 +; BE-I64-NEON-NEXT: vrev64.32 d27, d22 +; BE-I64-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload +; BE-I64-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]! 
+; BE-I64-NEON-NEXT: vmov.32 d11[1], r8 +; BE-I64-NEON-NEXT: vrev64.32 d28, d13 +; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEON-NEXT: vrev64.32 d26, d22 +; BE-I64-NEON-NEXT: vrev64.32 d23, d12 +; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-I64-NEON-NEXT: vrev64.32 d22, d11 +; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r4:128]! +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] +; BE-I64-NEON-NEXT: add sp, sp, #56 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: add sp, sp, #4 +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x) + ret <16 x iXLen> %a +} +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) + +define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { +; LE-I32-LABEL: lrint_v32fp128: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I32-NEXT: .pad #4 +; LE-I32-NEXT: sub sp, sp, #4 +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: .pad #80 +; LE-I32-NEXT: sub sp, sp, #80 +; LE-I32-NEXT: str r3, [sp, #16] @ 4-byte Spill +; LE-I32-NEXT: add r3, sp, #336 +; LE-I32-NEXT: str r2, [sp, #12] @ 4-byte Spill +; LE-I32-NEXT: mov r9, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #244 +; LE-I32-NEXT: ldr r7, [sp, #240] +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: ldr r5, [sp, #288] +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: ldr r8, [sp, #352] +; LE-I32-NEXT: ldr r11, [sp, #656] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #292 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: mov 
r0, r5 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #272 +; LE-I32-NEXT: mov r10, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r6, [sp, #256] +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #260] +; LE-I32-NEXT: ldr r2, [sp, #264] +; LE-I32-NEXT: ldr r3, [sp, #268] +; LE-I32-NEXT: mov r0, r6 +; LE-I32-NEXT: ldr r7, [sp, #660] +; LE-I32-NEXT: vmov.32 d11[1], r10 +; LE-I32-NEXT: ldr r5, [sp, #664] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: ldr r1, [sp, #356] +; LE-I32-NEXT: ldr r2, [sp, #360] +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: ldr r3, [sp, #364] +; LE-I32-NEXT: mov r0, r8 +; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #668] +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: mov r0, r11 +; LE-I32-NEXT: mov r1, r7 +; LE-I32-NEXT: mov r2, r5 +; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #400 +; LE-I32-NEXT: mov r8, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #592 +; LE-I32-NEXT: mov r6, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r4, [sp, #416] +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #420] +; LE-I32-NEXT: ldr r2, [sp, #424] +; LE-I32-NEXT: vmov.32 d13[0], r6 +; LE-I32-NEXT: ldr r3, [sp, #428] +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: ldr r7, [sp, #224] +; LE-I32-NEXT: ldr r10, [sp, #228] +; LE-I32-NEXT: ldr r5, [sp, #232] +; LE-I32-NEXT: ldr r11, [sp, #464] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #236] +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: mov r1, r10 +; LE-I32-NEXT: mov r2, r5 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #208 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: ldm r3, {r0, 
r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: ldr r0, [sp, #672] +; LE-I32-NEXT: ldr r1, [sp, #676] +; LE-I32-NEXT: ldr r2, [sp, #680] +; LE-I32-NEXT: vmov.32 d11[0], r8 +; LE-I32-NEXT: ldr r3, [sp, #684] +; LE-I32-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEXT: ldr r7, [sp, #612] +; LE-I32-NEXT: ldr r6, [sp, #616] +; LE-I32-NEXT: ldr r5, [sp, #468] +; LE-I32-NEXT: ldr r4, [sp, #472] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: ldr r3, [sp, #620] +; LE-I32-NEXT: ldr r0, [sp, #608] +; LE-I32-NEXT: mov r1, r7 +; LE-I32-NEXT: mov r2, r6 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #476] +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: mov r0, r11 +; LE-I32-NEXT: mov r1, r5 +; LE-I32-NEXT: mov r2, r4 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #560 +; LE-I32-NEXT: str r0, [sp, #8] @ 4-byte Spill +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #644 +; LE-I32-NEXT: ldr r7, [sp, #640] +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #624 +; LE-I32-NEXT: mov r11, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #196 +; LE-I32-NEXT: ldr r7, [sp, #192] +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: mov r6, r0 +; LE-I32-NEXT: ldr r2, [sp, #184] +; LE-I32-NEXT: ldr r3, [sp, #188] +; LE-I32-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; LE-I32-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #324 +; LE-I32-NEXT: ldr r7, [sp, #320] +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, 
sp, #304 +; LE-I32-NEXT: mov r7, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: ldr r4, [sp, #368] +; LE-I32-NEXT: ldr r1, [sp, #372] +; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I32-NEXT: ldr r2, [sp, #376] +; LE-I32-NEXT: ldr r3, [sp, #380] +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r5, [sp, #384] +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #388] +; LE-I32-NEXT: ldr r2, [sp, #392] +; LE-I32-NEXT: ldr r3, [sp, #396] +; LE-I32-NEXT: mov r0, r5 +; LE-I32-NEXT: ldr r4, [sp, #432] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: ldr r1, [sp, #436] +; LE-I32-NEXT: ldr r2, [sp, #440] +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: ldr r3, [sp, #444] +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: ldr r0, [sp, #576] +; LE-I32-NEXT: ldr r1, [sp, #580] +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vmov.32 d14[1], r7 +; LE-I32-NEXT: ldr r2, [sp, #584] +; LE-I32-NEXT: ldr r3, [sp, #588] +; LE-I32-NEXT: vmov.32 d10[1], r11 +; LE-I32-NEXT: ldr r8, [sp, #448] +; LE-I32-NEXT: ldr r4, [sp, #544] +; LE-I32-NEXT: ldr r10, [sp, #548] +; LE-I32-NEXT: vmov.32 d8[1], r6 +; LE-I32-NEXT: ldr r7, [sp, #552] +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: ldr r11, [sp, #512] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: ldr r3, [sp, #556] +; LE-I32-NEXT: mov r1, r10 +; LE-I32-NEXT: mov r2, r7 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vmov.32 d16[1], r0 +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #528 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r1, 
[sp, #8] @ 4-byte Reload +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: ldr r0, [sp, #480] +; LE-I32-NEXT: ldr r2, [sp, #488] +; LE-I32-NEXT: vmov.32 d13[0], r1 +; LE-I32-NEXT: ldr r1, [sp, #484] +; LE-I32-NEXT: ldr r3, [sp, #492] +; LE-I32-NEXT: vmov.32 d15[1], r4 +; LE-I32-NEXT: ldr r7, [sp, #452] +; LE-I32-NEXT: ldr r5, [sp, #456] +; LE-I32-NEXT: ldr r6, [sp, #516] +; LE-I32-NEXT: ldr r4, [sp, #520] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #460] +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: mov r0, r8 +; LE-I32-NEXT: mov r1, r7 +; LE-I32-NEXT: mov r2, r5 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #524] +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: mov r0, r11 +; LE-I32-NEXT: mov r1, r6 +; LE-I32-NEXT: mov r2, r4 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: add r3, sp, #496 +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: add r0, r9, #64 +; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: vst1.32 {d12, d13}, [r0:128]! +; LE-I32-NEXT: vmov.32 d14[1], r4 +; LE-I32-NEXT: vst1.32 {d14, d15}, [r0:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vst1.64 {d10, d11}, [r0:128] +; LE-I32-NEXT: vst1.32 {d8, d9}, [r9:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r9:128]! +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: add lr, sp, #16 +; LE-I32-NEXT: vst1.32 {d16, d17}, [r9:128]! 
+; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vst1.64 {d16, d17}, [r9:128] +; LE-I32-NEXT: add sp, sp, #80 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: add sp, sp, #4 +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I64-LABEL: lrint_v32fp128: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #192 +; LE-I64-NEXT: sub sp, sp, #192 +; LE-I64-NEXT: str r3, [sp, #60] @ 4-byte Spill +; LE-I64-NEXT: add r3, sp, #688 +; LE-I64-NEXT: str r2, [sp, #56] @ 4-byte Spill +; LE-I64-NEXT: mov r9, r0 +; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #560 +; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: str r1, [sp, #64] @ 4-byte Spill +; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: ldr r7, [sp, #544] +; LE-I64-NEXT: ldr r6, [sp, #548] +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: ldr r2, [sp, #552] +; LE-I64-NEXT: vmov.32 d17[1], r1 +; LE-I64-NEXT: ldr r3, [sp, #556] +; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: mov r1, r6 +; LE-I64-NEXT: vorr q4, q8, q8 +; LE-I64-NEXT: ldr r5, [sp, #528] +; LE-I64-NEXT: vmov.32 d17[0], r4 +; LE-I64-NEXT: ldr r10, [sp, #304] +; LE-I64-NEXT: ldr r8, [sp, #368] +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #532 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: mov r0, r5 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #308 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: add lr, 
sp, #176 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #372 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #404 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #400] +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #596 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #592] +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #676 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #672] +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: vmov.32 d13[1], r4 +; LE-I64-NEXT: str r1, [sp, #52] @ 4-byte Spill +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: vmov.32 d9[1], r7 +; LE-I64-NEXT: ldr r1, [sp, #628] +; LE-I64-NEXT: ldr r2, [sp, #632] +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #112 +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: ldr r3, [sp, #636] +; LE-I64-NEXT: ldr r7, [sp, #64] @ 4-byte Reload +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vmov.32 d11[1], r10 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d18[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #624] +; LE-I64-NEXT: vmov.32 
d16[1], r11 +; LE-I64-NEXT: vmov.32 d9[1], r5 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: vmov.32 d19[1], r7 +; LE-I64-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #512 +; LE-I64-NEXT: str r0, [sp, #48] @ 4-byte Spill +; LE-I64-NEXT: str r1, [sp, #64] @ 4-byte Spill +; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #768 +; LE-I64-NEXT: mov r11, r0 +; LE-I64-NEXT: str r1, [sp, #28] @ 4-byte Spill +; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: ldr r6, [sp, #784] +; LE-I64-NEXT: add r3, sp, #788 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: mov r0, r6 +; LE-I64-NEXT: ldr r5, [sp, #736] +; LE-I64-NEXT: ldr r7, [sp, #752] +; LE-I64-NEXT: ldr r4, [sp, #720] +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #740 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: mov r0, r5 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #756 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #724 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: mov r0, r4 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: ldr r2, [sp, #296] +; LE-I64-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEXT: ldr r3, [sp, #300] +; LE-I64-NEXT: ldr r4, [sp, #576] +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-I64-NEXT: ldr r10, [sp, #384] +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: ldr r6, [sp, #352] +; LE-I64-NEXT: vmov.32 d14[1], r8 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; 
LE-I64-NEXT: add lr, sp, #32 +; LE-I64-NEXT: vmov.32 d11[1], r1 +; LE-I64-NEXT: ldr r1, [sp, #60] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d8[0], r11 +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: add r3, sp, #356 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: vmov.32 d16[0], r0 +; LE-I64-NEXT: mov r0, r6 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add lr, sp, #112 +; LE-I64-NEXT: add r3, sp, #388 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: add r3, sp, #580 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: mov r0, r4 +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: add r3, sp, #708 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #704] +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d8[1], r4 +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: ldr r2, [sp, #52] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEXT: ldr r6, [sp, #644] +; LE-I64-NEXT: ldr r3, [sp, #652] +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEXT: ldr r4, [sp, #480] +; LE-I64-NEXT: ldr r7, [sp, #656] +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #112 +; LE-I64-NEXT: vstmia lr, {d14, 
d15} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; LE-I64-NEXT: ldr r10, [sp, #496] +; LE-I64-NEXT: vmov.32 d16[1], r5 +; LE-I64-NEXT: add r5, r9, #192 +; LE-I64-NEXT: ldr r8, [sp, #608] +; LE-I64-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d16[1], r0 +; LE-I64-NEXT: ldr r0, [sp, #640] +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d16[1], r2 +; LE-I64-NEXT: ldr r2, [sp, #648] +; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! +; LE-I64-NEXT: vst1.64 {d10, d11}, [r5:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; LE-I64-NEXT: ldr r1, [sp, #48] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d9[0], r1 +; LE-I64-NEXT: mov r1, r6 +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #660 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #484 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: mov r0, r4 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #500 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #612 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #64] @ 4-byte Reload +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: add r8, r9, #128 +; LE-I64-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEXT: ldr r2, [sp, #344] +; LE-I64-NEXT: ldr r3, [sp, #348] +; LE-I64-NEXT: vmov.32 d12[1], r11 +; LE-I64-NEXT: ldr r7, [sp, #452] +; LE-I64-NEXT: ldr r10, [sp, #416] +; LE-I64-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEXT: ldr r0, [sp, #336] +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #64 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #32 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEXT: ldr r4, [sp, #340] +; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128] +; LE-I64-NEXT: mov r1, r4 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vmov.32 d10[1], r6 +; LE-I64-NEXT: ldr r6, [sp, #448] +; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]! 
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: ldr r2, [sp, #456] +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: ldr r3, [sp, #460] +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: mov r0, r6 +; LE-I64-NEXT: mov r1, r7 +; LE-I64-NEXT: ldr r5, [sp, #432] +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #468 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #464] +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #420 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #436 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: mov r0, r5 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #324 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #320] +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add lr, sp, #64 +; LE-I64-NEXT: vmov.32 d9[1], r5 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: vmov.32 d8[1], r4 +; LE-I64-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: add r0, r9, #64 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128] +; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! 
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: vmov.32 d15[1], r11 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #112 +; LE-I64-NEXT: vmov.32 d14[1], r1 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-I64-NEXT: vst1.64 {d14, d15}, [r9:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128] +; LE-I64-NEXT: add sp, sp, #192 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I32-NEON-LABEL: lrint_v32fp128: +; LE-I32-NEON: @ %bb.0: +; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I32-NEON-NEXT: .pad #4 +; LE-I32-NEON-NEXT: sub sp, sp, #4 +; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: .pad #80 +; LE-I32-NEON-NEXT: sub sp, sp, #80 +; LE-I32-NEON-NEXT: str r3, [sp, #16] @ 4-byte Spill +; LE-I32-NEON-NEXT: add r3, sp, #336 +; LE-I32-NEON-NEXT: str r2, [sp, #12] @ 4-byte Spill +; LE-I32-NEON-NEXT: mov r9, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #244 +; LE-I32-NEON-NEXT: ldr r7, [sp, #240] +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: ldr r5, [sp, #288] +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: ldr r8, [sp, #352] +; LE-I32-NEON-NEXT: ldr r11, [sp, #656] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #292 +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: mov r0, r5 +; LE-I32-NEON-NEXT: ldm r3, 
{r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #272 +; LE-I32-NEON-NEXT: mov r10, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r6, [sp, #256] +; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #260] +; LE-I32-NEON-NEXT: ldr r2, [sp, #264] +; LE-I32-NEON-NEXT: ldr r3, [sp, #268] +; LE-I32-NEON-NEXT: mov r0, r6 +; LE-I32-NEON-NEXT: ldr r7, [sp, #660] +; LE-I32-NEON-NEXT: vmov.32 d11[1], r10 +; LE-I32-NEON-NEXT: ldr r5, [sp, #664] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #356] +; LE-I32-NEON-NEXT: ldr r2, [sp, #360] +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: ldr r3, [sp, #364] +; LE-I32-NEON-NEXT: mov r0, r8 +; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r3, [sp, #668] +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEON-NEXT: mov r0, r11 +; LE-I32-NEON-NEXT: mov r1, r7 +; LE-I32-NEON-NEXT: mov r2, r5 +; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #400 +; LE-I32-NEON-NEXT: mov r8, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #592 +; LE-I32-NEON-NEXT: mov r6, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r4, [sp, #416] +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #420] +; LE-I32-NEON-NEXT: ldr r2, [sp, #424] +; LE-I32-NEON-NEXT: vmov.32 d13[0], r6 +; LE-I32-NEON-NEXT: ldr r3, [sp, #428] +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: ldr r7, [sp, #224] +; LE-I32-NEON-NEXT: ldr r10, [sp, #228] +; LE-I32-NEON-NEXT: ldr r5, [sp, #232] +; LE-I32-NEON-NEXT: ldr r11, [sp, #464] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r3, [sp, #236] +; 
LE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: mov r1, r10 +; LE-I32-NEON-NEXT: mov r2, r5 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #208 +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEON-NEXT: ldr r0, [sp, #672] +; LE-I32-NEON-NEXT: ldr r1, [sp, #676] +; LE-I32-NEON-NEXT: ldr r2, [sp, #680] +; LE-I32-NEON-NEXT: vmov.32 d11[0], r8 +; LE-I32-NEON-NEXT: ldr r3, [sp, #684] +; LE-I32-NEON-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEON-NEXT: ldr r7, [sp, #612] +; LE-I32-NEON-NEXT: ldr r6, [sp, #616] +; LE-I32-NEON-NEXT: ldr r5, [sp, #468] +; LE-I32-NEON-NEXT: ldr r4, [sp, #472] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEON-NEXT: ldr r3, [sp, #620] +; LE-I32-NEON-NEXT: ldr r0, [sp, #608] +; LE-I32-NEON-NEXT: mov r1, r7 +; LE-I32-NEON-NEXT: mov r2, r6 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r3, [sp, #476] +; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEON-NEXT: mov r0, r11 +; LE-I32-NEON-NEXT: mov r1, r5 +; LE-I32-NEON-NEXT: mov r2, r4 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #560 +; LE-I32-NEON-NEXT: str r0, [sp, #8] @ 4-byte Spill +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #644 +; LE-I32-NEON-NEXT: ldr r7, [sp, #640] +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #624 +; LE-I32-NEON-NEXT: mov r11, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #196 +; LE-I32-NEON-NEXT: ldr r7, [sp, #192] +; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} 
+; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: mov r6, r0 +; LE-I32-NEON-NEXT: ldr r2, [sp, #184] +; LE-I32-NEON-NEXT: ldr r3, [sp, #188] +; LE-I32-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; LE-I32-NEON-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #324 +; LE-I32-NEON-NEXT: ldr r7, [sp, #320] +; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEON-NEXT: mov r0, r7 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #304 +; LE-I32-NEON-NEXT: mov r7, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: ldr r4, [sp, #368] +; LE-I32-NEON-NEXT: ldr r1, [sp, #372] +; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I32-NEON-NEXT: ldr r2, [sp, #376] +; LE-I32-NEON-NEXT: ldr r3, [sp, #380] +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r5, [sp, #384] +; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #388] +; LE-I32-NEON-NEXT: ldr r2, [sp, #392] +; LE-I32-NEON-NEXT: ldr r3, [sp, #396] +; LE-I32-NEON-NEXT: mov r0, r5 +; LE-I32-NEON-NEXT: ldr r4, [sp, #432] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEON-NEXT: ldr r1, [sp, #436] +; LE-I32-NEON-NEXT: ldr r2, [sp, #440] +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: ldr r3, [sp, #444] +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEON-NEXT: ldr r0, [sp, #576] +; LE-I32-NEON-NEXT: ldr r1, [sp, #580] +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vmov.32 d14[1], r7 +; LE-I32-NEON-NEXT: ldr r2, [sp, #584] +; LE-I32-NEON-NEXT: ldr r3, [sp, #588] +; LE-I32-NEON-NEXT: vmov.32 d10[1], r11 +; 
LE-I32-NEON-NEXT: ldr r8, [sp, #448] +; LE-I32-NEON-NEXT: ldr r4, [sp, #544] +; LE-I32-NEON-NEXT: ldr r10, [sp, #548] +; LE-I32-NEON-NEXT: vmov.32 d8[1], r6 +; LE-I32-NEON-NEXT: ldr r7, [sp, #552] +; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEON-NEXT: ldr r11, [sp, #512] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: ldr r3, [sp, #556] +; LE-I32-NEON-NEXT: mov r1, r10 +; LE-I32-NEON-NEXT: mov r2, r7 +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vmov.32 d16[1], r0 +; LE-I32-NEON-NEXT: mov r0, r4 +; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #528 +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEON-NEXT: ldr r0, [sp, #480] +; LE-I32-NEON-NEXT: ldr r2, [sp, #488] +; LE-I32-NEON-NEXT: vmov.32 d13[0], r1 +; LE-I32-NEON-NEXT: ldr r1, [sp, #484] +; LE-I32-NEON-NEXT: ldr r3, [sp, #492] +; LE-I32-NEON-NEXT: vmov.32 d15[1], r4 +; LE-I32-NEON-NEXT: ldr r7, [sp, #452] +; LE-I32-NEON-NEXT: ldr r5, [sp, #456] +; LE-I32-NEON-NEXT: ldr r6, [sp, #516] +; LE-I32-NEON-NEXT: ldr r4, [sp, #520] +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r3, [sp, #460] +; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEON-NEXT: mov r0, r8 +; LE-I32-NEON-NEXT: mov r1, r7 +; LE-I32-NEON-NEXT: mov r2, r5 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: ldr r3, [sp, #524] +; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEON-NEXT: mov r0, r11 +; LE-I32-NEON-NEXT: mov r1, r6 +; LE-I32-NEON-NEXT: mov r2, r4 +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: add r3, sp, #496 +; LE-I32-NEON-NEXT: mov r4, r0 +; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEON-NEXT: bl lrintl +; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEON-NEXT: add r0, r9, 
#64 +; LE-I32-NEON-NEXT: add lr, sp, #64 +; LE-I32-NEON-NEXT: vst1.32 {d12, d13}, [r0:128]! +; LE-I32-NEON-NEXT: vmov.32 d14[1], r4 +; LE-I32-NEON-NEXT: vst1.32 {d14, d15}, [r0:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #32 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEON-NEXT: vst1.64 {d10, d11}, [r0:128] +; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r9:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #48 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r9:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: add lr, sp, #16 +; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r9:128]! +; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] +; LE-I32-NEON-NEXT: add sp, sp, #80 +; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEON-NEXT: add sp, sp, #4 +; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; LE-I64-NEON-LABEL: lrint_v32fp128: +; LE-I64-NEON: @ %bb.0: +; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEON-NEXT: .pad #4 +; LE-I64-NEON-NEXT: sub sp, sp, #4 +; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: .pad #192 +; LE-I64-NEON-NEXT: sub sp, sp, #192 +; LE-I64-NEON-NEXT: str r3, [sp, #60] @ 4-byte Spill +; LE-I64-NEON-NEXT: add r3, sp, #688 +; LE-I64-NEON-NEXT: str r2, [sp, #56] @ 4-byte Spill +; LE-I64-NEON-NEXT: mov r9, r0 +; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #560 +; LE-I64-NEON-NEXT: mov r4, r0 +; LE-I64-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill +; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 
+; LE-I64-NEON-NEXT: ldr r7, [sp, #544] +; LE-I64-NEON-NEXT: ldr r6, [sp, #548] +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: ldr r2, [sp, #552] +; LE-I64-NEON-NEXT: vmov.32 d17[1], r1 +; LE-I64-NEON-NEXT: ldr r3, [sp, #556] +; LE-I64-NEON-NEXT: mov r0, r7 +; LE-I64-NEON-NEXT: mov r1, r6 +; LE-I64-NEON-NEXT: vorr q4, q8, q8 +; LE-I64-NEON-NEXT: ldr r5, [sp, #528] +; LE-I64-NEON-NEXT: vmov.32 d17[0], r4 +; LE-I64-NEON-NEXT: ldr r10, [sp, #304] +; LE-I64-NEON-NEXT: ldr r8, [sp, #368] +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #532 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: mov r0, r5 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #308 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: mov r0, r10 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #372 +; LE-I64-NEON-NEXT: mov r10, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: mov r0, r8 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #404 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #400] +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #596 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #592] +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #676 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #672] +; 
LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r4 +; LE-I64-NEON-NEXT: str r1, [sp, #52] @ 4-byte Spill +; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #80 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #128 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r7 +; LE-I64-NEON-NEXT: ldr r1, [sp, #628] +; LE-I64-NEON-NEXT: ldr r2, [sp, #632] +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #112 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEON-NEXT: ldr r3, [sp, #636] +; LE-I64-NEON-NEXT: ldr r7, [sp, #64] @ 4-byte Reload +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r10 +; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d18[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #624] +; LE-I64-NEON-NEXT: vmov.32 d16[1], r11 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r5 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: vmov.32 d19[1], r7 +; LE-I64-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #512 +; LE-I64-NEON-NEXT: str r0, [sp, #48] @ 4-byte Spill +; LE-I64-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill +; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #768 +; LE-I64-NEON-NEXT: mov r11, r0 +; LE-I64-NEON-NEXT: str r1, [sp, #28] @ 4-byte Spill +; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: 
ldr r6, [sp, #784] +; LE-I64-NEON-NEXT: add r3, sp, #788 +; LE-I64-NEON-NEXT: mov r8, r1 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: mov r0, r6 +; LE-I64-NEON-NEXT: ldr r5, [sp, #736] +; LE-I64-NEON-NEXT: ldr r7, [sp, #752] +; LE-I64-NEON-NEXT: ldr r4, [sp, #720] +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #740 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: mov r0, r5 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #756 +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: mov r0, r7 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #724 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: mov r0, r4 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: ldr r2, [sp, #296] +; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEON-NEXT: ldr r3, [sp, #300] +; LE-I64-NEON-NEXT: ldr r4, [sp, #576] +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-I64-NEON-NEXT: ldr r10, [sp, #384] +; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEON-NEXT: ldr r6, [sp, #352] +; LE-I64-NEON-NEXT: vmov.32 d14[1], r8 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #32 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r1 +; LE-I64-NEON-NEXT: ldr r1, [sp, #60] @ 4-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d8[0], r11 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: add r3, sp, #356 +; 
LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; LE-I64-NEON-NEXT: mov r0, r6 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add lr, sp, #112 +; LE-I64-NEON-NEXT: add r3, sp, #388 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: mov r0, r10 +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add lr, sp, #128 +; LE-I64-NEON-NEXT: add r3, sp, #580 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: mov r0, r4 +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add lr, sp, #80 +; LE-I64-NEON-NEXT: add r3, sp, #708 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #704] +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: vmov.32 d8[1], r4 +; LE-I64-NEON-NEXT: add lr, sp, #80 +; LE-I64-NEON-NEXT: ldr r2, [sp, #52] @ 4-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEON-NEXT: ldr r6, [sp, #644] +; LE-I64-NEON-NEXT: ldr r3, [sp, #652] +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #128 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEON-NEXT: ldr r4, [sp, #480] +; LE-I64-NEON-NEXT: ldr r7, [sp, #656] +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #112 +; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; 
LE-I64-NEON-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; LE-I64-NEON-NEXT: ldr r10, [sp, #496] +; LE-I64-NEON-NEXT: vmov.32 d16[1], r5 +; LE-I64-NEON-NEXT: add r5, r9, #192 +; LE-I64-NEON-NEXT: ldr r8, [sp, #608] +; LE-I64-NEON-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d16[1], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #640] +; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #8 +; LE-I64-NEON-NEXT: vmov.32 d16[1], r2 +; LE-I64-NEON-NEXT: ldr r2, [sp, #648] +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r5:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! +; LE-I64-NEON-NEXT: ldr r1, [sp, #48] @ 4-byte Reload +; LE-I64-NEON-NEXT: vmov.32 d9[0], r1 +; LE-I64-NEON-NEXT: mov r1, r6 +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #660 +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: mov r0, r7 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #484 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: mov r0, r4 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #500 +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEON-NEXT: mov r0, r10 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #612 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEON-NEXT: mov r0, r8 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; 
LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #64] @ 4-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: add r8, r9, #128 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEON-NEXT: ldr r2, [sp, #344] +; LE-I64-NEON-NEXT: ldr r3, [sp, #348] +; LE-I64-NEON-NEXT: vmov.32 d12[1], r11 +; LE-I64-NEON-NEXT: ldr r7, [sp, #452] +; LE-I64-NEON-NEXT: ldr r10, [sp, #416] +; LE-I64-NEON-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #336] +; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #64 +; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEON-NEXT: add lr, sp, #32 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #144 +; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; LE-I64-NEON-NEXT: ldr r4, [sp, #340] +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] +; LE-I64-NEON-NEXT: mov r1, r4 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #80 +; LE-I64-NEON-NEXT: vmov.32 d10[1], r6 +; LE-I64-NEON-NEXT: ldr r6, [sp, #448] +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! 
+; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: ldr r2, [sp, #456] +; LE-I64-NEON-NEXT: mov r11, r1 +; LE-I64-NEON-NEXT: ldr r3, [sp, #460] +; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEON-NEXT: mov r0, r6 +; LE-I64-NEON-NEXT: mov r1, r7 +; LE-I64-NEON-NEXT: ldr r5, [sp, #432] +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #468 +; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #464] +; LE-I64-NEON-NEXT: mov r6, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #420 +; LE-I64-NEON-NEXT: mov r7, r1 +; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEON-NEXT: mov r0, r10 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #436 +; LE-I64-NEON-NEXT: mov r4, r1 +; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEON-NEXT: mov r0, r5 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add r3, sp, #324 +; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEON-NEXT: ldr r0, [sp, #320] +; LE-I64-NEON-NEXT: mov r5, r1 +; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEON-NEXT: bl lrintl +; LE-I64-NEON-NEXT: add lr, sp, #64 +; LE-I64-NEON-NEXT: vmov.32 d9[1], r5 +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #96 +; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #176 +; LE-I64-NEON-NEXT: vmov.32 d8[1], r4 +; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 +; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEON-NEXT: add r0, r9, #64 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128] +; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! 
+; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #160 +; LE-I64-NEON-NEXT: vmov.32 d15[1], r11 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #112 +; LE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r9:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: add lr, sp, #128 +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] +; LE-I64-NEON-NEXT: add sp, sp, #192 +; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEON-NEXT: add sp, sp, #4 +; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v32fp128: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEXT: .pad #4 +; BE-I32-NEXT: sub sp, sp, #4 +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #104 +; BE-I32-NEXT: sub sp, sp, #104 +; BE-I32-NEXT: mov r4, r3 +; BE-I32-NEXT: add r3, sp, #248 +; BE-I32-NEXT: mov r8, r2 +; BE-I32-NEXT: mov r11, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #616 +; BE-I32-NEXT: mov r9, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #680 +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r7, [sp, #232] +; BE-I32-NEXT: add lr, sp, #72 +; BE-I32-NEXT: ldr r1, [sp, #236] +; BE-I32-NEXT: vmov.32 d17[0], r0 +; BE-I32-NEXT: ldr r2, [sp, #240] +; BE-I32-NEXT: ldr r3, [sp, #244] +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: ldr r10, 
[sp, #376] +; BE-I32-NEXT: vmov.32 d11[0], r5 +; BE-I32-NEXT: ldr r6, [sp, #296] +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #300 +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #380 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: mov r0, r10 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #360 +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d17[0], r0 +; BE-I32-NEXT: ldr r6, [sp, #312] +; BE-I32-NEXT: ldr r1, [sp, #316] +; BE-I32-NEXT: ldr r2, [sp, #320] +; BE-I32-NEXT: ldr r3, [sp, #324] +; BE-I32-NEXT: vmov.32 d17[1], r5 +; BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: ldr r7, [sp, #572] +; BE-I32-NEXT: vorr q4, q8, q8 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r6, [sp, #632] +; BE-I32-NEXT: add lr, sp, #88 +; BE-I32-NEXT: ldr r1, [sp, #636] +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: ldr r2, [sp, #640] +; BE-I32-NEXT: ldr r3, [sp, #644] +; BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: ldr r5, [sp, #576] +; BE-I32-NEXT: vmov.32 d15[1], r9 +; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: ldr r3, [sp, #580] +; BE-I32-NEXT: ldr r0, [sp, #568] +; BE-I32-NEXT: mov r1, r7 +; BE-I32-NEXT: mov r2, r5 +; BE-I32-NEXT: vorr q6, q5, q5 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #552 +; BE-I32-NEXT: mov r9, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #520 +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r6, [sp, #584] +; BE-I32-NEXT: add lr, sp, #8 +; BE-I32-NEXT: ldr r1, [sp, #588] +; BE-I32-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEXT: ldr r2, [sp, #592] +; BE-I32-NEXT: ldr r3, [sp, #596] +; 
BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: vmov.32 d17[0], r5 +; BE-I32-NEXT: ldr r7, [sp, #216] +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #220 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r2, [sp, #208] +; BE-I32-NEXT: mov r7, r0 +; BE-I32-NEXT: ldr r3, [sp, #212] +; BE-I32-NEXT: mov r0, r8 +; BE-I32-NEXT: mov r1, r4 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #456 +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r6, [sp, #328] +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #332] +; BE-I32-NEXT: ldr r2, [sp, #336] +; BE-I32-NEXT: vmov.32 d14[0], r5 +; BE-I32-NEXT: ldr r3, [sp, #340] +; BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: ldr r10, [sp, #504] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r6, [sp, #344] +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #348] +; BE-I32-NEXT: ldr r2, [sp, #352] +; BE-I32-NEXT: ldr r3, [sp, #356] +; BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: ldr r6, [sp, #600] +; BE-I32-NEXT: add lr, sp, #56 +; BE-I32-NEXT: ldr r1, [sp, #604] +; BE-I32-NEXT: vmov.32 d14[1], r7 +; BE-I32-NEXT: ldr r2, [sp, #608] +; BE-I32-NEXT: ldr r3, [sp, #612] +; BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #40 +; BE-I32-NEXT: ldr r5, [sp, #508] +; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: add lr, sp, #24 +; BE-I32-NEXT: ldr r7, [sp, #536] +; BE-I32-NEXT: ldr r1, [sp, #540] +; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #8 +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: ldr r2, [sp, #544] +; BE-I32-NEXT: ldr r3, [sp, #548] +; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte 
Reload +; BE-I32-NEXT: ldr r6, [sp, #512] +; BE-I32-NEXT: vmov.32 d13[1], r9 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #516] +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: mov r0, r10 +; BE-I32-NEXT: mov r1, r5 +; BE-I32-NEXT: mov r2, r6 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #488 +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #424 +; BE-I32-NEXT: mov r7, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r6, [sp, #264] +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #268] +; BE-I32-NEXT: ldr r2, [sp, #272] +; BE-I32-NEXT: vmov.32 d11[0], r7 +; BE-I32-NEXT: ldr r3, [sp, #276] +; BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: ldr r8, [sp, #696] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add lr, sp, #88 +; BE-I32-NEXT: ldr r4, [sp, #472] +; BE-I32-NEXT: ldr r1, [sp, #476] +; BE-I32-NEXT: vmov.32 d11[1], r5 +; BE-I32-NEXT: ldr r2, [sp, #480] +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: ldr r3, [sp, #484] +; BE-I32-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEXT: mov r0, r4 +; BE-I32-NEXT: ldr r6, [sp, #700] +; BE-I32-NEXT: ldr r7, [sp, #704] +; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #708] +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: mov r0, r8 +; BE-I32-NEXT: mov r1, r6 +; BE-I32-NEXT: mov r2, r7 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #648 +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add lr, sp, #72 +; BE-I32-NEXT: ldr r5, [sp, #664] +; BE-I32-NEXT: ldr r1, [sp, #668] +; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEXT: ldr r2, [sp, #672] +; BE-I32-NEXT: ldr r3, [sp, #676] +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: mov r0, r5 +; BE-I32-NEXT: ldr r6, [sp, #444] +; BE-I32-NEXT: vmov.32 d9[1], r4 +; BE-I32-NEXT: ldr r7, [sp, 
#448] +; BE-I32-NEXT: ldr r8, [sp, #412] +; BE-I32-NEXT: ldr r4, [sp, #416] +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: ldr r3, [sp, #452] +; BE-I32-NEXT: ldr r0, [sp, #440] +; BE-I32-NEXT: mov r1, r6 +; BE-I32-NEXT: mov r2, r7 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEXT: ldr r3, [sp, #420] +; BE-I32-NEXT: ldr r0, [sp, #408] +; BE-I32-NEXT: mov r1, r8 +; BE-I32-NEXT: mov r2, r4 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #392 +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add r3, sp, #284 +; BE-I32-NEXT: ldr r7, [sp, #280] +; BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: vmov.32 d14[1], r4 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: add lr, sp, #88 +; BE-I32-NEXT: vrev64.32 q9, q4 +; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #24 +; BE-I32-NEXT: vrev64.32 q8, q7 +; BE-I32-NEXT: vmov.32 d20[1], r0 +; BE-I32-NEXT: add r0, r11, #64 +; BE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]! +; BE-I32-NEXT: vst1.32 {d12, d13}, [r0:128]! +; BE-I32-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #40 +; BE-I32-NEXT: vst1.32 {d22, d23}, [r0:128]! +; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: add lr, sp, #56 +; BE-I32-NEXT: vst1.32 {d18, d19}, [r11:128]! +; BE-I32-NEXT: vst1.32 {d20, d21}, [r11:128]! +; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEXT: vst1.32 {d18, d19}, [r11:128]! 
+; BE-I32-NEXT: vst1.64 {d16, d17}, [r11:128] +; BE-I32-NEXT: add sp, sp, #104 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: add sp, sp, #4 +; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I64-LABEL: lrint_v32fp128: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #152 +; BE-I64-NEXT: sub sp, sp, #152 +; BE-I64-NEXT: str r3, [sp, #120] @ 4-byte Spill +; BE-I64-NEXT: add r3, sp, #712 +; BE-I64-NEXT: str r2, [sp, #112] @ 4-byte Spill +; BE-I64-NEXT: mov r9, r0 +; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: ldr r7, [sp, #648] +; BE-I64-NEXT: add r3, sp, #652 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: mov r0, r7 +; BE-I64-NEXT: ldr r6, [sp, #520] +; BE-I64-NEXT: ldr r8, [sp, #632] +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #524 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #636 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: mov r0, r8 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #488] +; BE-I64-NEXT: vmov.32 d8[1], r4 +; BE-I64-NEXT: ldr r1, [sp, #492] +; BE-I64-NEXT: ldr r2, [sp, #496] +; BE-I64-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEXT: ldr r3, [sp, #500] +; BE-I64-NEXT: vmov.32 d9[1], r5 +; BE-I64-NEXT: vstr d8, [sp, #144] @ 8-byte Spill +; BE-I64-NEXT: vstr d10, [sp, #136] @ 8-byte Spill +; BE-I64-NEXT: vstr d9, [sp, #128] @ 8-byte Spill +; 
BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #680 +; BE-I64-NEXT: str r0, [sp, #104] @ 4-byte Spill +; BE-I64-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #728] +; BE-I64-NEXT: ldr r2, [sp, #736] +; BE-I64-NEXT: vmov.32 d11[1], r6 +; BE-I64-NEXT: ldr r6, [sp, #732] +; BE-I64-NEXT: ldr r3, [sp, #740] +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: ldr r5, [sp, #504] +; BE-I64-NEXT: mov r1, r6 +; BE-I64-NEXT: ldr r7, [sp, #744] +; BE-I64-NEXT: ldr r4, [sp, #748] +; BE-I64-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-I64-NEXT: vstr d16, [sp, #8] @ 8-byte Spill +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: ldr r2, [sp, #752] +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: ldr r3, [sp, #756] +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: mov r0, r7 +; BE-I64-NEXT: mov r1, r4 +; BE-I64-NEXT: ldr r10, [sp, #552] +; BE-I64-NEXT: ldr r6, [sp, #664] +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #508 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #540 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #536] +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #556 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: mov r0, r10 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #668 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #700 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #696] +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vmov.32 d11[0], r0 +; 
BE-I64-NEXT: ldr r0, [sp, #104] @ 4-byte Reload +; BE-I64-NEXT: ldr r2, [sp, #256] +; BE-I64-NEXT: vmov.32 d13[1], r11 +; BE-I64-NEXT: ldr r3, [sp, #260] +; BE-I64-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEXT: ldr r6, [sp, #264] +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: ldr r4, [sp, #344] +; BE-I64-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEXT: ldr r5, [sp, #312] +; BE-I64-NEXT: vmov.32 d8[1], r8 +; BE-I64-NEXT: ldr r8, [sp, #328] +; BE-I64-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEXT: vstr d13, [sp, #32] @ 8-byte Spill +; BE-I64-NEXT: vmov.32 d11[1], r1 +; BE-I64-NEXT: ldr r1, [sp, #120] @ 4-byte Reload +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; BE-I64-NEXT: vstr d14, [sp] @ 8-byte Spill +; BE-I64-NEXT: vstr d9, [sp, #16] @ 8-byte Spill +; BE-I64-NEXT: vstr d12, [sp, #56] @ 8-byte Spill +; BE-I64-NEXT: vstr d10, [sp, #64] @ 8-byte Spill +; BE-I64-NEXT: vstr d8, [sp, #40] @ 8-byte Spill +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #268 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #316 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #332 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: mov r0, r8 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #348 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: mov r0, r4 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #364 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #360] +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #476 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #472] 
+; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-I64-NEXT: ldr r2, [sp, #592] +; BE-I64-NEXT: vldr d20, [sp, #136] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: ldr r1, [sp, #588] +; BE-I64-NEXT: ldr r3, [sp, #596] +; BE-I64-NEXT: vldr d22, [sp, #24] @ 8-byte Reload +; BE-I64-NEXT: vldr d18, [sp, #8] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d21, d20 +; BE-I64-NEXT: vmov.32 d10[1], r6 +; BE-I64-NEXT: ldr r6, [sp, #600] +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: ldr r4, [sp, #616] +; BE-I64-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEXT: ldr r7, [sp, #604] +; BE-I64-NEXT: vmov.32 d8[1], r10 +; BE-I64-NEXT: add r10, r9, #192 +; BE-I64-NEXT: vmov.32 d14[1], r11 +; BE-I64-NEXT: ldr r11, [sp, #440] +; BE-I64-NEXT: vmov.32 d13[1], r0 +; BE-I64-NEXT: ldr r0, [sp, #584] +; BE-I64-NEXT: vmov.32 d15[1], r5 +; BE-I64-NEXT: vstr d16, [sp, #48] @ 8-byte Spill +; BE-I64-NEXT: vldr d16, [sp, #128] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d20, d22 +; BE-I64-NEXT: vldr d22, [sp] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d19, d18 +; BE-I64-NEXT: vrev64.32 d17, d16 +; BE-I64-NEXT: vrev64.32 d18, d22 +; BE-I64-NEXT: vstr d10, [sp, #120] @ 8-byte Spill +; BE-I64-NEXT: vstr d9, [sp, #112] @ 8-byte Spill +; BE-I64-NEXT: vstr d15, [sp, #104] @ 8-byte Spill +; BE-I64-NEXT: vstr d12, [sp, #96] @ 8-byte Spill +; BE-I64-NEXT: vstr d8, [sp, #80] @ 8-byte Spill +; BE-I64-NEXT: vstr d14, [sp, #72] @ 8-byte Spill +; BE-I64-NEXT: vstr d13, [sp, #88] @ 8-byte Spill +; BE-I64-NEXT: vst1.64 {d20, d21}, [r10:128]! +; BE-I64-NEXT: vrev64.32 d16, d11 +; BE-I64-NEXT: vst1.64 {d18, d19}, [r10:128]! +; BE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]! 
+; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: ldr r2, [sp, #608] +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: ldr r3, [sp, #612] +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: mov r1, r7 +; BE-I64-NEXT: ldr r5, [sp, #456] +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #620 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: mov r0, r4 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #444 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: mov r0, r11 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #460 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #572 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #568] +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vldr d16, [sp, #16] @ 8-byte Reload +; BE-I64-NEXT: vldr d18, [sp, #56] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d17, d16 +; BE-I64-NEXT: ldr r2, [sp, #304] +; BE-I64-NEXT: vrev64.32 d16, d18 +; BE-I64-NEXT: ldr r3, [sp, #308] +; BE-I64-NEXT: vldr d18, [sp, #144] @ 8-byte Reload +; BE-I64-NEXT: vldr d20, [sp, #64] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d19, d18 +; BE-I64-NEXT: vrev64.32 d18, d20 +; BE-I64-NEXT: vldr d20, [sp, #40] @ 8-byte Reload +; BE-I64-NEXT: vldr d22, [sp, #32] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #296] +; BE-I64-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEXT: ldr r7, [sp, #412] +; BE-I64-NEXT: vmov.32 d9[1], r6 +; BE-I64-NEXT: ldr r6, [sp, #408] +; BE-I64-NEXT: vmov.32 d8[1], r8 +; BE-I64-NEXT: add r8, r9, #128 +; BE-I64-NEXT: vrev64.32 d21, d20 +; BE-I64-NEXT: vmov.32 d13[1], r5 +; BE-I64-NEXT: ldr r5, [sp, #300] +; BE-I64-NEXT: vrev64.32 d20, d22 +; BE-I64-NEXT: vmov.32 d14[1], r1 +; 
BE-I64-NEXT: mov r1, r5 +; BE-I64-NEXT: vstr d10, [sp, #136] @ 8-byte Spill +; BE-I64-NEXT: vstr d9, [sp, #128] @ 8-byte Spill +; BE-I64-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-I64-NEXT: vst1.64 {d20, d21}, [r10:128] +; BE-I64-NEXT: vst1.64 {d18, d19}, [r8:128]! +; BE-I64-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEXT: ldr r4, [sp, #424] +; BE-I64-NEXT: ldr r10, [sp, #376] +; BE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]! +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: ldr r2, [sp, #416] +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: ldr r3, [sp, #420] +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: mov r1, r7 +; BE-I64-NEXT: ldr r5, [sp, #392] +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #428 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: mov r0, r4 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #380 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: mov r0, r10 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #396 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: add r3, sp, #284 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #280] +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: bl lrintl +; BE-I64-NEXT: vldr d16, [sp, #120] @ 8-byte Reload +; BE-I64-NEXT: vldr d18, [sp, #112] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d17, d16 +; BE-I64-NEXT: vldr d26, [sp, #136] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d16, d18 +; BE-I64-NEXT: vldr d18, [sp, #104] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d31, d26 +; BE-I64-NEXT: vldr d26, [sp, #128] @ 8-byte Reload +; BE-I64-NEXT: vldr d20, [sp, #96] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d19, d18 +; BE-I64-NEXT: vrev64.32 d18, d20 +; BE-I64-NEXT: vldr d20, [sp, #80] @ 8-byte Reload +; 
BE-I64-NEXT: vrev64.32 d30, d26 +; BE-I64-NEXT: vldr d26, [sp, #24] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d10[1], r5 +; BE-I64-NEXT: vldr d22, [sp, #72] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d21, d20 +; BE-I64-NEXT: vrev64.32 d1, d26 +; BE-I64-NEXT: vmov.32 d9[1], r7 +; BE-I64-NEXT: vmov.32 d12[1], r4 +; BE-I64-NEXT: vrev64.32 d20, d22 +; BE-I64-NEXT: vldr d22, [sp, #88] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d8[1], r6 +; BE-I64-NEXT: vrev64.32 d0, d14 +; BE-I64-NEXT: vmov.32 d28[0], r0 +; BE-I64-NEXT: add r0, r9, #64 +; BE-I64-NEXT: vrev64.32 d3, d10 +; BE-I64-NEXT: vldr d24, [sp, #48] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d23, d22 +; BE-I64-NEXT: vrev64.32 d5, d9 +; BE-I64-NEXT: vst1.64 {d0, d1}, [r8:128]! +; BE-I64-NEXT: vrev64.32 d2, d12 +; BE-I64-NEXT: vmov.32 d15[1], r11 +; BE-I64-NEXT: vrev64.32 d22, d24 +; BE-I64-NEXT: vrev64.32 d25, d13 +; BE-I64-NEXT: vrev64.32 d4, d8 +; BE-I64-NEXT: vst1.64 {d30, d31}, [r8:128] +; BE-I64-NEXT: vst1.64 {d2, d3}, [r0:128]! +; BE-I64-NEXT: vmov.32 d28[1], r1 +; BE-I64-NEXT: vrev64.32 d24, d11 +; BE-I64-NEXT: vst1.64 {d4, d5}, [r0:128]! +; BE-I64-NEXT: vrev64.32 d27, d15 +; BE-I64-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-I64-NEXT: vrev64.32 d26, d28 +; BE-I64-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-I64-NEXT: vst1.64 {d20, d21}, [r9:128]! +; BE-I64-NEXT: vst1.64 {d26, d27}, [r9:128]! +; BE-I64-NEXT: vst1.64 {d18, d19}, [r9:128]! 
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r9:128] +; BE-I64-NEXT: add sp, sp, #152 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-NEON-LABEL: lrint_v32fp128: +; BE-I32-NEON: @ %bb.0: +; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I32-NEON-NEXT: .pad #4 +; BE-I32-NEON-NEXT: sub sp, sp, #4 +; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: .pad #104 +; BE-I32-NEON-NEXT: sub sp, sp, #104 +; BE-I32-NEON-NEXT: mov r4, r3 +; BE-I32-NEON-NEXT: add r3, sp, #248 +; BE-I32-NEON-NEXT: mov r8, r2 +; BE-I32-NEON-NEXT: mov r11, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #616 +; BE-I32-NEON-NEXT: mov r9, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #680 +; BE-I32-NEON-NEXT: mov r5, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r7, [sp, #232] +; BE-I32-NEON-NEXT: add lr, sp, #72 +; BE-I32-NEON-NEXT: ldr r1, [sp, #236] +; BE-I32-NEON-NEXT: vmov.32 d17[0], r0 +; BE-I32-NEON-NEXT: ldr r2, [sp, #240] +; BE-I32-NEON-NEXT: ldr r3, [sp, #244] +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: ldr r10, [sp, #376] +; BE-I32-NEON-NEXT: vmov.32 d11[0], r5 +; BE-I32-NEON-NEXT: ldr r6, [sp, #296] +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #300 +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEON-NEXT: mov r0, r6 +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #380 +; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEON-NEXT: mov r0, r10 +; BE-I32-NEON-NEXT: ldm 
r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #360 +; BE-I32-NEON-NEXT: mov r5, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d17[0], r0 +; BE-I32-NEON-NEXT: ldr r6, [sp, #312] +; BE-I32-NEON-NEXT: ldr r1, [sp, #316] +; BE-I32-NEON-NEXT: ldr r2, [sp, #320] +; BE-I32-NEON-NEXT: ldr r3, [sp, #324] +; BE-I32-NEON-NEXT: vmov.32 d17[1], r5 +; BE-I32-NEON-NEXT: mov r0, r6 +; BE-I32-NEON-NEXT: ldr r7, [sp, #572] +; BE-I32-NEON-NEXT: vorr q4, q8, q8 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r6, [sp, #632] +; BE-I32-NEON-NEXT: add lr, sp, #88 +; BE-I32-NEON-NEXT: ldr r1, [sp, #636] +; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEON-NEXT: ldr r2, [sp, #640] +; BE-I32-NEON-NEXT: ldr r3, [sp, #644] +; BE-I32-NEON-NEXT: mov r0, r6 +; BE-I32-NEON-NEXT: ldr r5, [sp, #576] +; BE-I32-NEON-NEXT: vmov.32 d15[1], r9 +; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEON-NEXT: ldr r3, [sp, #580] +; BE-I32-NEON-NEXT: ldr r0, [sp, #568] +; BE-I32-NEON-NEXT: mov r1, r7 +; BE-I32-NEON-NEXT: mov r2, r5 +; BE-I32-NEON-NEXT: vorr q6, q5, q5 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #552 +; BE-I32-NEON-NEXT: mov r9, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #520 +; BE-I32-NEON-NEXT: mov r5, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r6, [sp, #584] +; BE-I32-NEON-NEXT: add lr, sp, #8 +; BE-I32-NEON-NEXT: ldr r1, [sp, #588] +; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEON-NEXT: ldr r2, [sp, #592] +; BE-I32-NEON-NEXT: ldr r3, [sp, #596] +; BE-I32-NEON-NEXT: mov r0, r6 +; BE-I32-NEON-NEXT: vmov.32 d17[0], r5 +; BE-I32-NEON-NEXT: ldr r7, [sp, #216] +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintl 
+; BE-I32-NEON-NEXT: add r3, sp, #220 +; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r2, [sp, #208] +; BE-I32-NEON-NEXT: mov r7, r0 +; BE-I32-NEON-NEXT: ldr r3, [sp, #212] +; BE-I32-NEON-NEXT: mov r0, r8 +; BE-I32-NEON-NEXT: mov r1, r4 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #456 +; BE-I32-NEON-NEXT: mov r5, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r6, [sp, #328] +; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEON-NEXT: ldr r1, [sp, #332] +; BE-I32-NEON-NEXT: ldr r2, [sp, #336] +; BE-I32-NEON-NEXT: vmov.32 d14[0], r5 +; BE-I32-NEON-NEXT: ldr r3, [sp, #340] +; BE-I32-NEON-NEXT: mov r0, r6 +; BE-I32-NEON-NEXT: ldr r10, [sp, #504] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r6, [sp, #344] +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: ldr r1, [sp, #348] +; BE-I32-NEON-NEXT: ldr r2, [sp, #352] +; BE-I32-NEON-NEXT: ldr r3, [sp, #356] +; BE-I32-NEON-NEXT: mov r0, r6 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: ldr r6, [sp, #600] +; BE-I32-NEON-NEXT: add lr, sp, #56 +; BE-I32-NEON-NEXT: ldr r1, [sp, #604] +; BE-I32-NEON-NEXT: vmov.32 d14[1], r7 +; BE-I32-NEON-NEXT: ldr r2, [sp, #608] +; BE-I32-NEON-NEXT: ldr r3, [sp, #612] +; BE-I32-NEON-NEXT: mov r0, r6 +; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #40 +; BE-I32-NEON-NEXT: ldr r5, [sp, #508] +; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEON-NEXT: add lr, sp, #24 +; BE-I32-NEON-NEXT: ldr r7, [sp, #536] +; BE-I32-NEON-NEXT: ldr r1, [sp, #540] +; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEON-NEXT: add lr, sp, #8 +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: ldr r2, [sp, #544] 
+; BE-I32-NEON-NEXT: ldr r3, [sp, #548] +; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I32-NEON-NEXT: ldr r6, [sp, #512] +; BE-I32-NEON-NEXT: vmov.32 d13[1], r9 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r3, [sp, #516] +; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEON-NEXT: mov r0, r10 +; BE-I32-NEON-NEXT: mov r1, r5 +; BE-I32-NEON-NEXT: mov r2, r6 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #488 +; BE-I32-NEON-NEXT: mov r5, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #424 +; BE-I32-NEON-NEXT: mov r7, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r6, [sp, #264] +; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEON-NEXT: ldr r1, [sp, #268] +; BE-I32-NEON-NEXT: ldr r2, [sp, #272] +; BE-I32-NEON-NEXT: vmov.32 d11[0], r7 +; BE-I32-NEON-NEXT: ldr r3, [sp, #276] +; BE-I32-NEON-NEXT: mov r0, r6 +; BE-I32-NEON-NEXT: ldr r8, [sp, #696] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add lr, sp, #88 +; BE-I32-NEON-NEXT: ldr r4, [sp, #472] +; BE-I32-NEON-NEXT: ldr r1, [sp, #476] +; BE-I32-NEON-NEXT: vmov.32 d11[1], r5 +; BE-I32-NEON-NEXT: ldr r2, [sp, #480] +; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEON-NEXT: ldr r3, [sp, #484] +; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I32-NEON-NEXT: mov r0, r4 +; BE-I32-NEON-NEXT: ldr r6, [sp, #700] +; BE-I32-NEON-NEXT: ldr r7, [sp, #704] +; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: ldr r3, [sp, #708] +; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEON-NEXT: mov r0, r8 +; BE-I32-NEON-NEXT: mov r1, r6 +; BE-I32-NEON-NEXT: mov r2, r7 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #648 +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add lr, sp, #72 +; 
BE-I32-NEON-NEXT: ldr r5, [sp, #664] +; BE-I32-NEON-NEXT: ldr r1, [sp, #668] +; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I32-NEON-NEXT: ldr r2, [sp, #672] +; BE-I32-NEON-NEXT: ldr r3, [sp, #676] +; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEON-NEXT: mov r0, r5 +; BE-I32-NEON-NEXT: ldr r6, [sp, #444] +; BE-I32-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I32-NEON-NEXT: ldr r7, [sp, #448] +; BE-I32-NEON-NEXT: ldr r8, [sp, #412] +; BE-I32-NEON-NEXT: ldr r4, [sp, #416] +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEON-NEXT: ldr r3, [sp, #452] +; BE-I32-NEON-NEXT: ldr r0, [sp, #440] +; BE-I32-NEON-NEXT: mov r1, r6 +; BE-I32-NEON-NEXT: mov r2, r7 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 +; BE-I32-NEON-NEXT: ldr r3, [sp, #420] +; BE-I32-NEON-NEXT: ldr r0, [sp, #408] +; BE-I32-NEON-NEXT: mov r1, r8 +; BE-I32-NEON-NEXT: mov r2, r4 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #392 +; BE-I32-NEON-NEXT: mov r4, r0 +; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add r3, sp, #284 +; BE-I32-NEON-NEXT: ldr r7, [sp, #280] +; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEON-NEXT: mov r0, r7 +; BE-I32-NEON-NEXT: vmov.32 d14[1], r4 +; BE-I32-NEON-NEXT: bl lrintl +; BE-I32-NEON-NEXT: add lr, sp, #88 +; BE-I32-NEON-NEXT: vrev64.32 q9, q4 +; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #24 +; BE-I32-NEON-NEXT: vrev64.32 q8, q7 +; BE-I32-NEON-NEXT: vmov.32 d20[1], r0 +; BE-I32-NEON-NEXT: add r0, r11, #64 +; BE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]! +; BE-I32-NEON-NEXT: vst1.32 {d12, d13}, [r0:128]! +; BE-I32-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #40 +; BE-I32-NEON-NEXT: vst1.32 {d22, d23}, [r0:128]! 
+; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: add lr, sp, #56 +; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r11:128]! +; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r11:128]! +; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r11:128]! +; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] +; BE-I32-NEON-NEXT: add sp, sp, #104 +; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEON-NEXT: add sp, sp, #4 +; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I64-NEON-LABEL: lrint_v32fp128: +; BE-I64-NEON: @ %bb.0: +; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEON-NEXT: .pad #4 +; BE-I64-NEON-NEXT: sub sp, sp, #4 +; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: .pad #152 +; BE-I64-NEON-NEXT: sub sp, sp, #152 +; BE-I64-NEON-NEXT: str r3, [sp, #120] @ 4-byte Spill +; BE-I64-NEON-NEXT: add r3, sp, #712 +; BE-I64-NEON-NEXT: str r2, [sp, #112] @ 4-byte Spill +; BE-I64-NEON-NEXT: mov r9, r0 +; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: ldr r7, [sp, #648] +; BE-I64-NEON-NEXT: add r3, sp, #652 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: mov r0, r7 +; BE-I64-NEON-NEXT: ldr r6, [sp, #520] +; BE-I64-NEON-NEXT: ldr r8, [sp, #632] +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #524 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: mov r0, r6 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #636 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; 
BE-I64-NEON-NEXT: mov r0, r8 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #488] +; BE-I64-NEON-NEXT: vmov.32 d8[1], r4 +; BE-I64-NEON-NEXT: ldr r1, [sp, #492] +; BE-I64-NEON-NEXT: ldr r2, [sp, #496] +; BE-I64-NEON-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEON-NEXT: ldr r3, [sp, #500] +; BE-I64-NEON-NEXT: vmov.32 d9[1], r5 +; BE-I64-NEON-NEXT: vstr d8, [sp, #144] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #680 +; BE-I64-NEON-NEXT: str r0, [sp, #104] @ 4-byte Spill +; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #728] +; BE-I64-NEON-NEXT: ldr r2, [sp, #736] +; BE-I64-NEON-NEXT: vmov.32 d11[1], r6 +; BE-I64-NEON-NEXT: ldr r6, [sp, #732] +; BE-I64-NEON-NEXT: ldr r3, [sp, #740] +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: ldr r5, [sp, #504] +; BE-I64-NEON-NEXT: mov r1, r6 +; BE-I64-NEON-NEXT: ldr r7, [sp, #744] +; BE-I64-NEON-NEXT: ldr r4, [sp, #748] +; BE-I64-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d16, [sp, #8] @ 8-byte Spill +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: ldr r2, [sp, #752] +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: ldr r3, [sp, #756] +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: mov r0, r7 +; BE-I64-NEON-NEXT: mov r1, r4 +; BE-I64-NEON-NEXT: ldr r10, [sp, #552] +; BE-I64-NEON-NEXT: ldr r6, [sp, #664] +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #508 +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: mov r0, r5 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; 
BE-I64-NEON-NEXT: add r3, sp, #540 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #536] +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #556 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: mov r0, r10 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #668 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: mov r0, r6 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #700 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #696] +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload +; BE-I64-NEON-NEXT: ldr r2, [sp, #256] +; BE-I64-NEON-NEXT: vmov.32 d13[1], r11 +; BE-I64-NEON-NEXT: ldr r3, [sp, #260] +; BE-I64-NEON-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEON-NEXT: ldr r6, [sp, #264] +; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEON-NEXT: ldr r4, [sp, #344] +; BE-I64-NEON-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEON-NEXT: ldr r5, [sp, #312] +; BE-I64-NEON-NEXT: vmov.32 d8[1], r8 +; BE-I64-NEON-NEXT: ldr r8, [sp, #328] +; BE-I64-NEON-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEON-NEXT: vstr d13, [sp, #32] @ 8-byte Spill +; BE-I64-NEON-NEXT: vmov.32 d11[1], r1 +; BE-I64-NEON-NEXT: ldr r1, [sp, #120] @ 4-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; BE-I64-NEON-NEXT: vstr d14, [sp] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d9, [sp, #16] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d12, [sp, #56] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d10, [sp, #64] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d8, [sp, #40] @ 8-byte Spill +; BE-I64-NEON-NEXT: bl lrintl 
+; BE-I64-NEON-NEXT: add r3, sp, #268 +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: mov r0, r6 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #316 +; BE-I64-NEON-NEXT: mov r10, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: mov r0, r5 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #332 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: mov r0, r8 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #348 +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: mov r0, r4 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #364 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #360] +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #476 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #472] +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-I64-NEON-NEXT: ldr r2, [sp, #592] +; BE-I64-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEON-NEXT: ldr r1, [sp, #588] +; BE-I64-NEON-NEXT: ldr r3, [sp, #596] +; BE-I64-NEON-NEXT: vldr d22, [sp, #24] @ 8-byte Reload +; BE-I64-NEON-NEXT: vldr d18, [sp, #8] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d21, d20 +; BE-I64-NEON-NEXT: vmov.32 d10[1], r6 +; BE-I64-NEON-NEXT: ldr r6, [sp, #600] +; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEON-NEXT: ldr r4, [sp, #616] +; BE-I64-NEON-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEON-NEXT: ldr r7, 
[sp, #604] +; BE-I64-NEON-NEXT: vmov.32 d8[1], r10 +; BE-I64-NEON-NEXT: add r10, r9, #192 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r11 +; BE-I64-NEON-NEXT: ldr r11, [sp, #440] +; BE-I64-NEON-NEXT: vmov.32 d13[1], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #584] +; BE-I64-NEON-NEXT: vmov.32 d15[1], r5 +; BE-I64-NEON-NEXT: vstr d16, [sp, #48] @ 8-byte Spill +; BE-I64-NEON-NEXT: vldr d16, [sp, #128] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d20, d22 +; BE-I64-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d19, d18 +; BE-I64-NEON-NEXT: vrev64.32 d17, d16 +; BE-I64-NEON-NEXT: vrev64.32 d18, d22 +; BE-I64-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d9, [sp, #112] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d15, [sp, #104] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d12, [sp, #96] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d8, [sp, #80] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d14, [sp, #72] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d13, [sp, #88] @ 8-byte Spill +; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]! +; BE-I64-NEON-NEXT: vrev64.32 d16, d11 +; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]! +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! 
+; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: ldr r2, [sp, #608] +; BE-I64-NEON-NEXT: mov r8, r1 +; BE-I64-NEON-NEXT: ldr r3, [sp, #612] +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: mov r0, r6 +; BE-I64-NEON-NEXT: mov r1, r7 +; BE-I64-NEON-NEXT: ldr r5, [sp, #456] +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #620 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: mov r0, r4 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #444 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: mov r0, r11 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #460 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEON-NEXT: mov r0, r5 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #572 +; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #568] +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vldr d16, [sp, #16] @ 8-byte Reload +; BE-I64-NEON-NEXT: vldr d18, [sp, #56] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d17, d16 +; BE-I64-NEON-NEXT: ldr r2, [sp, #304] +; BE-I64-NEON-NEXT: vrev64.32 d16, d18 +; BE-I64-NEON-NEXT: ldr r3, [sp, #308] +; BE-I64-NEON-NEXT: vldr d18, [sp, #144] @ 8-byte Reload +; BE-I64-NEON-NEXT: vldr d20, [sp, #64] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d19, d18 +; BE-I64-NEON-NEXT: vrev64.32 d18, d20 +; BE-I64-NEON-NEXT: vldr d20, [sp, #40] @ 8-byte Reload +; BE-I64-NEON-NEXT: vldr d22, [sp, #32] @ 8-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #296] +; BE-I64-NEON-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEON-NEXT: ldr r7, [sp, #412] +; BE-I64-NEON-NEXT: vmov.32 d9[1], r6 +; BE-I64-NEON-NEXT: ldr r6, 
[sp, #408] +; BE-I64-NEON-NEXT: vmov.32 d8[1], r8 +; BE-I64-NEON-NEXT: add r8, r9, #128 +; BE-I64-NEON-NEXT: vrev64.32 d21, d20 +; BE-I64-NEON-NEXT: vmov.32 d13[1], r5 +; BE-I64-NEON-NEXT: ldr r5, [sp, #300] +; BE-I64-NEON-NEXT: vrev64.32 d20, d22 +; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 +; BE-I64-NEON-NEXT: mov r1, r5 +; BE-I64-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill +; BE-I64-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r10:128] +; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r8:128]! +; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 +; BE-I64-NEON-NEXT: ldr r4, [sp, #424] +; BE-I64-NEON-NEXT: ldr r10, [sp, #376] +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: ldr r2, [sp, #416] +; BE-I64-NEON-NEXT: mov r11, r1 +; BE-I64-NEON-NEXT: ldr r3, [sp, #420] +; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEON-NEXT: mov r0, r6 +; BE-I64-NEON-NEXT: mov r1, r7 +; BE-I64-NEON-NEXT: ldr r5, [sp, #392] +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #428 +; BE-I64-NEON-NEXT: mov r6, r1 +; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEON-NEXT: mov r0, r4 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #380 +; BE-I64-NEON-NEXT: mov r7, r1 +; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEON-NEXT: mov r0, r10 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #396 +; BE-I64-NEON-NEXT: mov r4, r1 +; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEON-NEXT: mov r0, r5 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: add r3, sp, #284 +; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEON-NEXT: ldr r0, [sp, #280] +; BE-I64-NEON-NEXT: mov r5, r1 +; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEON-NEXT: bl lrintl +; BE-I64-NEON-NEXT: vldr d16, [sp, #120] @ 8-byte 
Reload +; BE-I64-NEON-NEXT: vldr d18, [sp, #112] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d17, d16 +; BE-I64-NEON-NEXT: vldr d26, [sp, #136] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d16, d18 +; BE-I64-NEON-NEXT: vldr d18, [sp, #104] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d31, d26 +; BE-I64-NEON-NEXT: vldr d26, [sp, #128] @ 8-byte Reload +; BE-I64-NEON-NEXT: vldr d20, [sp, #96] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d19, d18 +; BE-I64-NEON-NEXT: vrev64.32 d18, d20 +; BE-I64-NEON-NEXT: vldr d20, [sp, #80] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d30, d26 +; BE-I64-NEON-NEXT: vldr d26, [sp, #24] @ 8-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d10[1], r5 +; BE-I64-NEON-NEXT: vldr d22, [sp, #72] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d21, d20 +; BE-I64-NEON-NEXT: vrev64.32 d1, d26 +; BE-I64-NEON-NEXT: vmov.32 d9[1], r7 +; BE-I64-NEON-NEXT: vmov.32 d12[1], r4 +; BE-I64-NEON-NEXT: vrev64.32 d20, d22 +; BE-I64-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload +; BE-I64-NEON-NEXT: vmov.32 d8[1], r6 +; BE-I64-NEON-NEXT: vrev64.32 d0, d14 +; BE-I64-NEON-NEXT: vmov.32 d28[0], r0 +; BE-I64-NEON-NEXT: add r0, r9, #64 +; BE-I64-NEON-NEXT: vrev64.32 d3, d10 +; BE-I64-NEON-NEXT: vldr d24, [sp, #48] @ 8-byte Reload +; BE-I64-NEON-NEXT: vrev64.32 d23, d22 +; BE-I64-NEON-NEXT: vrev64.32 d5, d9 +; BE-I64-NEON-NEXT: vst1.64 {d0, d1}, [r8:128]! +; BE-I64-NEON-NEXT: vrev64.32 d2, d12 +; BE-I64-NEON-NEXT: vmov.32 d15[1], r11 +; BE-I64-NEON-NEXT: vrev64.32 d22, d24 +; BE-I64-NEON-NEXT: vrev64.32 d25, d13 +; BE-I64-NEON-NEXT: vrev64.32 d4, d8 +; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r8:128] +; BE-I64-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]! +; BE-I64-NEON-NEXT: vmov.32 d28[1], r1 +; BE-I64-NEON-NEXT: vrev64.32 d24, d11 +; BE-I64-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]! +; BE-I64-NEON-NEXT: vrev64.32 d27, d15 +; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]! 
+; BE-I64-NEON-NEXT: vrev64.32 d26, d28 +; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r9:128]! +; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r9:128]! +; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r9:128]! +; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] +; BE-I64-NEON-NEXT: add sp, sp, #152 +; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEON-NEXT: add sp, sp, #4 +; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16fp128(<32 x fp128> %x) + ret <32 x iXLen> %a +} +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>) From 9aadce5ec090e3a403f516031d807639f4da2524 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 8 Aug 2025 07:27:23 -0500 Subject: [PATCH 5/8] nounwind for vector tests since cfi directives are causing CI failures --- .../AArch64/sve-fixed-vector-llrint.ll | 76 +- .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 170 +-- llvm/test/CodeGen/AArch64/vector-llrint.ll | 73 +- llvm/test/CodeGen/AArch64/vector-lrint.ll | 95 +- llvm/test/CodeGen/PowerPC/vector-llrint.ll | 916 ++++--------- llvm/test/CodeGen/PowerPC/vector-lrint.ll | 1213 +++++------------ llvm/test/CodeGen/X86/vector-llrint-f16.ll | 12 +- llvm/test/CodeGen/X86/vector-llrint.ll | 134 +- llvm/test/CodeGen/X86/vector-lrint-f16.ll | 12 +- llvm/test/CodeGen/X86/vector-lrint.ll | 259 +--- 10 files changed, 760 insertions(+), 2200 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll index 838aac0edcb73..38ba9240d15b4 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 | FileCheck %s -define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { +define 
<1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx h0, h0 @@ -13,7 +13,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) -define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { +define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -30,7 +30,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) -define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.4h, v0.4h @@ -51,7 +51,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) -define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 @@ -85,7 +85,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) -define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; CHECK-LABEL: llrint_v16i64_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8 @@ -144,16 +144,13 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) -define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { +define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-LABEL: llrint_v32i64_v32f16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: sub x9, sp, #272 ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: frintx v5.4h, v0.4h ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 @@ -278,7 +275,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { } declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) -define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -291,7 +288,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) -define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind { ; CHECK-LABEL: llrint_v2i64_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2s, v0.2s @@ -303,7 +300,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) -define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.4s, v0.4s @@ -324,7 +321,7 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) -define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.4s, v0.4s @@ -357,7 +354,7 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) -define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) 
nounwind { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v3.4s, v3.4s @@ -414,16 +411,13 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) -define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { +define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) nounwind { ; CHECK-LABEL: llrint_v32i64_v32f32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: sub x9, sp, #272 ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: frintx v0.4s, v0.4s ; CHECK-NEXT: frintx v1.4s, v1.4s ; CHECK-NEXT: frintx v2.4s, v2.4s @@ -544,7 +538,7 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { } declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>) -define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx d0, d0 @@ -556,7 +550,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) -define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind { ; CHECK-LABEL: llrint_v2i64_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2d, v0.2d @@ -567,7 +561,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) -define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl2 @@ -593,7 +587,7 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) -define <8 x i64> 
@llrint_v8i64_v8f64(<8 x double> %x) { +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl2 @@ -635,7 +629,7 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) -define <16 x i64> @llrint_v16f64(<16 x double> %x) { +define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind { ; CHECK-LABEL: llrint_v16f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.d, vl2 @@ -708,16 +702,13 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) -define <32 x i64> @llrint_v32f64(<32 x double> %x) { +define <32 x i64> @llrint_v32f64(<32 x double> %x) nounwind { ; CHECK-LABEL: llrint_v32f64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: sub x9, sp, #272 ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-NEXT: .cfi_def_cfa w29, 16 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p1.d, vl2 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 @@ -862,12 +853,10 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) { } declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>) -define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) { +define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v1fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -877,15 +866,13 @@ define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128>) -define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) { +define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v2i64_v2fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill @@ -902,15 +889,12 @@ define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128>) -define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) { +define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v3.16b ; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill @@ -950,15 +934,12 @@ define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128>) -define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) { +define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #128 ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v7.16b ; CHECK-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill @@ -1030,15 +1011,12 @@ define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128>) -define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) { +define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v16fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #256 ; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl x8, sp, #4 ; CHECK-NEXT: str q1, [sp, #240] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, #272] @@ -1194,17 +1172,13 @@ define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128>) -define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) { +define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v32fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #512 ; CHECK-NEXT: addvl sp, sp, #-8 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: addvl x9, sp, #8 ; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: mov x19, x8 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll index 0b5e27f9fe15d..175f4993d06c9 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll @@ -4,7 +4,7 @@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+sve \ ; RUN: -aarch64-sve-vector-bits-min=256 | FileCheck --check-prefixes=CHECK-i64 %s -define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { +define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v1f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx h0, h0 @@ -23,7 +23,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { } declare <1 x 
iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>) -define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { +define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -53,7 +53,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>) -define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { +define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.4h, v0.4h @@ -81,7 +81,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>) -define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { +define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v8f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v2.8h, v0.8h @@ -143,7 +143,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) -define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { +define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v16f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v1.8h, v1.8h @@ -254,26 +254,17 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) -define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { +define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v32f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: stp x26, x25, [sp, #-64]! 
// 16-byte Folded Spill -; CHECK-i32-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 64 -; CHECK-i32-NEXT: .cfi_offset w19, -8 -; CHECK-i32-NEXT: .cfi_offset w20, -16 -; CHECK-i32-NEXT: .cfi_offset w21, -24 -; CHECK-i32-NEXT: .cfi_offset w22, -32 -; CHECK-i32-NEXT: .cfi_offset w23, -40 -; CHECK-i32-NEXT: .cfi_offset w24, -48 -; CHECK-i32-NEXT: .cfi_offset w25, -56 -; CHECK-i32-NEXT: .cfi_offset w26, -64 ; CHECK-i32-NEXT: frintx v3.8h, v3.8h ; CHECK-i32-NEXT: frintx v2.8h, v2.8h +; CHECK-i32-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-i32-NEXT: frintx v1.8h, v1.8h ; CHECK-i32-NEXT: frintx v0.8h, v0.8h +; CHECK-i32-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill ; CHECK-i32-NEXT: mov h4, v3.h[7] ; CHECK-i32-NEXT: mov h5, v3.h[6] ; CHECK-i32-NEXT: mov h6, v3.h[5] @@ -378,9 +369,6 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { ; CHECK-i64-NEXT: sub x9, sp, #272 ; CHECK-i64-NEXT: mov x29, sp ; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 -; CHECK-i64-NEXT: .cfi_offset w30, -8 -; CHECK-i64-NEXT: .cfi_offset w29, -16 ; CHECK-i64-NEXT: frintx v5.4h, v0.4h ; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-i64-NEXT: ext v4.16b, v1.16b, v1.16b, #8 @@ -505,7 +493,7 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>) -define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { +define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v1f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.2s, v0.2s @@ -524,7 +512,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) -define <2 x iXLen> @lrint_v2f32(<2 x float> 
%x) { +define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.2s, v0.2s @@ -542,7 +530,7 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) -define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { +define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.4s, v0.4s @@ -569,7 +557,7 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) -define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { +define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v8f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ptrue p0.d, vl2 @@ -636,7 +624,7 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) -define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { +define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v16f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ptrue p0.d, vl2 @@ -754,24 +742,10 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { +define <32 x iXLen> @lrint_v32f32(<32 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v32f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: str x27, [sp, #-80]! 
// 8-byte Folded Spill -; CHECK-i32-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 80 -; CHECK-i32-NEXT: .cfi_offset w19, -8 -; CHECK-i32-NEXT: .cfi_offset w20, -16 -; CHECK-i32-NEXT: .cfi_offset w21, -24 -; CHECK-i32-NEXT: .cfi_offset w22, -32 -; CHECK-i32-NEXT: .cfi_offset w23, -40 -; CHECK-i32-NEXT: .cfi_offset w24, -48 -; CHECK-i32-NEXT: .cfi_offset w25, -56 -; CHECK-i32-NEXT: .cfi_offset w26, -64 -; CHECK-i32-NEXT: .cfi_offset w27, -80 ; CHECK-i32-NEXT: ptrue p1.d, vl2 ; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6 ; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7 @@ -781,11 +755,15 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { ; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5 ; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i32-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill ; CHECK-i32-NEXT: ptrue p0.s, vl8 +; CHECK-i32-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill ; CHECK-i32-NEXT: splice z6.d, p1, z6.d, z7.d ; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d ; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d ; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-i32-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-i32-NEXT: movprfx z3, z6 ; CHECK-i32-NEXT: frintx z3.s, p0/m, z6.s ; CHECK-i32-NEXT: frintx z2.s, p0/m, z2.s @@ -897,9 +875,6 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { ; CHECK-i64-NEXT: sub x9, sp, #272 ; CHECK-i64-NEXT: mov x29, sp ; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 -; CHECK-i64-NEXT: .cfi_offset w30, -8 -; CHECK-i64-NEXT: .cfi_offset w29, -16 ; CHECK-i64-NEXT: 
frintx v0.4s, v0.4s ; CHECK-i64-NEXT: frintx v1.4s, v1.4s ; CHECK-i64-NEXT: frintx v2.4s, v2.4s @@ -1020,7 +995,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>) -define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { +define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v1f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx d0, d0 @@ -1039,7 +1014,7 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) -define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { +define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.2d, v0.2d @@ -1061,7 +1036,7 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) -define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { +define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ptrue p0.d, vl2 @@ -1109,7 +1084,7 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) -define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { +define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v8f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ptrue p0.d, vl2 @@ -1188,7 +1163,7 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) -define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { +define <16 x iXLen> @lrint_v16f64(<16 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v16f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ptrue p1.d, vl2 @@ -1329,7 +1304,7 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) -define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { +define 
<32 x iXLen> @lrint_v32f64(<32 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v32f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ptrue p1.d, vl2 @@ -1465,9 +1440,6 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { ; CHECK-i64-NEXT: sub x9, sp, #272 ; CHECK-i64-NEXT: mov x29, sp ; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 -; CHECK-i64-NEXT: .cfi_offset w30, -8 -; CHECK-i64-NEXT: .cfi_offset w29, -16 ; CHECK-i64-NEXT: ptrue p1.d, vl2 ; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 @@ -1612,12 +1584,10 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) -define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { +define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v1fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 16 -; CHECK-i32-NEXT: .cfi_offset w30, -16 ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 ; CHECK-i32-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -1626,8 +1596,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; CHECK-i64-LABEL: lrint_v1fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-i64-NEXT: .cfi_def_cfa_offset 16 -; CHECK-i64-NEXT: .cfi_offset w30, -16 ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 ; CHECK-i64-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -1637,13 +1605,11 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>) -define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { +define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #48 ; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 48 -; CHECK-i32-NEXT: .cfi_offset w30, -16 ; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 @@ -1660,11 +1626,9 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; CHECK-i64-LABEL: lrint_v2fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #48 -; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-i64-NEXT: .cfi_def_cfa_offset 48 -; CHECK-i64-NEXT: .cfi_offset w30, -16 ; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 ; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill @@ -1681,13 +1645,11 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>) -define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { +define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #80 ; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 80 -; CHECK-i32-NEXT: .cfi_offset w30, -16 ; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp] // 
16-byte Folded Spill ; CHECK-i32-NEXT: bl lrintl @@ -1716,9 +1678,6 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-i64-NEXT: sub sp, sp, #64 ; CHECK-i64-NEXT: addvl sp, sp, #-1 -; CHECK-i64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG -; CHECK-i64-NEXT: .cfi_offset w30, -8 -; CHECK-i64-NEXT: .cfi_offset w29, -16 ; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-i64-NEXT: mov v0.16b, v3.16b ; CHECK-i64-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill @@ -1758,25 +1717,16 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) -define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { +define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v8fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #176 +; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill +; CHECK-i32-NEXT: mov v0.16b, v7.16b ; CHECK-i32-NEXT: stp x30, x25, [sp, #112] // 16-byte Folded Spill ; CHECK-i32-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill ; CHECK-i32-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill ; CHECK-i32-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 176 -; CHECK-i32-NEXT: .cfi_offset w19, -8 -; CHECK-i32-NEXT: .cfi_offset w20, -16 -; CHECK-i32-NEXT: .cfi_offset w21, -24 -; CHECK-i32-NEXT: .cfi_offset w22, -32 -; CHECK-i32-NEXT: .cfi_offset w23, -40 -; CHECK-i32-NEXT: .cfi_offset w24, -48 -; CHECK-i32-NEXT: .cfi_offset w25, -56 -; CHECK-i32-NEXT: .cfi_offset w30, -64 -; CHECK-i32-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i32-NEXT: mov v0.16b, v7.16b ; CHECK-i32-NEXT: stp q6, q5, [sp] // 32-byte Folded Spill ; CHECK-i32-NEXT: stp q4, q3, [sp, #32] // 32-byte Folded Spill ; CHECK-i32-NEXT: stp q2, q1, [sp, #64] // 32-byte Folded 
Spill @@ -1822,9 +1772,6 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-i64-NEXT: sub sp, sp, #128 ; CHECK-i64-NEXT: addvl sp, sp, #-2 -; CHECK-i64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG -; CHECK-i64-NEXT: .cfi_offset w30, -8 -; CHECK-i64-NEXT: .cfi_offset w29, -16 ; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill ; CHECK-i64-NEXT: mov v0.16b, v7.16b ; CHECK-i64-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill @@ -1896,48 +1843,35 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) -define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { +define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v16fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #368 -; CHECK-i32-NEXT: stp x29, x30, [sp, #272] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x28, x27, [sp, #288] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x26, x25, [sp, #304] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x24, x23, [sp, #320] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x22, x21, [sp, #336] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x20, x19, [sp, #352] // 16-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 368 -; CHECK-i32-NEXT: .cfi_offset w19, -8 -; CHECK-i32-NEXT: .cfi_offset w20, -16 -; CHECK-i32-NEXT: .cfi_offset w21, -24 -; CHECK-i32-NEXT: .cfi_offset w22, -32 -; CHECK-i32-NEXT: .cfi_offset w23, -40 -; CHECK-i32-NEXT: .cfi_offset w24, -48 -; CHECK-i32-NEXT: .cfi_offset w25, -56 -; CHECK-i32-NEXT: .cfi_offset w26, -64 -; CHECK-i32-NEXT: .cfi_offset w27, -72 -; CHECK-i32-NEXT: .cfi_offset w28, -80 -; CHECK-i32-NEXT: .cfi_offset w30, -88 -; CHECK-i32-NEXT: .cfi_offset w29, -96 -; CHECK-i32-NEXT: stp q7, q6, [sp, #80] // 32-byte Folded Spill -; CHECK-i32-NEXT: stp q5, q4, [sp, #112] // 
32-byte Folded Spill ; CHECK-i32-NEXT: stp q3, q0, [sp, #144] // 32-byte Folded Spill ; CHECK-i32-NEXT: stp q2, q1, [sp, #176] // 32-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #368] +; CHECK-i32-NEXT: stp x29, x30, [sp, #272] // 16-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #384] +; CHECK-i32-NEXT: stp x28, x27, [sp, #288] // 16-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #400] +; CHECK-i32-NEXT: stp x26, x25, [sp, #304] // 16-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #416] +; CHECK-i32-NEXT: stp x24, x23, [sp, #320] // 16-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #432] +; CHECK-i32-NEXT: stp x22, x21, [sp, #336] // 16-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #448] +; CHECK-i32-NEXT: stp x20, x19, [sp, #352] // 16-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #464] +; CHECK-i32-NEXT: stp q7, q6, [sp, #80] // 32-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #480] +; CHECK-i32-NEXT: stp q5, q4, [sp, #112] // 32-byte Folded Spill ; CHECK-i32-NEXT: mov v0.16b, v1.16b ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload @@ -2019,9 +1953,6 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { ; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-i64-NEXT: sub sp, sp, #256 ; CHECK-i64-NEXT: addvl sp, sp, #-4 -; CHECK-i64-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG -; CHECK-i64-NEXT: .cfi_offset w30, -8 -; CHECK-i64-NEXT: .cfi_offset w29, -16 ; CHECK-i64-NEXT: addvl x8, sp, #4 ; CHECK-i64-NEXT: str q1, [sp, #240] // 16-byte Folded Spill ; CHECK-i64-NEXT: ldr q1, [x8, #272] @@ -2177,7 +2108,7 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) -define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { +define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v32fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill @@ -2187,19 +2118,6 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { ; CHECK-i32-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill ; CHECK-i32-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-i32-NEXT: sub sp, sp, #528 -; CHECK-i32-NEXT: .cfi_def_cfa_offset 624 -; CHECK-i32-NEXT: .cfi_offset w19, -8 -; CHECK-i32-NEXT: .cfi_offset w20, -16 -; CHECK-i32-NEXT: .cfi_offset w21, -24 -; CHECK-i32-NEXT: .cfi_offset w22, -32 -; CHECK-i32-NEXT: .cfi_offset w23, -40 -; CHECK-i32-NEXT: .cfi_offset w24, -48 -; CHECK-i32-NEXT: .cfi_offset w25, -56 -; CHECK-i32-NEXT: .cfi_offset w26, -64 -; CHECK-i32-NEXT: .cfi_offset w27, -72 -; CHECK-i32-NEXT: .cfi_offset w28, -80 -; CHECK-i32-NEXT: .cfi_offset w30, -88 -; CHECK-i32-NEXT: .cfi_offset w29, -96 ; CHECK-i32-NEXT: stp q2, q1, [sp, #368] // 32-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #624] ; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill @@ -2412,10 +2330,6 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { ; CHECK-i64-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-i64-NEXT: sub sp, sp, #512 ; CHECK-i64-NEXT: addvl sp, sp, #-8 -; 
CHECK-i64-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG -; CHECK-i64-NEXT: .cfi_offset w19, -8 -; CHECK-i64-NEXT: .cfi_offset w30, -16 -; CHECK-i64-NEXT: .cfi_offset w29, -32 ; CHECK-i64-NEXT: addvl x9, sp, #8 ; CHECK-i64-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-i64-NEXT: mov x19, x8 diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll index 9e6f46df05fec..8f139cc225a67 100644 --- a/llvm/test/CodeGen/AArch64/vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s -define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { +define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvt s0, h0 @@ -14,7 +14,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) -define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { +define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -33,7 +33,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) -define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -62,7 +62,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) -define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f16: ; CHECK: // %bb.0: ; 
CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 @@ -110,7 +110,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) -define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; CHECK-LABEL: llrint_v16i64_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 @@ -197,7 +197,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) -define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { +define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-LABEL: llrint_v32i64_v32f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 @@ -370,7 +370,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { } declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) -define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -383,7 +383,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) -define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind { ; CHECK-LABEL: llrint_v2i64_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2s, v0.2s @@ -395,7 +395,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) -define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 @@ -411,7 +411,7 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) -define <8 x i64> 
@llrint_v8i64_v8f32(<8 x float> %x) { +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 @@ -434,7 +434,7 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) -define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 @@ -471,7 +471,7 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) -define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { +define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) nounwind { ; CHECK-LABEL: llrint_v32i64_v32f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v16.16b, v7.16b, v7.16b, #8 @@ -544,7 +544,7 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { } declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>) -define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx d0, d0 @@ -556,7 +556,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) -define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind { ; CHECK-LABEL: llrint_v2i64_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2d, v0.2d @@ -567,7 +567,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) -define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2d, v0.2d @@ -580,7 +580,7 @@ 
define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) -define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2d, v0.2d @@ -597,7 +597,7 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) -define <16 x i64> @llrint_v16f64(<16 x double> %x) { +define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind { ; CHECK-LABEL: llrint_v16f64: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2d, v0.2d @@ -622,7 +622,7 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) -define <32 x i64> @llrint_v32f64(<32 x double> %x) { +define <32 x i64> @llrint_v32f64(<32 x double> %x) nounwind { ; CHECK-LABEL: llrint_v32f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q17, q16, [sp, #96] @@ -675,12 +675,10 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) { } declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>) -define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { +define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v1i64_v1f128: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -690,15 +688,13 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>) -define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { +define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v2i64_v2f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill @@ -715,15 +711,13 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) -define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v4i64_v4f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill ; CHECK-NEXT: bl llrintl ; CHECK-NEXT: fmov d0, x0 @@ -751,15 +745,13 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) -define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v8i64_v8f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, 
#144 -; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 144 -; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill ; CHECK-NEXT: stp q3, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: stp q7, q6, [sp, #96] // 32-byte Folded Spill @@ -811,23 +803,20 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>) -define <16 x i64> @llrint_v16f128(<16 x fp128> %x) { +define <16 x i64> @llrint_v16f128(<16 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v16f128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #272 -; CHECK-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 272 -; CHECK-NEXT: .cfi_offset w30, -8 -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: str q2, [sp, #160] // 16-byte Folded Spill ; CHECK-NEXT: ldr q2, [sp, #368] ; CHECK-NEXT: stp q0, q3, [sp] // 32-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: str q2, [sp, #240] // 16-byte Folded Spill ; CHECK-NEXT: ldr q2, [sp, #384] -; CHECK-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill ; CHECK-NEXT: str q2, [sp, #224] // 16-byte Folded Spill ; CHECK-NEXT: ldr q2, [sp, #336] +; CHECK-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill ; CHECK-NEXT: str q2, [sp, #192] // 16-byte Folded Spill ; CHECK-NEXT: ldr q2, [sp, #352] ; CHECK-NEXT: str q2, [sp, #176] // 16-byte Folded Spill @@ -929,16 +918,12 @@ define <16 x i64> @llrint_v16f128(<16 x fp128> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>) -define <32 x i64> @llrint_v32f128(<32 x fp128> %x) { +define <32 x i64> @llrint_v32f128(<32 x fp128> %x) nounwind { ; CHECK-LABEL: llrint_v32f128: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #512 -; CHECK-NEXT: .cfi_def_cfa_offset 544 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #896] ; CHECK-NEXT: mov x19, x8 diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index cb7fe14273a42..b899db839a65a 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -45,7 +45,7 @@ ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f64 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f64 -define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { +define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v1f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: fcvt s0, h0 @@ -66,7 +66,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>) -define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { +define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -100,7 +100,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>) -define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { +define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0 @@ -153,7 +153,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>) -define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { +define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v8f16: ; CHECK-i32: // %bb.0: ; 
CHECK-i32-NEXT: ext v1.16b, v0.16b, v0.16b, #8 @@ -244,7 +244,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) -define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { +define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v16f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ext v2.16b, v0.16b, v0.16b, #8 @@ -413,7 +413,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) -define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { +define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind { ; CHECK-i32-LABEL: lrint_v32f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ext v5.16b, v0.16b, v0.16b, #8 @@ -748,7 +748,7 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>) -define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { +define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v1f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.2s, v0.2s @@ -774,7 +774,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) -define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { +define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.2s, v0.2s @@ -792,7 +792,7 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) -define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { +define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.4s, v0.4s @@ -814,7 +814,7 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) -define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { +define <8 x iXLen> @lrint_v8f32(<8 x float> %x) 
nounwind { ; CHECK-i32-LABEL: lrint_v8f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.4s, v0.4s @@ -845,7 +845,7 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) -define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { +define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v16f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.4s, v0.4s @@ -894,7 +894,7 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { +define <32 x iXLen> @lrint_v32f32(<32 x float> %x) nounwind { ; CHECK-i32-LABEL: lrint_v32f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.4s, v0.4s @@ -987,7 +987,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>) -define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { +define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v1f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx d0, d0 @@ -1006,7 +1006,7 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) -define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { +define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.2d, v0.2d @@ -1028,7 +1028,7 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) -define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { +define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.2d, v0.2d @@ -1057,7 +1057,7 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) -define <8 x iXLen> @lrint_v8f64(<8 x double> %x) 
{ +define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v8f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v2.2d, v2.2d @@ -1102,7 +1102,7 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) -define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { +define <16 x iXLen> @lrint_v16f64(<16 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v16f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.2d, v0.2d @@ -1179,7 +1179,7 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) -define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { +define <32 x iXLen> @lrint_v32f64(<32 x double> %x) nounwind { ; CHECK-i32-LABEL: lrint_v32f64: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v17.2d, v0.2d @@ -1336,12 +1336,10 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { } declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) -define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { +define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v1fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 16 -; CHECK-i32-NEXT: .cfi_offset w30, -16 ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 ; CHECK-i32-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -1350,8 +1348,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; CHECK-i64-LABEL: lrint_v1fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-i64-NEXT: .cfi_def_cfa_offset 16 -; CHECK-i64-NEXT: .cfi_offset w30, -16 ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 ; CHECK-i64-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -1361,13 +1357,11 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>) -define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { +define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v2fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #48 ; CHECK-i32-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 48 -; CHECK-i32-NEXT: .cfi_offset w30, -16 ; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 @@ -1384,11 +1378,9 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; CHECK-i64-LABEL: lrint_v2fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #48 -; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; CHECK-i64-NEXT: .cfi_def_cfa_offset 48 -; CHECK-i64-NEXT: .cfi_offset w30, -16 ; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 ; CHECK-i64-NEXT: str q0, [sp, #16] // 16-byte Folded Spill @@ -1405,13 +1397,11 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>) -define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { +define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v4fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #80 ; CHECK-i32-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 80 -; CHECK-i32-NEXT: .cfi_offset w30, -16 ; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp] // 
16-byte Folded Spill ; CHECK-i32-NEXT: bl lrintl @@ -1438,11 +1428,9 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; CHECK-i64-LABEL: lrint_v4fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #80 -; CHECK-i64-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-i64-NEXT: .cfi_def_cfa_offset 80 -; CHECK-i64-NEXT: .cfi_offset w30, -16 ; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-i64-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill ; CHECK-i64-NEXT: bl lrintl ; CHECK-i64-NEXT: fmov d0, x0 @@ -1470,17 +1458,15 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) -define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { +define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v8fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #144 ; CHECK-i32-NEXT: str x30, [sp, #128] // 8-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 144 -; CHECK-i32-NEXT: .cfi_offset w30, -16 +; CHECK-i32-NEXT: str q4, [sp, #96] // 16-byte Folded Spill ; CHECK-i32-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill ; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill ; CHECK-i32-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill -; CHECK-i32-NEXT: str q4, [sp, #96] // 16-byte Folded Spill ; CHECK-i32-NEXT: bl lrintl ; CHECK-i32-NEXT: fmov s0, w0 ; CHECK-i32-NEXT: str q0, [sp, #112] // 16-byte Folded Spill @@ -1524,11 +1510,9 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; CHECK-i64-LABEL: lrint_v8fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #144 -; CHECK-i64-NEXT: str x30, [sp, #128] // 8-byte Folded Spill -; CHECK-i64-NEXT: .cfi_def_cfa_offset 144 -; CHECK-i64-NEXT: .cfi_offset w30, -16 ; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b +; CHECK-i64-NEXT: str x30, [sp, #128] // 
8-byte Folded Spill ; CHECK-i64-NEXT: stp q3, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-i64-NEXT: stp q5, q4, [sp, #48] // 32-byte Folded Spill ; CHECK-i64-NEXT: stp q7, q6, [sp, #96] // 32-byte Folded Spill @@ -1580,22 +1564,19 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) -define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { +define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v16fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: sub sp, sp, #272 -; CHECK-i32-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill -; CHECK-i32-NEXT: .cfi_def_cfa_offset 272 -; CHECK-i32-NEXT: .cfi_offset w30, -8 -; CHECK-i32-NEXT: .cfi_offset w29, -16 ; CHECK-i32-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #384] -; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #368] -; CHECK-i32-NEXT: stp q7, q4, [sp, #208] // 32-byte Folded Spill +; CHECK-i32-NEXT: stp q3, q5, [sp, #32] // 32-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #352] +; CHECK-i32-NEXT: stp q7, q4, [sp, #208] // 32-byte Folded Spill ; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #336] ; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill @@ -1689,19 +1670,16 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { ; CHECK-i64-LABEL: lrint_v16fp128: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: sub sp, sp, #272 -; CHECK-i64-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill -; CHECK-i64-NEXT: .cfi_def_cfa_offset 272 -; CHECK-i64-NEXT: .cfi_offset w30, -8 -; CHECK-i64-NEXT: .cfi_offset w29, -16 ; CHECK-i64-NEXT: str q2, [sp, #160] // 16-byte Folded Spill ; CHECK-i64-NEXT: ldr q2, [sp, #368] ; 
CHECK-i64-NEXT: stp q0, q3, [sp] // 32-byte Folded Spill ; CHECK-i64-NEXT: mov v0.16b, v1.16b ; CHECK-i64-NEXT: str q2, [sp, #240] // 16-byte Folded Spill ; CHECK-i64-NEXT: ldr q2, [sp, #384] -; CHECK-i64-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill +; CHECK-i64-NEXT: stp x29, x30, [sp, #256] // 16-byte Folded Spill ; CHECK-i64-NEXT: str q2, [sp, #224] // 16-byte Folded Spill ; CHECK-i64-NEXT: ldr q2, [sp, #336] +; CHECK-i64-NEXT: stp q5, q7, [sp, #32] // 32-byte Folded Spill ; CHECK-i64-NEXT: str q2, [sp, #192] // 16-byte Folded Spill ; CHECK-i64-NEXT: ldr q2, [sp, #352] ; CHECK-i64-NEXT: str q2, [sp, #176] // 16-byte Folded Spill @@ -1803,14 +1781,11 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) -define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { +define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) nounwind { ; CHECK-i32-LABEL: lrint_v32fp128: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-i32-NEXT: sub sp, sp, #512 -; CHECK-i32-NEXT: .cfi_def_cfa_offset 528 -; CHECK-i32-NEXT: .cfi_offset w30, -8 -; CHECK-i32-NEXT: .cfi_offset w29, -16 ; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-i32-NEXT: ldr q1, [sp, #896] ; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill @@ -2026,10 +2001,6 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { ; CHECK-i64-NEXT: str x29, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-i64-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-i64-NEXT: sub sp, sp, #512 -; CHECK-i64-NEXT: .cfi_def_cfa_offset 544 -; CHECK-i64-NEXT: .cfi_offset w19, -8 -; CHECK-i64-NEXT: .cfi_offset w30, -16 -; CHECK-i64-NEXT: .cfi_offset w29, -32 ; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill ; CHECK-i64-NEXT: ldr q0, [sp, #896] ; CHECK-i64-NEXT: mov x19, x8 diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll index d57bf6b2e706c..8a9e48e002381 100644 --- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll @@ -13,14 +13,12 @@ ; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \ ; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST -define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { +define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; BE-LABEL: llrint_v1i64_v1f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: clrldi r3, r3, 48 @@ -38,8 +36,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: clrldi r3, r3, 48 @@ -57,8 +53,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -32(r1) ; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: .cfi_def_cfa_offset 32 -; FAST-NEXT: .cfi_offset lr, 16 ; FAST-NEXT: bl __truncsfhf2 ; FAST-NEXT: nop ; FAST-NEXT: clrldi r3, r3, 48 @@ -75,16 +69,12 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) -define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { +define <2 x 
i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind { ; BE-LABEL: llrint_v1i64_v2f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) ; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r30, -24 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f31, f1 ; BE-NEXT: fmr f1, f2 @@ -122,17 +112,12 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -96(r1) -; CHECK-NEXT: std r0, 112(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -24 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v31, -48 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 112(r1) ; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f2 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f31 @@ -157,7 +142,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v2, vs0, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 96 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -166,10 +151,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; FAST-LABEL: llrint_v1i64_v2f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 48 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill ; FAST-NEXT: stdu r1, -48(r1) @@ -206,20 +187,12 @@ define <2 x i64> 
@llrint_v1i64_v2f16(<2 x half> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) -define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; BE-LABEL: llrint_v4i64_v4f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) ; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r28, -56 -; BE-NEXT: .cfi_offset r29, -48 -; BE-NEXT: .cfi_offset r30, -40 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f29, f1 ; BE-NEXT: fmr f1, f2 @@ -293,18 +266,8 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -144(r1) -; CHECK-NEXT: std r0, 160(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 144 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r28, -56 -; CHECK-NEXT: .cfi_offset r29, -48 -; CHECK-NEXT: .cfi_offset r30, -40 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v30, -96 -; CHECK-NEXT: .cfi_offset v31, -80 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 160(r1) ; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill @@ -312,11 +275,11 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK-NEXT: fmr f29, f2 ; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; 
CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f29 @@ -369,11 +332,11 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v3, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 144 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -382,12 +345,6 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; FAST-LABEL: llrint_v4i64_v4f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 64 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill @@ -451,28 +408,12 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) -define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; BE-LABEL: llrint_v8i64_v8f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) ; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r24, -120 -; BE-NEXT: .cfi_offset r25, -112 -; BE-NEXT: .cfi_offset r26, -104 -; BE-NEXT: .cfi_offset r27, -96 -; BE-NEXT: .cfi_offset r28, -88 -; BE-NEXT: .cfi_offset r29, -80 -; BE-NEXT: .cfi_offset r30, -72 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: 
.cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f25, f1 ; BE-NEXT: fmr f1, f2 @@ -618,44 +559,24 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -240(r1) -; CHECK-NEXT: std r0, 256(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 240 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r24, -120 -; CHECK-NEXT: .cfi_offset r25, -112 -; CHECK-NEXT: .cfi_offset r26, -104 -; CHECK-NEXT: .cfi_offset r27, -96 -; CHECK-NEXT: .cfi_offset r28, -88 -; CHECK-NEXT: .cfi_offset r29, -80 -; CHECK-NEXT: .cfi_offset r30, -72 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v28, -192 -; CHECK-NEXT: .cfi_offset v29, -176 -; CHECK-NEXT: .cfi_offset v30, -160 -; CHECK-NEXT: .cfi_offset v31, -144 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 256(r1) ; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f2 ; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f3 ; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f27, f4 -; CHECK-NEXT: stvx 
v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f5 @@ -663,11 +584,11 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-NEXT: fmr f29, f6 ; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f7 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f8 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f25 @@ -770,7 +691,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-NEXT: vmr v4, v29 ; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload @@ -778,7 +699,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload @@ -786,9 +707,9 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded 
Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 240 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -797,16 +718,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; FAST-LABEL: llrint_v8i64_v8f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 96 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill @@ -924,44 +835,12 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) -define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; BE-LABEL: llrint_v16i64_v16f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -496(r1) ; BE-NEXT: std r0, 512(r1) -; BE-NEXT: .cfi_def_cfa_offset 496 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r16, -248 -; BE-NEXT: .cfi_offset r17, -240 -; BE-NEXT: .cfi_offset r18, -232 -; BE-NEXT: .cfi_offset r19, -224 -; BE-NEXT: .cfi_offset r20, -216 -; BE-NEXT: .cfi_offset r21, -208 -; BE-NEXT: .cfi_offset r22, -200 -; BE-NEXT: .cfi_offset r23, -192 -; BE-NEXT: .cfi_offset r24, -184 -; BE-NEXT: .cfi_offset r25, -176 -; BE-NEXT: .cfi_offset r26, -168 -; BE-NEXT: .cfi_offset r27, -160 -; BE-NEXT: .cfi_offset r28, -152 -; BE-NEXT: .cfi_offset r29, -144 -; BE-NEXT: .cfi_offset r30, -136 -; BE-NEXT: .cfi_offset f17, -120 -; BE-NEXT: .cfi_offset f18, -112 -; BE-NEXT: .cfi_offset f19, 
-104 -; BE-NEXT: .cfi_offset f20, -96 -; BE-NEXT: .cfi_offset f21, -88 -; BE-NEXT: .cfi_offset f22, -80 -; BE-NEXT: .cfi_offset f23, -72 -; BE-NEXT: .cfi_offset f24, -64 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f20, f1 ; BE-NEXT: fmr f1, f2 @@ -1248,105 +1127,65 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -432(r1) -; CHECK-NEXT: std r0, 448(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 432 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r16, -248 -; CHECK-NEXT: .cfi_offset r17, -240 -; CHECK-NEXT: .cfi_offset r18, -232 -; CHECK-NEXT: .cfi_offset r19, -224 -; CHECK-NEXT: .cfi_offset r20, -216 -; CHECK-NEXT: .cfi_offset r21, -208 -; CHECK-NEXT: .cfi_offset r22, -200 -; CHECK-NEXT: .cfi_offset r23, -192 -; CHECK-NEXT: .cfi_offset r24, -184 -; CHECK-NEXT: .cfi_offset r25, -176 -; CHECK-NEXT: .cfi_offset r26, -168 -; CHECK-NEXT: .cfi_offset r27, -160 -; CHECK-NEXT: .cfi_offset r28, -152 -; CHECK-NEXT: .cfi_offset r29, -144 -; CHECK-NEXT: .cfi_offset r30, -136 -; CHECK-NEXT: .cfi_offset f17, -120 -; CHECK-NEXT: .cfi_offset f18, -112 -; CHECK-NEXT: .cfi_offset f19, -104 -; CHECK-NEXT: .cfi_offset f20, -96 -; CHECK-NEXT: .cfi_offset f21, -88 -; CHECK-NEXT: .cfi_offset f22, -80 -; CHECK-NEXT: .cfi_offset f23, -72 -; CHECK-NEXT: .cfi_offset f24, -64 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v24, -384 -; CHECK-NEXT: .cfi_offset v25, -368 -; CHECK-NEXT: .cfi_offset v26, -352 -; CHECK-NEXT: .cfi_offset v27, -336 
-; CHECK-NEXT: .cfi_offset v28, -320 -; CHECK-NEXT: .cfi_offset v29, -304 -; CHECK-NEXT: .cfi_offset v30, -288 -; CHECK-NEXT: .cfi_offset v31, -272 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 448(r1) ; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f20, f2 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f21, f3 ; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f22, f4 +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded 
Spill +; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f23, f5 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f24, f6 ; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f7 +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f8 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f27, f9 ; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f10 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f29, f11 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f12 ; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f20 @@ -1549,7 +1388,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: vmr v4, v29 ; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v5, v28 ; CHECK-NEXT: vmr v6, v27 @@ -1557,7 +1396,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: vmr v8, 
v25 ; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload @@ -1565,7 +1404,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload @@ -1573,7 +1412,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload @@ -1581,7 +1420,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload @@ -1589,13 +1428,13 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload 
; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 432 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -1604,24 +1443,6 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; FAST-LABEL: llrint_v16i64_v16f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 160 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f16, -128 -; FAST-NEXT: .cfi_offset f17, -120 -; FAST-NEXT: .cfi_offset f18, -112 -; FAST-NEXT: .cfi_offset f19, -104 -; FAST-NEXT: .cfi_offset f20, -96 -; FAST-NEXT: .cfi_offset f21, -88 -; FAST-NEXT: .cfi_offset f22, -80 -; FAST-NEXT: .cfi_offset f23, -72 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill @@ -1845,50 +1666,12 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) -define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { +define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; BE-LABEL: llrint_v32i64_v32f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -864(r1) ; BE-NEXT: std r0, 880(r1) -; BE-NEXT: 
.cfi_def_cfa_offset 864 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r14, -288 -; BE-NEXT: .cfi_offset r15, -280 -; BE-NEXT: .cfi_offset r16, -272 -; BE-NEXT: .cfi_offset r17, -264 -; BE-NEXT: .cfi_offset r18, -256 -; BE-NEXT: .cfi_offset r19, -248 -; BE-NEXT: .cfi_offset r20, -240 -; BE-NEXT: .cfi_offset r21, -232 -; BE-NEXT: .cfi_offset r22, -224 -; BE-NEXT: .cfi_offset r23, -216 -; BE-NEXT: .cfi_offset r24, -208 -; BE-NEXT: .cfi_offset r25, -200 -; BE-NEXT: .cfi_offset r26, -192 -; BE-NEXT: .cfi_offset r27, -184 -; BE-NEXT: .cfi_offset r28, -176 -; BE-NEXT: .cfi_offset r29, -168 -; BE-NEXT: .cfi_offset r30, -160 -; BE-NEXT: .cfi_offset r31, -152 -; BE-NEXT: .cfi_offset f14, -144 -; BE-NEXT: .cfi_offset f15, -136 -; BE-NEXT: .cfi_offset f16, -128 -; BE-NEXT: .cfi_offset f17, -120 -; BE-NEXT: .cfi_offset f18, -112 -; BE-NEXT: .cfi_offset f19, -104 -; BE-NEXT: .cfi_offset f20, -96 -; BE-NEXT: .cfi_offset f21, -88 -; BE-NEXT: .cfi_offset f22, -80 -; BE-NEXT: .cfi_offset f23, -72 -; BE-NEXT: .cfi_offset f24, -64 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f20, f1 ; BE-NEXT: fmr f1, f2 @@ -1928,6 +1711,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill ; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f31, f13 +; BE-NEXT: mr r30, r3 ; BE-NEXT: fmr f29, f12 ; BE-NEXT: fmr f30, f11 ; BE-NEXT: fmr f28, f10 @@ -1938,7 +1722,6 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; BE-NEXT: fmr f23, f5 ; BE-NEXT: fmr f22, f4 ; BE-NEXT: fmr f21, f3 -; BE-NEXT: mr r30, r3 ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: fmr f1, f20 @@ -2441,98 +2224,48 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK: # %bb.0: 
; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -688(r1) -; CHECK-NEXT: std r0, 704(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 688 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r14, -288 -; CHECK-NEXT: .cfi_offset r15, -280 -; CHECK-NEXT: .cfi_offset r16, -272 -; CHECK-NEXT: .cfi_offset r17, -264 -; CHECK-NEXT: .cfi_offset r18, -256 -; CHECK-NEXT: .cfi_offset r19, -248 -; CHECK-NEXT: .cfi_offset r20, -240 -; CHECK-NEXT: .cfi_offset r21, -232 -; CHECK-NEXT: .cfi_offset r22, -224 -; CHECK-NEXT: .cfi_offset r23, -216 -; CHECK-NEXT: .cfi_offset r24, -208 -; CHECK-NEXT: .cfi_offset r25, -200 -; CHECK-NEXT: .cfi_offset r26, -192 -; CHECK-NEXT: .cfi_offset r27, -184 -; CHECK-NEXT: .cfi_offset r28, -176 -; CHECK-NEXT: .cfi_offset r29, -168 -; CHECK-NEXT: .cfi_offset r30, -160 -; CHECK-NEXT: .cfi_offset r31, -152 -; CHECK-NEXT: .cfi_offset f14, -144 -; CHECK-NEXT: .cfi_offset f15, -136 -; CHECK-NEXT: .cfi_offset f16, -128 -; CHECK-NEXT: .cfi_offset f17, -120 -; CHECK-NEXT: .cfi_offset f18, -112 -; CHECK-NEXT: .cfi_offset f19, -104 -; CHECK-NEXT: .cfi_offset f20, -96 -; CHECK-NEXT: .cfi_offset f21, -88 -; CHECK-NEXT: .cfi_offset f22, -80 -; CHECK-NEXT: .cfi_offset f23, -72 -; CHECK-NEXT: .cfi_offset f24, -64 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v20, -480 -; CHECK-NEXT: .cfi_offset v21, -464 -; CHECK-NEXT: .cfi_offset v22, -448 -; CHECK-NEXT: .cfi_offset v23, -432 -; CHECK-NEXT: .cfi_offset v24, -416 -; CHECK-NEXT: .cfi_offset v25, -400 -; CHECK-NEXT: .cfi_offset v26, -384 -; CHECK-NEXT: .cfi_offset v27, -368 -; CHECK-NEXT: .cfi_offset v28, -352 -; CHECK-NEXT: .cfi_offset v29, -336 -; CHECK-NEXT: .cfi_offset v30, -320 -; CHECK-NEXT: .cfi_offset v31, -304 ; CHECK-NEXT: li r4, 208 +; CHECK-NEXT: std r0, 704(r1) ; CHECK-NEXT: std r14, 
400(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 224 +; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 240 +; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 256 +; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 272 
+; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f20, f2 ; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f21, f3 ; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f22, f4 -; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 288 ; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f23, f5 @@ -2540,7 +2273,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f24, f6 ; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f7 -; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 304 ; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f8 @@ -2548,7 +2281,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f27, f9 ; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f10 -; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 320 ; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f29, f11 @@ -2556,15 +2289,15 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f30, f12 ; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 336 -; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 352 -; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 368 -; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: 
stxvd2x v30, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 384 -; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f20 @@ -3043,7 +2776,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: li r3, 384 ; CHECK-NEXT: xxswapd vs4, vs4 ; CHECK-NEXT: stxvd2x vs4, 0, r30 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 368 ; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload @@ -3061,7 +2794,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 352 ; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload @@ -3069,7 +2802,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 336 ; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload @@ -3077,7 +2810,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; 
CHECK-NEXT: li r3, 320 ; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload @@ -3085,23 +2818,23 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 304 ; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 288 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 272 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 256 -; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 224 -; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 208 -; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 688 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3111,95 +2844,62 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -480(r1) -; FAST-NEXT: std r0, 496(r1) -; FAST-NEXT: .cfi_def_cfa_offset 480 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset r30, -160 -; FAST-NEXT: .cfi_offset f14, -144 -; FAST-NEXT: .cfi_offset f15, -136 -; 
FAST-NEXT: .cfi_offset f16, -128 -; FAST-NEXT: .cfi_offset f17, -120 -; FAST-NEXT: .cfi_offset f18, -112 -; FAST-NEXT: .cfi_offset f19, -104 -; FAST-NEXT: .cfi_offset f20, -96 -; FAST-NEXT: .cfi_offset f21, -88 -; FAST-NEXT: .cfi_offset f22, -80 -; FAST-NEXT: .cfi_offset f23, -72 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 -; FAST-NEXT: .cfi_offset v20, -352 -; FAST-NEXT: .cfi_offset v21, -336 -; FAST-NEXT: .cfi_offset v22, -320 -; FAST-NEXT: .cfi_offset v23, -304 -; FAST-NEXT: .cfi_offset v24, -288 -; FAST-NEXT: .cfi_offset v25, -272 -; FAST-NEXT: .cfi_offset v26, -256 -; FAST-NEXT: .cfi_offset v27, -240 -; FAST-NEXT: .cfi_offset v28, -224 -; FAST-NEXT: .cfi_offset v29, -208 -; FAST-NEXT: .cfi_offset v30, -192 -; FAST-NEXT: .cfi_offset v31, -176 ; FAST-NEXT: li r4, 128 +; FAST-NEXT: std r0, 496(r1) ; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r3 ; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f14, f5 ; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f14, f5 ; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f16, f4 -; FAST-NEXT: mr r30, r3 -; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 144 +; FAST-NEXT: fmr f16, f4 ; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 160 
+; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 176 ; FAST-NEXT: xxlor v22, f3, f3 +; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill ; FAST-NEXT: fmr f29, f9 ; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 192 ; FAST-NEXT: xxlor v23, f2, f2 -; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 208 -; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 224 ; FAST-NEXT: xxlor v25, f13, f13 -; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 240 ; FAST-NEXT: xxlor v26, f12, f12 -; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 256 ; FAST-NEXT: xxlor v27, f11, f11 -; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 272 ; FAST-NEXT: xxlor v28, f10, f10 -; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 288 ; FAST-NEXT: xxlor v29, f8, f8 -; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, 
r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 304 ; FAST-NEXT: xxlor v30, f7, f7 -; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 44 ; FAST-NEXT: xxlor v31, f6, f6 ; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill @@ -3628,30 +3328,30 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload ; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload ; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 288 ; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 272 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 256 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 240 -; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 224 -; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 208 -; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 192 -; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 176 -; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 160 -; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 144 -; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded 
Reload +; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 128 -; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 480 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -3661,14 +3361,12 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { } declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) -define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind { ; BE-LABEL: llrint_v1i64_v1f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl llrintf ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -3681,8 +3379,6 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -3700,15 +3396,13 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) -define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind { ; BE-LABEL: llrint_v2i64_v2f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -144(r1) -; BE-NEXT: std r0, 160(r1) -; BE-NEXT: .cfi_def_cfa_offset 144 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 160(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: lfs f1, 116(r1) ; BE-NEXT: bl llrintf @@ -3729,14 +3423,11 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: xxsldwi vs0, v2, 
v2, 3 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 80(r1) ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop @@ -3748,7 +3439,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, vs0, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3773,15 +3464,13 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) -define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind { ; BE-LABEL: llrint_v4i64_v4f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: lfs f1, 116(r1) ; BE-NEXT: bl llrintf @@ -3812,17 +3501,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 96(r1) ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop @@ -3845,9 +3530,9 
@@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: xxmrghd v3, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3883,15 +3568,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) -define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind { ; BE-LABEL: llrint_v8i64_v8f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) -; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 224(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 ; BE-NEXT: stxvw4x v3, 0, r3 @@ -3944,24 +3627,18 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; 
CHECK-NEXT: vmr v30, v2 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop @@ -4007,13 +3684,13 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; CHECK-NEXT: vmr v2, v29 ; CHECK-NEXT: vmr v4, v28 ; CHECK-NEXT: xxmrghd v5, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4071,15 +3748,13 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) -define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind { ; BE-LABEL: llrint_v16i64_v16f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) -; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 320(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 ; BE-NEXT: stxvw4x v3, 0, r3 @@ -4176,38 +3851,28 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -176(r1) -; CHECK-NEXT: std r0, 192(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 176 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v24, -128 -; CHECK-NEXT: .cfi_offset v25, -112 -; CHECK-NEXT: .cfi_offset v26, -96 -; CHECK-NEXT: .cfi_offset v27, -80 -; CHECK-NEXT: 
.cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 192(r1) ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v26, v3 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: vmr v28, v4 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v29, v2 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop @@ -4299,21 +3964,21 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-NEXT: vmr v6, v25 ; CHECK-NEXT: vmr v8, v24 ; CHECK-NEXT: xxmrghd v9, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x 
v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 176 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4415,14 +4080,12 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) -define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind { ; BE-LABEL: llrint_v1i64_v1f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl llrint ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -4435,8 +4098,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl llrint ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -4454,16 +4115,13 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) -define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind { ; BE-LABEL: llrint_v2i64_v2f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: 
std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 144 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v31, v2 ; BE-NEXT: xxlor f1, v31, v31 @@ -4487,12 +4145,9 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: xxlor f1, v31, v31 ; CHECK-NEXT: bl llrint @@ -4504,7 +4159,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4527,17 +4182,13 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) -define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind { ; BE-LABEL: llrint_v4i64_v4f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -192(r1) -; BE-NEXT: std r0, 208(r1) -; BE-NEXT: .cfi_def_cfa_offset 192 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 160 +; BE-NEXT: std r0, 208(r1) ; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v30, v2 ; BE-NEXT: li r3, 176 @@ -4576,17 +4227,13 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; 
CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v30, v2 ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xxlor f1, v30, v30 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl llrint ; CHECK-NEXT: nop @@ -4607,9 +4254,9 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: xxmrghd v3, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4641,25 +4288,19 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) -define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind { ; BE-LABEL: llrint_v8i64_v8f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -256(r1) -; BE-NEXT: std r0, 272(r1) -; BE-NEXT: .cfi_def_cfa_offset 256 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v28, -64 -; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 192 +; BE-NEXT: std r0, 272(r1) ; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 208 ; BE-NEXT: vmr v28, v2 -; BE-NEXT: xxlor f1, v28, v28 ; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 224 +; BE-NEXT: xxlor f1, v28, v28 ; BE-NEXT: vmr v29, v3 ; BE-NEXT: 
stxvd2x v30, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 240 @@ -4722,25 +4363,19 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v28, v2 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: xxlor f1, v28, v28 ; CHECK-NEXT: vmr v29, v3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v30, v4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl llrint ; CHECK-NEXT: nop @@ -4781,13 +4416,13 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; CHECK-NEXT: vmr v3, v29 ; CHECK-NEXT: vmr v2, v28 ; CHECK-NEXT: xxmrghd v5, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ 
-4837,14 +4472,12 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) -define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { +define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind { ; BE-LABEL: llrint_v1i64_v1f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl llrintf128 ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -4857,8 +4490,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl llrintf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -4871,8 +4502,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -32(r1) ; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: .cfi_def_cfa_offset 32 -; FAST-NEXT: .cfi_offset lr, 16 ; FAST-NEXT: bl llrintf128 ; FAST-NEXT: nop ; FAST-NEXT: addi r1, r1, 32 @@ -4884,16 +4513,13 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>) -define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { +define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind { ; BE-LABEL: llrint_v2i64_v2f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 144 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v31, v2 ; BE-NEXT: vmr v2, v3 @@ -4917,15 +4543,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: 
.cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl llrintf128 ; CHECK-NEXT: nop @@ -4935,10 +4557,10 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4948,15 +4570,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -80(r1) -; FAST-NEXT: std r0, 96(r1) -; FAST-NEXT: .cfi_def_cfa_offset 80 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset v30, -32 -; FAST-NEXT: .cfi_offset v31, -16 ; FAST-NEXT: li r3, 48 -; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: std r0, 96(r1) +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 64 -; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: vmr v31, v3 ; FAST-NEXT: bl llrintf128 ; FAST-NEXT: nop @@ -4966,10 +4584,10 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; FAST-NEXT: nop ; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 64 -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 48 ; FAST-NEXT: xxmrghd v2, vs0, v30 -; FAST-NEXT: lvx 
v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 80 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -4979,18 +4597,13 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) -define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind { ; BE-LABEL: llrint_v4i64_v4f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) -; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 160 +; BE-NEXT: std r0, 224(r1) ; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 176 ; BE-NEXT: vmr v29, v2 @@ -5034,23 +4647,17 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: vmr v29, v3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v30, v4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl llrintf128 ; CHECK-NEXT: nop @@ -5070,14 +4677,14 @@ 
define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v2, v29 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: xxmrghd v3, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -5087,23 +4694,17 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -112(r1) -; FAST-NEXT: std r0, 128(r1) -; FAST-NEXT: .cfi_def_cfa_offset 112 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset v28, -64 -; FAST-NEXT: .cfi_offset v29, -48 -; FAST-NEXT: .cfi_offset v30, -32 -; FAST-NEXT: .cfi_offset v31, -16 ; FAST-NEXT: li r3, 48 -; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: std r0, 128(r1) +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 64 -; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 80 ; FAST-NEXT: vmr v29, v3 -; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 96 ; FAST-NEXT: vmr v30, v4 -; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: vmr v31, v5 ; FAST-NEXT: bl llrintf128 ; FAST-NEXT: nop @@ -5123,14 +4724,14 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { ; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 96 ; FAST-NEXT: vmr v2, 
v29 -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 80 ; FAST-NEXT: xxmrghd v3, vs0, v30 -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 64 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 48 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 112 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -5140,22 +4741,13 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) -define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind { ; BE-LABEL: llrint_v8i64_v8f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) -; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v25, -112 -; BE-NEXT: .cfi_offset v26, -96 -; BE-NEXT: .cfi_offset v27, -80 -; BE-NEXT: .cfi_offset v28, -64 -; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 192 +; BE-NEXT: std r0, 320(r1) ; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 208 ; BE-NEXT: vmr v25, v2 @@ -5239,39 +4831,29 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -176(r1) -; CHECK-NEXT: std r0, 192(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 176 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v24, -128 -; CHECK-NEXT: .cfi_offset v25, -112 -; CHECK-NEXT: .cfi_offset v26, -96 -; CHECK-NEXT: .cfi_offset v27, -80 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset 
v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 192(r1) +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: vmr v25, v3 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v26, v4 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: vmr v27, v5 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: vmr v28, v6 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v29, v7 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 ; CHECK-NEXT: vmr v30, v8 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v9 ; CHECK-NEXT: bl llrintf128 ; CHECK-NEXT: nop @@ -5309,24 +4891,24 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 160 ; CHECK-NEXT: vmr v4, v29 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v3, v27 ; CHECK-NEXT: vmr v2, v25 ; CHECK-NEXT: xxmrghd v5, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded 
Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 176 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -5336,39 +4918,29 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -176(r1) -; FAST-NEXT: std r0, 192(r1) -; FAST-NEXT: .cfi_def_cfa_offset 176 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset v24, -128 -; FAST-NEXT: .cfi_offset v25, -112 -; FAST-NEXT: .cfi_offset v26, -96 -; FAST-NEXT: .cfi_offset v27, -80 -; FAST-NEXT: .cfi_offset v28, -64 -; FAST-NEXT: .cfi_offset v29, -48 -; FAST-NEXT: .cfi_offset v30, -32 -; FAST-NEXT: .cfi_offset v31, -16 ; FAST-NEXT: li r3, 48 -; FAST-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: std r0, 192(r1) +; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 64 -; FAST-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 80 ; FAST-NEXT: vmr v25, v3 -; FAST-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 96 ; FAST-NEXT: vmr v26, v4 -; FAST-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 112 ; FAST-NEXT: vmr v27, v5 -; FAST-NEXT: stvx v28, r1, 
r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 128 ; FAST-NEXT: vmr v28, v6 -; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 144 ; FAST-NEXT: vmr v29, v7 -; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 160 ; FAST-NEXT: vmr v30, v8 -; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: vmr v31, v9 ; FAST-NEXT: bl llrintf128 ; FAST-NEXT: nop @@ -5406,24 +4978,24 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 160 ; FAST-NEXT: vmr v4, v29 -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 144 ; FAST-NEXT: vmr v3, v27 ; FAST-NEXT: vmr v2, v25 ; FAST-NEXT: xxmrghd v5, vs0, v30 -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 128 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 112 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 96 -; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 80 -; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 64 -; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 48 -; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 176 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr 
r0 diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll index c64c2e15179cb..6c824be017e81 100644 --- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll @@ -28,14 +28,12 @@ ; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \ ; RUN: FileCheck %s --check-prefixes=FAST -define <1 x i64> @lrint_v1f16(<1 x half> %x) { +define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind { ; BE-LABEL: lrint_v1f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: clrldi r3, r3, 48 @@ -53,8 +51,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: clrldi r3, r3, 48 @@ -72,8 +68,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) { ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -32(r1) ; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: .cfi_def_cfa_offset 32 -; FAST-NEXT: .cfi_offset lr, 16 ; FAST-NEXT: bl __truncsfhf2 ; FAST-NEXT: nop ; FAST-NEXT: clrldi r3, r3, 48 @@ -90,16 +84,12 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) { } declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) -define <2 x i64> @lrint_v2f16(<2 x half> %x) { +define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind { ; BE-LABEL: lrint_v2f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) ; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r30, -24 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f31, f1 ; BE-NEXT: fmr f1, f2 @@ -137,17 +127,12 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; 
CHECK-NEXT: stdu r1, -96(r1) -; CHECK-NEXT: std r0, 112(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -24 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v31, -48 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 112(r1) ; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f2 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f31 @@ -172,7 +157,7 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) { ; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v2, vs0, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 96 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -181,10 +166,6 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) { ; FAST-LABEL: lrint_v2f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 48 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill ; FAST-NEXT: stdu r1, -48(r1) @@ -221,20 +202,12 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) { } declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>) -define <4 x i64> @lrint_v4f16(<4 x half> %x) { +define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind { ; BE-LABEL: lrint_v4f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) ; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r28, -56 -; BE-NEXT: .cfi_offset r29, -48 -; BE-NEXT: .cfi_offset r30, -40 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; 
BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f29, f1 ; BE-NEXT: fmr f1, f2 @@ -308,18 +281,8 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -144(r1) -; CHECK-NEXT: std r0, 160(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 144 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r28, -56 -; CHECK-NEXT: .cfi_offset r29, -48 -; CHECK-NEXT: .cfi_offset r30, -40 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v30, -96 -; CHECK-NEXT: .cfi_offset v31, -80 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 160(r1) ; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill @@ -327,11 +290,11 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; CHECK-NEXT: fmr f29, f2 ; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f29 @@ -384,11 +347,11 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v3, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte 
Folded Reload ; CHECK-NEXT: addi r1, r1, 144 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -397,12 +360,6 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; FAST-LABEL: lrint_v4f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 64 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill @@ -466,28 +423,12 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { } declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>) -define <8 x i64> @lrint_v8f16(<8 x half> %x) { +define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind { ; BE-LABEL: lrint_v8f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) ; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r24, -120 -; BE-NEXT: .cfi_offset r25, -112 -; BE-NEXT: .cfi_offset r26, -104 -; BE-NEXT: .cfi_offset r27, -96 -; BE-NEXT: .cfi_offset r28, -88 -; BE-NEXT: .cfi_offset r29, -80 -; BE-NEXT: .cfi_offset r30, -72 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f25, f1 ; BE-NEXT: fmr f1, f2 @@ -633,44 +574,24 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -240(r1) -; CHECK-NEXT: std r0, 256(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 240 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r24, -120 -; CHECK-NEXT: .cfi_offset r25, -112 -; CHECK-NEXT: .cfi_offset r26, -104 -; CHECK-NEXT: .cfi_offset r27, -96 -; CHECK-NEXT: .cfi_offset r28, -88 -; 
CHECK-NEXT: .cfi_offset r29, -80 -; CHECK-NEXT: .cfi_offset r30, -72 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v28, -192 -; CHECK-NEXT: .cfi_offset v29, -176 -; CHECK-NEXT: .cfi_offset v30, -160 -; CHECK-NEXT: .cfi_offset v31, -144 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 256(r1) ; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f2 ; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f3 ; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f27, f4 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f5 @@ -678,11 +599,11 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-NEXT: fmr f29, f6 ; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f7 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f8 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; 
CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f25 @@ -785,7 +706,7 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-NEXT: vmr v4, v29 ; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload @@ -793,7 +714,7 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload @@ -801,9 +722,9 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 240 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -812,16 +733,6 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; FAST-LABEL: lrint_v8f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 96 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: 
.cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill @@ -939,44 +850,12 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { } declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>) -define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { +define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind { ; BE-LABEL: lrint_v16i64_v16f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -496(r1) ; BE-NEXT: std r0, 512(r1) -; BE-NEXT: .cfi_def_cfa_offset 496 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r16, -248 -; BE-NEXT: .cfi_offset r17, -240 -; BE-NEXT: .cfi_offset r18, -232 -; BE-NEXT: .cfi_offset r19, -224 -; BE-NEXT: .cfi_offset r20, -216 -; BE-NEXT: .cfi_offset r21, -208 -; BE-NEXT: .cfi_offset r22, -200 -; BE-NEXT: .cfi_offset r23, -192 -; BE-NEXT: .cfi_offset r24, -184 -; BE-NEXT: .cfi_offset r25, -176 -; BE-NEXT: .cfi_offset r26, -168 -; BE-NEXT: .cfi_offset r27, -160 -; BE-NEXT: .cfi_offset r28, -152 -; BE-NEXT: .cfi_offset r29, -144 -; BE-NEXT: .cfi_offset r30, -136 -; BE-NEXT: .cfi_offset f17, -120 -; BE-NEXT: .cfi_offset f18, -112 -; BE-NEXT: .cfi_offset f19, -104 -; BE-NEXT: .cfi_offset f20, -96 -; BE-NEXT: .cfi_offset f21, -88 -; BE-NEXT: .cfi_offset f22, -80 -; BE-NEXT: .cfi_offset f23, -72 -; BE-NEXT: .cfi_offset f24, -64 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f20, f1 ; BE-NEXT: fmr f1, f2 @@ -1263,105 +1142,65 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -432(r1) -; CHECK-NEXT: std r0, 448(r1) -; CHECK-NEXT: 
.cfi_def_cfa_offset 432 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r16, -248 -; CHECK-NEXT: .cfi_offset r17, -240 -; CHECK-NEXT: .cfi_offset r18, -232 -; CHECK-NEXT: .cfi_offset r19, -224 -; CHECK-NEXT: .cfi_offset r20, -216 -; CHECK-NEXT: .cfi_offset r21, -208 -; CHECK-NEXT: .cfi_offset r22, -200 -; CHECK-NEXT: .cfi_offset r23, -192 -; CHECK-NEXT: .cfi_offset r24, -184 -; CHECK-NEXT: .cfi_offset r25, -176 -; CHECK-NEXT: .cfi_offset r26, -168 -; CHECK-NEXT: .cfi_offset r27, -160 -; CHECK-NEXT: .cfi_offset r28, -152 -; CHECK-NEXT: .cfi_offset r29, -144 -; CHECK-NEXT: .cfi_offset r30, -136 -; CHECK-NEXT: .cfi_offset f17, -120 -; CHECK-NEXT: .cfi_offset f18, -112 -; CHECK-NEXT: .cfi_offset f19, -104 -; CHECK-NEXT: .cfi_offset f20, -96 -; CHECK-NEXT: .cfi_offset f21, -88 -; CHECK-NEXT: .cfi_offset f22, -80 -; CHECK-NEXT: .cfi_offset f23, -72 -; CHECK-NEXT: .cfi_offset f24, -64 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v24, -384 -; CHECK-NEXT: .cfi_offset v25, -368 -; CHECK-NEXT: .cfi_offset v26, -352 -; CHECK-NEXT: .cfi_offset v27, -336 -; CHECK-NEXT: .cfi_offset v28, -320 -; CHECK-NEXT: .cfi_offset v29, -304 -; CHECK-NEXT: .cfi_offset v30, -288 -; CHECK-NEXT: .cfi_offset v31, -272 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 448(r1) ; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; 
CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f20, f2 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f21, f3 ; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f22, f4 +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f23, f5 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f24, f6 ; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f7 +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f8 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f27, f9 ; 
CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f10 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f29, f11 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f12 ; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f20 @@ -1564,7 +1403,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: vmr v4, v29 ; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v5, v28 ; CHECK-NEXT: vmr v6, v27 @@ -1572,7 +1411,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: vmr v8, v25 ; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload @@ -1580,7 +1419,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, 
r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload @@ -1588,7 +1427,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload @@ -1596,7 +1435,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload @@ -1604,13 +1443,13 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 432 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -1619,24 +1458,6 @@ define <16 x i64> 
@lrint_v16i64_v16f16(<16 x half> %x) { ; FAST-LABEL: lrint_v16i64_v16f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 160 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f16, -128 -; FAST-NEXT: .cfi_offset f17, -120 -; FAST-NEXT: .cfi_offset f18, -112 -; FAST-NEXT: .cfi_offset f19, -104 -; FAST-NEXT: .cfi_offset f20, -96 -; FAST-NEXT: .cfi_offset f21, -88 -; FAST-NEXT: .cfi_offset f22, -80 -; FAST-NEXT: .cfi_offset f23, -72 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill @@ -1860,50 +1681,12 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { } declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) -define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { +define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; BE-LABEL: lrint_v32i64_v32f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -864(r1) ; BE-NEXT: std r0, 880(r1) -; BE-NEXT: .cfi_def_cfa_offset 864 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r14, -288 -; BE-NEXT: .cfi_offset r15, -280 -; BE-NEXT: .cfi_offset r16, -272 -; BE-NEXT: .cfi_offset r17, -264 -; BE-NEXT: .cfi_offset r18, -256 -; BE-NEXT: .cfi_offset r19, -248 -; BE-NEXT: .cfi_offset r20, -240 -; BE-NEXT: .cfi_offset r21, -232 -; BE-NEXT: .cfi_offset r22, -224 -; BE-NEXT: .cfi_offset r23, -216 -; BE-NEXT: .cfi_offset r24, -208 -; BE-NEXT: .cfi_offset r25, -200 -; BE-NEXT: .cfi_offset r26, -192 -; BE-NEXT: .cfi_offset r27, -184 -; BE-NEXT: .cfi_offset r28, -176 -; BE-NEXT: .cfi_offset r29, -168 -; BE-NEXT: .cfi_offset r30, -160 -; BE-NEXT: .cfi_offset r31, -152 -; BE-NEXT: .cfi_offset 
f14, -144 -; BE-NEXT: .cfi_offset f15, -136 -; BE-NEXT: .cfi_offset f16, -128 -; BE-NEXT: .cfi_offset f17, -120 -; BE-NEXT: .cfi_offset f18, -112 -; BE-NEXT: .cfi_offset f19, -104 -; BE-NEXT: .cfi_offset f20, -96 -; BE-NEXT: .cfi_offset f21, -88 -; BE-NEXT: .cfi_offset f22, -80 -; BE-NEXT: .cfi_offset f23, -72 -; BE-NEXT: .cfi_offset f24, -64 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f20, f1 ; BE-NEXT: fmr f1, f2 @@ -1943,6 +1726,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill ; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f31, f13 +; BE-NEXT: mr r30, r3 ; BE-NEXT: fmr f29, f12 ; BE-NEXT: fmr f30, f11 ; BE-NEXT: fmr f28, f10 @@ -1953,7 +1737,6 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; BE-NEXT: fmr f23, f5 ; BE-NEXT: fmr f22, f4 ; BE-NEXT: fmr f21, f3 -; BE-NEXT: mr r30, r3 ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: fmr f1, f20 @@ -2456,98 +2239,48 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -688(r1) -; CHECK-NEXT: std r0, 704(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 688 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r14, -288 -; CHECK-NEXT: .cfi_offset r15, -280 -; CHECK-NEXT: .cfi_offset r16, -272 -; CHECK-NEXT: .cfi_offset r17, -264 -; CHECK-NEXT: .cfi_offset r18, -256 -; CHECK-NEXT: .cfi_offset r19, -248 -; CHECK-NEXT: .cfi_offset r20, -240 -; CHECK-NEXT: .cfi_offset r21, -232 -; CHECK-NEXT: .cfi_offset r22, -224 -; CHECK-NEXT: .cfi_offset r23, -216 -; CHECK-NEXT: .cfi_offset r24, -208 -; CHECK-NEXT: .cfi_offset r25, -200 -; CHECK-NEXT: .cfi_offset r26, -192 -; CHECK-NEXT: .cfi_offset r27, -184 -; CHECK-NEXT: 
.cfi_offset r28, -176 -; CHECK-NEXT: .cfi_offset r29, -168 -; CHECK-NEXT: .cfi_offset r30, -160 -; CHECK-NEXT: .cfi_offset r31, -152 -; CHECK-NEXT: .cfi_offset f14, -144 -; CHECK-NEXT: .cfi_offset f15, -136 -; CHECK-NEXT: .cfi_offset f16, -128 -; CHECK-NEXT: .cfi_offset f17, -120 -; CHECK-NEXT: .cfi_offset f18, -112 -; CHECK-NEXT: .cfi_offset f19, -104 -; CHECK-NEXT: .cfi_offset f20, -96 -; CHECK-NEXT: .cfi_offset f21, -88 -; CHECK-NEXT: .cfi_offset f22, -80 -; CHECK-NEXT: .cfi_offset f23, -72 -; CHECK-NEXT: .cfi_offset f24, -64 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v20, -480 -; CHECK-NEXT: .cfi_offset v21, -464 -; CHECK-NEXT: .cfi_offset v22, -448 -; CHECK-NEXT: .cfi_offset v23, -432 -; CHECK-NEXT: .cfi_offset v24, -416 -; CHECK-NEXT: .cfi_offset v25, -400 -; CHECK-NEXT: .cfi_offset v26, -384 -; CHECK-NEXT: .cfi_offset v27, -368 -; CHECK-NEXT: .cfi_offset v28, -352 -; CHECK-NEXT: .cfi_offset v29, -336 -; CHECK-NEXT: .cfi_offset v30, -320 -; CHECK-NEXT: .cfi_offset v31, -304 ; CHECK-NEXT: li r4, 208 +; CHECK-NEXT: std r0, 704(r1) ; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 224 +; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r23, 472(r1) # 8-byte 
Folded Spill ; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 240 +; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 256 +; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 272 +; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f20, f2 ; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f21, f3 ; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f22, f4 -; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 288 ; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f23, f5 @@ -2555,7 +2288,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f24, f6 ; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill ; CHECK-NEXT: 
fmr f25, f7 -; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 304 ; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f8 @@ -2563,7 +2296,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f27, f9 ; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f10 -; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 320 ; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f29, f11 @@ -2571,15 +2304,15 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f30, f12 ; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 336 -; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 352 -; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 368 -; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 384 -; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f20 @@ -3058,7 +2791,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: li r3, 384 ; CHECK-NEXT: xxswapd vs4, vs4 ; CHECK-NEXT: stxvd2x vs4, 0, r30 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 368 ; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload @@ -3076,7 +2809,7 @@ 
define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 352 ; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload @@ -3084,7 +2817,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 336 ; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload @@ -3092,7 +2825,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 320 ; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload @@ -3100,23 +2833,23 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 304 ; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, 
r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 288 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 272 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 256 -; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 224 -; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 208 -; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 688 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3126,95 +2859,62 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -480(r1) -; FAST-NEXT: std r0, 496(r1) -; FAST-NEXT: .cfi_def_cfa_offset 480 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset r30, -160 -; FAST-NEXT: .cfi_offset f14, -144 -; FAST-NEXT: .cfi_offset f15, -136 -; FAST-NEXT: .cfi_offset f16, -128 -; FAST-NEXT: .cfi_offset f17, -120 -; FAST-NEXT: .cfi_offset f18, -112 -; FAST-NEXT: .cfi_offset f19, -104 -; FAST-NEXT: .cfi_offset f20, -96 -; FAST-NEXT: .cfi_offset f21, -88 -; FAST-NEXT: .cfi_offset f22, -80 -; FAST-NEXT: .cfi_offset f23, -72 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 -; FAST-NEXT: .cfi_offset v20, -352 -; FAST-NEXT: .cfi_offset v21, -336 -; FAST-NEXT: .cfi_offset v22, -320 -; FAST-NEXT: .cfi_offset v23, 
-304 -; FAST-NEXT: .cfi_offset v24, -288 -; FAST-NEXT: .cfi_offset v25, -272 -; FAST-NEXT: .cfi_offset v26, -256 -; FAST-NEXT: .cfi_offset v27, -240 -; FAST-NEXT: .cfi_offset v28, -224 -; FAST-NEXT: .cfi_offset v29, -208 -; FAST-NEXT: .cfi_offset v30, -192 -; FAST-NEXT: .cfi_offset v31, -176 ; FAST-NEXT: li r4, 128 +; FAST-NEXT: std r0, 496(r1) ; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r3 ; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f14, f5 ; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f14, f5 ; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f16, f4 -; FAST-NEXT: mr r30, r3 -; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 144 +; FAST-NEXT: fmr f16, f4 ; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 160 +; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 176 ; FAST-NEXT: xxlor v22, f3, f3 +; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill ; FAST-NEXT: fmr f29, f9 ; FAST-NEXT: stfd f30, 
464(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 192 ; FAST-NEXT: xxlor v23, f2, f2 -; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 208 -; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 224 ; FAST-NEXT: xxlor v25, f13, f13 -; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 240 ; FAST-NEXT: xxlor v26, f12, f12 -; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 256 ; FAST-NEXT: xxlor v27, f11, f11 -; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 272 ; FAST-NEXT: xxlor v28, f10, f10 -; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 288 ; FAST-NEXT: xxlor v29, f8, f8 -; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 304 ; FAST-NEXT: xxlor v30, f7, f7 -; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 44 ; FAST-NEXT: xxlor v31, f6, f6 ; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill @@ -3643,30 +3343,30 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload ; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload ; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 288 ; FAST-NEXT: ld 
r30, 320(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 272 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 256 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 240 -; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 224 -; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 208 -; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 192 -; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 176 -; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 160 -; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 144 -; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 128 -; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 480 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -3676,14 +3376,12 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { } declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) -define <1 x i64> @lrint_v1f32(<1 x float> %x) { +define <1 x i64> @lrint_v1f32(<1 x float> %x) nounwind { ; BE-LABEL: lrint_v1f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset 
lr, 16 ; BE-NEXT: bl lrintf ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -3696,8 +3394,6 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -3715,15 +3411,13 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) { } declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) -define <2 x i64> @lrint_v2f32(<2 x float> %x) { +define <2 x i64> @lrint_v2f32(<2 x float> %x) nounwind { ; BE-LABEL: lrint_v2f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -144(r1) -; BE-NEXT: std r0, 160(r1) -; BE-NEXT: .cfi_def_cfa_offset 144 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 160(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: lfs f1, 116(r1) ; BE-NEXT: bl lrintf @@ -3744,14 +3438,11 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 80(r1) ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop @@ -3763,7 +3454,7 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, vs0, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3788,15 +3479,13 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) { } declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) -define <4 x 
i64> @lrint_v4f32(<4 x float> %x) { +define <4 x i64> @lrint_v4f32(<4 x float> %x) nounwind { ; BE-LABEL: lrint_v4f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: lfs f1, 116(r1) ; BE-NEXT: bl lrintf @@ -3827,17 +3516,13 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 96(r1) ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop @@ -3860,9 +3545,9 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) { ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: xxmrghd v3, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3898,15 +3583,13 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) { } declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>) -define <8 x i64> @lrint_v8f32(<8 x float> %x) { +define <8 x i64> @lrint_v8f32(<8 x float> %x) nounwind { ; BE-LABEL: lrint_v8f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) -; BE-NEXT: std r0, 
224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 224(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 ; BE-NEXT: stxvw4x v3, 0, r3 @@ -3959,24 +3642,18 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v30, v2 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop @@ -4022,13 +3699,13 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) { ; CHECK-NEXT: vmr v2, v29 ; CHECK-NEXT: vmr v4, v28 ; CHECK-NEXT: xxmrghd v5, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x 
v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4086,15 +3763,13 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) { } declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) -define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { +define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) nounwind { ; BE-LABEL: lrint_v16i64_v16f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) -; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 320(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 ; BE-NEXT: stxvw4x v3, 0, r3 @@ -4191,38 +3866,28 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -176(r1) -; CHECK-NEXT: std r0, 192(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 176 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v24, -128 -; CHECK-NEXT: .cfi_offset v25, -112 -; CHECK-NEXT: .cfi_offset v26, -96 -; CHECK-NEXT: .cfi_offset v27, -80 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 192(r1) ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v26, v3 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte 
Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: vmr v28, v4 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v29, v2 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop @@ -4314,21 +3979,21 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-NEXT: vmr v6, v25 ; CHECK-NEXT: vmr v8, v24 ; CHECK-NEXT: xxmrghd v9, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 176 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4430,14 +4095,12 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x 
float> %x) { } declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) -define <1 x i64> @lrint_v1f64(<1 x double> %x) { +define <1 x i64> @lrint_v1f64(<1 x double> %x) nounwind { ; BE-LABEL: lrint_v1f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl lrint ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -4450,8 +4113,6 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -4469,16 +4130,13 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) { } declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) -define <2 x i64> @lrint_v2f64(<2 x double> %x) { +define <2 x i64> @lrint_v2f64(<2 x double> %x) nounwind { ; BE-LABEL: lrint_v2f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 144 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v31, v2 ; BE-NEXT: xxlor f1, v31, v31 @@ -4502,12 +4160,9 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: xxlor f1, v31, v31 ; CHECK-NEXT: bl lrint @@ -4519,7 +4174,7 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 ; 
CHECK-NEXT: xxmrghd v2, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4542,17 +4197,13 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) { } declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) -define <4 x i64> @lrint_v4f64(<4 x double> %x) { +define <4 x i64> @lrint_v4f64(<4 x double> %x) nounwind { ; BE-LABEL: lrint_v4f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -192(r1) -; BE-NEXT: std r0, 208(r1) -; BE-NEXT: .cfi_def_cfa_offset 192 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 160 +; BE-NEXT: std r0, 208(r1) ; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v30, v2 ; BE-NEXT: li r3, 176 @@ -4591,17 +4242,13 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v30, v2 ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xxlor f1, v30, v30 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop @@ -4622,9 +4269,9 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) { ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: xxmrghd v3, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 
16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4656,25 +4303,19 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) { } declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) -define <8 x i64> @lrint_v8f64(<8 x double> %x) { +define <8 x i64> @lrint_v8f64(<8 x double> %x) nounwind { ; BE-LABEL: lrint_v8f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -256(r1) -; BE-NEXT: std r0, 272(r1) -; BE-NEXT: .cfi_def_cfa_offset 256 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v28, -64 -; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 192 +; BE-NEXT: std r0, 272(r1) ; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 208 ; BE-NEXT: vmr v28, v2 -; BE-NEXT: xxlor f1, v28, v28 ; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 224 +; BE-NEXT: xxlor f1, v28, v28 ; BE-NEXT: vmr v29, v3 ; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 240 @@ -4737,25 +4378,19 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v28, v2 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: xxlor f1, v28, v28 ; CHECK-NEXT: vmr v29, v3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; 
CHECK-NEXT: vmr v30, v4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop @@ -4796,13 +4431,13 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { ; CHECK-NEXT: vmr v3, v29 ; CHECK-NEXT: vmr v2, v28 ; CHECK-NEXT: xxmrghd v5, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4852,14 +4487,12 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { } declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) -define <1 x i64> @lrint_v1f128(<1 x fp128> %x) { +define <1 x i64> @lrint_v1f128(<1 x fp128> %x) nounwind { ; BE-LABEL: lrint_v1f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl lrintf128 ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -4872,8 +4505,6 @@ define <1 x i64> @lrint_v1f128(<1 x fp128> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl lrintf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -4886,8 +4517,6 @@ define <1 x i64> @lrint_v1f128(<1 x fp128> %x) { ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -32(r1) ; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: .cfi_def_cfa_offset 32 -; FAST-NEXT: .cfi_offset lr, 16 ; FAST-NEXT: bl 
lrintf128 ; FAST-NEXT: nop ; FAST-NEXT: addi r1, r1, 32 @@ -4899,16 +4528,13 @@ define <1 x i64> @lrint_v1f128(<1 x fp128> %x) { } declare <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128>) -define <2 x i64> @lrint_v2f128(<2 x fp128> %x) { +define <2 x i64> @lrint_v2f128(<2 x fp128> %x) nounwind { ; BE-LABEL: lrint_v2f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 144 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v31, v2 ; BE-NEXT: vmr v2, v3 @@ -4932,15 +4558,11 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl lrintf128 ; CHECK-NEXT: nop @@ -4950,10 +4572,10 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) { ; CHECK-NEXT: nop ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4963,15 +4585,11 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, 
-80(r1) -; FAST-NEXT: std r0, 96(r1) -; FAST-NEXT: .cfi_def_cfa_offset 80 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset v30, -32 -; FAST-NEXT: .cfi_offset v31, -16 ; FAST-NEXT: li r3, 48 -; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: std r0, 96(r1) +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 64 -; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: vmr v31, v3 ; FAST-NEXT: bl lrintf128 ; FAST-NEXT: nop @@ -4981,10 +4599,10 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) { ; FAST-NEXT: nop ; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 64 -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 48 ; FAST-NEXT: xxmrghd v2, vs0, v30 -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 80 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -4994,18 +4612,13 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) { } declare <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128>) -define <4 x i64> @lrint_v4f128(<4 x fp128> %x) { +define <4 x i64> @lrint_v4f128(<4 x fp128> %x) nounwind { ; BE-LABEL: lrint_v4f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) -; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 160 +; BE-NEXT: std r0, 224(r1) ; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 176 ; BE-NEXT: vmr v29, v2 @@ -5049,23 +4662,17 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; 
CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: vmr v29, v3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v30, v4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl lrintf128 ; CHECK-NEXT: nop @@ -5085,14 +4692,14 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v2, v29 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: xxmrghd v3, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -5102,23 +4709,17 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -112(r1) -; FAST-NEXT: std r0, 128(r1) -; FAST-NEXT: .cfi_def_cfa_offset 112 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset v28, -64 -; FAST-NEXT: .cfi_offset v29, -48 -; FAST-NEXT: .cfi_offset v30, -32 -; FAST-NEXT: .cfi_offset v31, -16 ; FAST-NEXT: li 
r3, 48 -; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: std r0, 128(r1) +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 64 -; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 80 ; FAST-NEXT: vmr v29, v3 -; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 96 ; FAST-NEXT: vmr v30, v4 -; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: vmr v31, v5 ; FAST-NEXT: bl lrintf128 ; FAST-NEXT: nop @@ -5138,14 +4739,14 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) { ; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 96 ; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 80 ; FAST-NEXT: xxmrghd v3, vs0, v30 -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 64 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 48 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 112 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -5155,22 +4756,13 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) { } declare <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128>) -define <8 x i64> @lrint_v8f128(<8 x fp128> %x) { +define <8 x i64> @lrint_v8f128(<8 x fp128> %x) nounwind { ; BE-LABEL: lrint_v8f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) -; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v25, -112 -; BE-NEXT: .cfi_offset v26, -96 -; BE-NEXT: .cfi_offset v27, -80 -; BE-NEXT: .cfi_offset v28, -64 
-; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 192 +; BE-NEXT: std r0, 320(r1) ; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 208 ; BE-NEXT: vmr v25, v2 @@ -5254,39 +4846,29 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -176(r1) -; CHECK-NEXT: std r0, 192(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 176 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v24, -128 -; CHECK-NEXT: .cfi_offset v25, -112 -; CHECK-NEXT: .cfi_offset v26, -96 -; CHECK-NEXT: .cfi_offset v27, -80 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 192(r1) +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: vmr v25, v3 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v26, v4 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: vmr v27, v5 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: vmr v28, v6 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v29, v7 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 ; CHECK-NEXT: vmr v30, v8 -; CHECK-NEXT: stvx v31, r1, r3 # 
16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v9 ; CHECK-NEXT: bl lrintf128 ; CHECK-NEXT: nop @@ -5324,24 +4906,24 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 160 ; CHECK-NEXT: vmr v4, v29 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v3, v27 ; CHECK-NEXT: vmr v2, v25 ; CHECK-NEXT: xxmrghd v5, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 176 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -5351,39 +4933,29 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -176(r1) -; FAST-NEXT: std r0, 192(r1) -; FAST-NEXT: .cfi_def_cfa_offset 176 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset v24, -128 -; FAST-NEXT: .cfi_offset v25, -112 -; FAST-NEXT: .cfi_offset v26, -96 -; FAST-NEXT: .cfi_offset v27, -80 -; FAST-NEXT: .cfi_offset v28, -64 -; FAST-NEXT: .cfi_offset v29, -48 
-; FAST-NEXT: .cfi_offset v30, -32 -; FAST-NEXT: .cfi_offset v31, -16 ; FAST-NEXT: li r3, 48 -; FAST-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: std r0, 192(r1) +; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 64 -; FAST-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 80 ; FAST-NEXT: vmr v25, v3 -; FAST-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 96 ; FAST-NEXT: vmr v26, v4 -; FAST-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 112 ; FAST-NEXT: vmr v27, v5 -; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 128 ; FAST-NEXT: vmr v28, v6 -; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 144 ; FAST-NEXT: vmr v29, v7 -; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 160 ; FAST-NEXT: vmr v30, v8 -; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: vmr v31, v9 ; FAST-NEXT: bl lrintf128 ; FAST-NEXT: nop @@ -5421,24 +4993,24 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) { ; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 160 ; FAST-NEXT: vmr v4, v29 -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 144 ; FAST-NEXT: vmr v3, v27 ; FAST-NEXT: vmr v2, v25 ; FAST-NEXT: xxmrghd v5, vs0, v30 -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 128 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte 
Folded Reload ; FAST-NEXT: li r3, 112 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 96 -; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 80 -; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 64 -; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 48 -; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 176 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -5448,27 +5020,13 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) { } declare <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128>) -define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) { +define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) nounwind { ; BE-LABEL: lrint_v16i64_v16f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -496(r1) -; BE-NEXT: std r0, 512(r1) -; BE-NEXT: .cfi_def_cfa_offset 496 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v20, -192 -; BE-NEXT: .cfi_offset v21, -176 -; BE-NEXT: .cfi_offset v22, -160 -; BE-NEXT: .cfi_offset v23, -144 -; BE-NEXT: .cfi_offset v24, -128 -; BE-NEXT: .cfi_offset v25, -112 -; BE-NEXT: .cfi_offset v26, -96 -; BE-NEXT: .cfi_offset v27, -80 -; BE-NEXT: .cfi_offset v28, -64 -; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 304 +; BE-NEXT: std r0, 512(r1) ; BE-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 320 ; BE-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill @@ -5632,55 +5190,41 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -304(r1) -; CHECK-NEXT: std r0, 320(r1) -; CHECK-NEXT: 
.cfi_def_cfa_offset 304 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v20, -192 -; CHECK-NEXT: .cfi_offset v21, -176 -; CHECK-NEXT: .cfi_offset v22, -160 -; CHECK-NEXT: .cfi_offset v23, -144 -; CHECK-NEXT: .cfi_offset v24, -128 -; CHECK-NEXT: .cfi_offset v25, -112 -; CHECK-NEXT: .cfi_offset v26, -96 -; CHECK-NEXT: .cfi_offset v27, -80 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: stvx v20, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 320(r1) +; CHECK-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: stvx v21, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v21, v4 -; CHECK-NEXT: stvx v22, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 ; CHECK-NEXT: vmr v22, v6 -; CHECK-NEXT: stvx v23, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 176 ; CHECK-NEXT: vmr v23, v8 -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 192 ; CHECK-NEXT: vmr v24, v9 -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 208 ; CHECK-NEXT: vmr v25, v7 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 224 ; CHECK-NEXT: vmr v26, v10 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 240 ; CHECK-NEXT: vmr v27, v5 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 256 ; CHECK-NEXT: vmr v28, 
v11 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 272 ; CHECK-NEXT: vmr v29, v12 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 288 ; CHECK-NEXT: vmr v30, v3 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: addi r3, r1, 576 @@ -5777,36 +5321,36 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 288 ; CHECK-NEXT: vmr v8, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 272 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: vmr v7, v29 ; CHECK-NEXT: vmr v6, v28 ; CHECK-NEXT: vmr v3, v27 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 256 ; CHECK-NEXT: vmr v4, v25 ; CHECK-NEXT: vmr v5, v24 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 224 -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 208 ; CHECK-NEXT: xxmrghd v9, vs0, v26 -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 192 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 176 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; 
CHECK-NEXT: li r3, 160 -; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 304 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -5816,55 +5360,41 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -304(r1) -; FAST-NEXT: std r0, 320(r1) -; FAST-NEXT: .cfi_def_cfa_offset 304 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset v20, -192 -; FAST-NEXT: .cfi_offset v21, -176 -; FAST-NEXT: .cfi_offset v22, -160 -; FAST-NEXT: .cfi_offset v23, -144 -; FAST-NEXT: .cfi_offset v24, -128 -; FAST-NEXT: .cfi_offset v25, -112 -; FAST-NEXT: .cfi_offset v26, -96 -; FAST-NEXT: .cfi_offset v27, -80 -; FAST-NEXT: .cfi_offset v28, -64 -; FAST-NEXT: .cfi_offset v29, -48 -; FAST-NEXT: .cfi_offset v30, -32 -; FAST-NEXT: .cfi_offset v31, -16 ; FAST-NEXT: li r3, 112 -; FAST-NEXT: stvx v20, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: std r0, 320(r1) +; FAST-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 128 -; FAST-NEXT: stvx v21, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 144 ; FAST-NEXT: vmr v21, v4 -; FAST-NEXT: stvx v22, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 160 ; FAST-NEXT: vmr v22, v6 -; FAST-NEXT: stvx v23, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 176 ; FAST-NEXT: vmr v23, v8 -; FAST-NEXT: stvx 
v24, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 192 ; FAST-NEXT: vmr v24, v9 -; FAST-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 208 ; FAST-NEXT: vmr v25, v7 -; FAST-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 224 ; FAST-NEXT: vmr v26, v10 -; FAST-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 240 ; FAST-NEXT: vmr v27, v5 -; FAST-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 256 ; FAST-NEXT: vmr v28, v11 -; FAST-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 272 ; FAST-NEXT: vmr v29, v12 -; FAST-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 288 ; FAST-NEXT: vmr v30, v3 -; FAST-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: li r3, 64 ; FAST-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill ; FAST-NEXT: addi r3, r1, 576 @@ -5961,36 +5491,36 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) { ; FAST-NEXT: mtfprd f0, r3 ; FAST-NEXT: li r3, 288 ; FAST-NEXT: vmr v8, v31 -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 272 ; FAST-NEXT: vmr v2, v30 ; FAST-NEXT: vmr v7, v29 ; FAST-NEXT: vmr v6, v28 ; FAST-NEXT: vmr v3, v27 -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 256 ; FAST-NEXT: vmr v4, v25 ; FAST-NEXT: vmr v5, v24 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte 
Folded Reload ; FAST-NEXT: li r3, 240 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 224 -; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 208 ; FAST-NEXT: xxmrghd v9, vs0, v26 -; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 192 -; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 176 -; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 160 -; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 144 -; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 128 -; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 112 -; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 304 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -6000,27 +5530,12 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) { } declare <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128>) -define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) { +define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) nounwind { ; BE-LABEL: lrint_v32i64_v32f128: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -896(r1) ; BE-NEXT: std r0, 912(r1) -; BE-NEXT: .cfi_def_cfa_offset 896 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r30, -16 -; BE-NEXT: .cfi_offset v20, -208 -; BE-NEXT: .cfi_offset v21, -192 -; BE-NEXT: .cfi_offset v22, -176 -; BE-NEXT: .cfi_offset v23, -160 -; BE-NEXT: .cfi_offset 
v24, -144 -; BE-NEXT: .cfi_offset v25, -128 -; BE-NEXT: .cfi_offset v26, -112 -; BE-NEXT: .cfi_offset v27, -96 -; BE-NEXT: .cfi_offset v28, -80 -; BE-NEXT: .cfi_offset v29, -64 -; BE-NEXT: .cfi_offset v30, -48 -; BE-NEXT: .cfi_offset v31, -32 ; BE-NEXT: std r30, 880(r1) # 8-byte Folded Spill ; BE-NEXT: mr r30, r3 ; BE-NEXT: addi r3, r1, 1440 @@ -6370,63 +5885,48 @@ define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -640(r1) -; CHECK-NEXT: std r0, 656(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 640 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: .cfi_offset v20, -208 -; CHECK-NEXT: .cfi_offset v21, -192 -; CHECK-NEXT: .cfi_offset v22, -176 -; CHECK-NEXT: .cfi_offset v23, -160 -; CHECK-NEXT: .cfi_offset v24, -144 -; CHECK-NEXT: .cfi_offset v25, -128 -; CHECK-NEXT: .cfi_offset v26, -112 -; CHECK-NEXT: .cfi_offset v27, -96 -; CHECK-NEXT: .cfi_offset v28, -80 -; CHECK-NEXT: .cfi_offset v29, -64 -; CHECK-NEXT: .cfi_offset v30, -48 -; CHECK-NEXT: .cfi_offset v31, -32 ; CHECK-NEXT: li r4, 432 +; CHECK-NEXT: std r0, 656(r1) ; CHECK-NEXT: std r30, 624(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 ; CHECK-NEXT: addi r3, r1, 1184 -; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 448 ; CHECK-NEXT: lxvd2x vs0, 0, r3 ; CHECK-NEXT: addi r3, r1, 1168 -; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 464 ; CHECK-NEXT: lxvd2x vs1, 0, r3 ; CHECK-NEXT: addi r3, r1, 1152 -; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 480 ; CHECK-NEXT: lxvd2x vs2, 0, r3 ; CHECK-NEXT: addi r3, r1, 1136 -; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 496 ; 
CHECK-NEXT: lxvd2x vs3, 0, r3 ; CHECK-NEXT: addi r3, r1, 1120 -; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 512 ; CHECK-NEXT: lxvd2x vs4, 0, r3 ; CHECK-NEXT: addi r3, r1, 1104 ; CHECK-NEXT: vmr v24, v3 -; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 528 ; CHECK-NEXT: lxvd2x vs5, 0, r3 -; CHECK-NEXT: xxswapd vs0, vs0 ; CHECK-NEXT: addi r3, r1, 1088 -; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 544 -; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 560 -; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 576 -; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 592 -; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 608 -; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 416 ; CHECK-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 400 @@ -6740,30 +6240,30 @@ define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) { ; CHECK-NEXT: li r3, 608 ; CHECK-NEXT: xxswapd vs4, vs4 ; CHECK-NEXT: stxvd2x vs4, 0, r30 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 592 ; CHECK-NEXT: ld r30, 624(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 576 -; CHECK-NEXT: lvx v29, r1, r3 
# 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 560 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 544 -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 528 -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 512 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 496 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 480 -; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 464 -; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 448 -; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 432 -; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 640 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -6773,63 +6273,48 @@ define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -640(r1) -; FAST-NEXT: std r0, 656(r1) -; FAST-NEXT: .cfi_def_cfa_offset 640 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset r30, -16 -; FAST-NEXT: .cfi_offset v20, -208 -; FAST-NEXT: .cfi_offset v21, -192 -; FAST-NEXT: .cfi_offset v22, -176 -; FAST-NEXT: .cfi_offset v23, -160 -; FAST-NEXT: .cfi_offset v24, -144 -; FAST-NEXT: .cfi_offset v25, -128 -; FAST-NEXT: .cfi_offset v26, -112 -; FAST-NEXT: .cfi_offset v27, -96 -; FAST-NEXT: 
.cfi_offset v28, -80 -; FAST-NEXT: .cfi_offset v29, -64 -; FAST-NEXT: .cfi_offset v30, -48 -; FAST-NEXT: .cfi_offset v31, -32 ; FAST-NEXT: li r4, 432 +; FAST-NEXT: std r0, 656(r1) ; FAST-NEXT: std r30, 624(r1) # 8-byte Folded Spill ; FAST-NEXT: mr r30, r3 ; FAST-NEXT: addi r3, r1, 1184 -; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 448 ; FAST-NEXT: lxvd2x vs0, 0, r3 ; FAST-NEXT: addi r3, r1, 1168 -; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 464 ; FAST-NEXT: lxvd2x vs1, 0, r3 ; FAST-NEXT: addi r3, r1, 1152 -; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 480 ; FAST-NEXT: lxvd2x vs2, 0, r3 ; FAST-NEXT: addi r3, r1, 1136 -; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 496 ; FAST-NEXT: lxvd2x vs3, 0, r3 ; FAST-NEXT: addi r3, r1, 1120 -; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 512 ; FAST-NEXT: lxvd2x vs4, 0, r3 ; FAST-NEXT: addi r3, r1, 1104 ; FAST-NEXT: vmr v24, v3 -; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 528 ; FAST-NEXT: lxvd2x vs5, 0, r3 -; FAST-NEXT: xxswapd vs0, vs0 ; FAST-NEXT: addi r3, r1, 1088 -; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 544 -; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: xxswapd vs0, vs0 +; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 560 -; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 576 -; FAST-NEXT: stvx v29, r1, r4 # 
16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 592 -; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 608 -; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 416 ; FAST-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 400 @@ -7143,30 +6628,30 @@ define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) { ; FAST-NEXT: li r3, 608 ; FAST-NEXT: xxswapd vs4, vs4 ; FAST-NEXT: stxvd2x vs4, 0, r30 -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 592 ; FAST-NEXT: ld r30, 624(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 576 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 560 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 544 -; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 528 -; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 512 -; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 496 -; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 480 -; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 464 -; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload 
; FAST-NEXT: li r3, 448 -; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 432 -; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 640 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 diff --git a/llvm/test/CodeGen/X86/vector-llrint-f16.ll b/llvm/test/CodeGen/X86/vector-llrint-f16.ll index d6a21e1c00502..eb7be61b719f2 100644 --- a/llvm/test/CodeGen/X86/vector-llrint-f16.ll +++ b/llvm/test/CodeGen/X86/vector-llrint-f16.ll @@ -7,7 +7,7 @@ ; RUN: sed 's/XRINT/lrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16 ; RUN: sed 's/XRINT/llrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16 -define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { +define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; AVX-LABEL: llrint_v1i64_v1f16: ; AVX: # %bb.0: ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 @@ -25,7 +25,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ret <1 x i64> %a } -define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) { +define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) nounwind { ; AVX-LABEL: llrint_v2i64_v2f16: ; AVX: # %bb.0: ; AVX-NEXT: vcvtph2ps %xmm0, %xmm1 @@ -52,7 +52,7 @@ define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) { ret <2 x i64> %a } -define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; AVX-LABEL: llrint_v4i64_v4f16: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm1 @@ -95,7 +95,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ret <4 x i64> %a } -define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; AVX-LABEL: llrint_v8i64_v8f16: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm1 @@ -170,7 +170,7 @@ define <8 x i64> 
@llrint_v8i64_v8f16(<8 x half> %x) { ret <8 x i64> %a } -define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; AVX-LABEL: llrint_v16i64_v16f16: ; AVX: # %bb.0: ; AVX-NEXT: vmovdqa %ymm0, %ymm2 @@ -310,7 +310,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ret <16 x i64> %a } -define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { +define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; AVX-LABEL: llrint_v32i64_v32f16: ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll index f393ffd8a0441..6fd1a35505aac 100644 --- a/llvm/test/CodeGen/X86/vector-llrint.ll +++ b/llvm/test/CodeGen/X86/vector-llrint.ll @@ -5,14 +5,11 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX512DQ -define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind { ; X86-LABEL: llrint_v1i64_v1f32: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: flds 8(%ebp) @@ -21,7 +18,6 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; ; SSE-LABEL: llrint_v1i64_v1f32: @@ -43,20 +39,15 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) -define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind { ; X86-LABEL: llrint_v2i64_v2f32: ; X86: # %bb.0: ; X86-NEXT: pushl 
%ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: .cfi_offset %esi, -16 -; X86-NEXT: .cfi_offset %edi, -12 ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: flds 16(%ebp) ; X86-NEXT: flds 12(%ebp) @@ -74,7 +65,6 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v2i64_v2f32: @@ -107,22 +97,16 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) -define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind { ; X86-LABEL: llrint_v4i64_v4f32: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $56, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: flds 24(%ebp) ; X86-NEXT: flds 20(%ebp) @@ -159,7 +143,6 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v4i64_v4f32: @@ -227,22 +210,16 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) -define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind { ; X86-LABEL: llrint_v8i64_v8f32: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; 
X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $120, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: flds 12(%ebp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: flds 16(%ebp) @@ -319,7 +296,6 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v8i64_v8f32: @@ -435,22 +411,16 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) -define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind { ; X86-LABEL: llrint_v16i64_v16f32: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $248, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: flds 12(%ebp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: flds 16(%ebp) @@ -607,7 +577,6 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v16i64_v16f32: @@ -825,14 +794,11 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) -define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind { ; X86-LABEL: llrint_v1i64_v1f64: ; X86: # %bb.0: ; 
X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp ; X86-NEXT: fldl 8(%ebp) @@ -841,7 +807,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; ; SSE-LABEL: llrint_v1i64_v1f64: @@ -863,20 +828,15 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) -define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind { ; X86-LABEL: llrint_v2i64_v2f64: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: .cfi_offset %esi, -16 -; X86-NEXT: .cfi_offset %edi, -12 ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: fldl 20(%ebp) ; X86-NEXT: fldl 12(%ebp) @@ -894,7 +854,6 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v2i64_v2f64: @@ -927,22 +886,16 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) -define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind { ; X86-LABEL: llrint_v4i64_v4f64: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl 
$-8, %esp ; X86-NEXT: subl $56, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: movl 8(%ebp), %eax ; X86-NEXT: fldl 36(%ebp) ; X86-NEXT: fldl 28(%ebp) @@ -979,7 +932,6 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v4i64_v4f64: @@ -1045,22 +997,16 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) -define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind { ; X86-LABEL: llrint_v8i64_v8f64: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $120, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: fldl 12(%ebp) ; X86-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-NEXT: fldl 20(%ebp) @@ -1137,7 +1083,6 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v8i64_v8f64: @@ -1247,14 +1192,11 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) -define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { +define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind { ; X86-LABEL: llrint_v1i64_v1f128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $16, 
%esp ; X86-NEXT: pushl 20(%ebp) @@ -1265,56 +1207,43 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { ; X86-NEXT: addl $16, %esp ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; ; SSE-LABEL: llrint_v1i64_v1f128: ; SSE: # %bb.0: ; SSE-NEXT: pushq %rax -; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: callq llrintl@PLT ; SSE-NEXT: popq %rcx -; SSE-NEXT: .cfi_def_cfa_offset 8 ; SSE-NEXT: retq ; ; AVX-LABEL: llrint_v1i64_v1f128: ; AVX: # %bb.0: ; AVX-NEXT: pushq %rax -; AVX-NEXT: .cfi_def_cfa_offset 16 ; AVX-NEXT: callq llrintl@PLT ; AVX-NEXT: popq %rcx -; AVX-NEXT: .cfi_def_cfa_offset 8 ; AVX-NEXT: retq ; ; AVX512DQ-LABEL: llrint_v1i64_v1f128: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: pushq %rax -; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 ; AVX512DQ-NEXT: callq llrintl@PLT ; AVX512DQ-NEXT: popq %rcx -; AVX512DQ-NEXT: .cfi_def_cfa_offset 8 ; AVX512DQ-NEXT: retq %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x) ret <1 x i64> %a } declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>) -define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { +define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind { ; X86-LABEL: llrint_v2i64_v2f128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: movl 8(%ebp), %esi ; X86-NEXT: pushl 24(%ebp) ; X86-NEXT: pushl 20(%ebp) @@ -1340,13 +1269,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v2i64_v2f128: ; SSE: # %bb.0: ; SSE-NEXT: subq $40, %rsp -; 
SSE-NEXT: .cfi_def_cfa_offset 48 ; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: callq llrintl@PLT @@ -1358,13 +1285,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload ; SSE-NEXT: # xmm0 = xmm0[0],mem[0] ; SSE-NEXT: addq $40, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 8 ; SSE-NEXT: retq ; ; AVX-LABEL: llrint_v2i64_v2f128: ; AVX: # %bb.0: ; AVX-NEXT: subq $40, %rsp -; AVX-NEXT: .cfi_def_cfa_offset 48 ; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: callq llrintl@PLT @@ -1376,13 +1301,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; AVX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload ; AVX-NEXT: # xmm0 = xmm0[0],mem[0] ; AVX-NEXT: addq $40, %rsp -; AVX-NEXT: .cfi_def_cfa_offset 8 ; AVX-NEXT: retq ; ; AVX512DQ-LABEL: llrint_v2i64_v2f128: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: subq $40, %rsp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 48 ; AVX512DQ-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512DQ-NEXT: vmovaps %xmm1, %xmm0 ; AVX512DQ-NEXT: callq llrintl@PLT @@ -1394,29 +1317,22 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; AVX512DQ-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload ; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] ; AVX512DQ-NEXT: addq $40, %rsp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 8 ; AVX512DQ-NEXT: retq %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) ret <2 x i64> %a } declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) -define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind { ; X86-LABEL: llrint_v4i64_v4f128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx ; 
X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $32, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: movl 8(%ebp), %esi ; X86-NEXT: movl 36(%ebp), %edi ; X86-NEXT: movl 40(%ebp), %ebx @@ -1468,13 +1384,11 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v4i64_v4f128: ; SSE: # %bb.0: ; SSE-NEXT: subq $72, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 80 ; SSE-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill @@ -1499,13 +1413,11 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { ; SSE-NEXT: # xmm1 = xmm1[0],mem[0] ; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; SSE-NEXT: addq $72, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 8 ; SSE-NEXT: retq ; ; AVX1-LABEL: llrint_v4i64_v4f128: ; AVX1: # %bb.0: ; AVX1-NEXT: subq $72, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 80 ; AVX1-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill ; AVX1-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1530,13 +1442,11 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { ; AVX1-NEXT: # xmm0 = xmm0[0],mem[0] ; AVX1-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX1-NEXT: addq $72, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 8 ; AVX1-NEXT: retq ; ; AVX512-LABEL: llrint_v4i64_v4f128: ; AVX512: # %bb.0: ; AVX512-NEXT: subq $72, %rsp -; AVX512-NEXT: .cfi_def_cfa_offset 80 ; AVX512-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill ; AVX512-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1561,13 +1471,11 @@ define <4 x i64> 
@llrint_v4i64_v4f128(<4 x fp128> %x) { ; AVX512-NEXT: # xmm0 = xmm0[0],mem[0] ; AVX512-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX512-NEXT: addq $72, %rsp -; AVX512-NEXT: .cfi_def_cfa_offset 8 ; AVX512-NEXT: retq ; ; AVX512DQ-LABEL: llrint_v4i64_v4f128: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: subq $72, %rsp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 80 ; AVX512DQ-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill ; AVX512DQ-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512DQ-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1592,29 +1500,22 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { ; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0] ; AVX512DQ-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX512DQ-NEXT: addq $72, %rsp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 8 ; AVX512DQ-NEXT: retq %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) ret <4 x i64> %a } declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) -define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind { ; X86-LABEL: llrint_v8i64_v8f128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp -; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $64, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: movl 8(%ebp), %esi ; X86-NEXT: movl 36(%ebp), %edi ; X86-NEXT: movl 40(%ebp), %ebx @@ -1714,13 +1615,11 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl $4 ; ; SSE-LABEL: llrint_v8i64_v8f128: ; SSE: # %bb.0: ; SSE-NEXT: subq $136, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 144 ; SSE-NEXT: 
movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1771,13 +1670,11 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload ; SSE-NEXT: addq $136, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 8 ; SSE-NEXT: retq ; ; AVX1-LABEL: llrint_v8i64_v8f128: ; AVX1: # %bb.0: ; AVX1-NEXT: subq $152, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 160 ; AVX1-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX1-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX1-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1829,13 +1726,11 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload ; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; AVX1-NEXT: addq $152, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 8 ; AVX1-NEXT: retq ; ; AVX512-LABEL: llrint_v8i64_v8f128: ; AVX512: # %bb.0: ; AVX512-NEXT: subq $152, %rsp -; AVX512-NEXT: .cfi_def_cfa_offset 160 ; AVX512-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; AVX512-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1887,13 +1782,11 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; AVX512-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX512-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload ; AVX512-NEXT: addq $152, %rsp -; AVX512-NEXT: .cfi_def_cfa_offset 8 ; AVX512-NEXT: retq ; ; AVX512DQ-LABEL: llrint_v8i64_v8f128: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: subq $152, %rsp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 160 ; AVX512DQ-NEXT: vmovaps %xmm6, (%rsp) # 
16-byte Spill ; AVX512DQ-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512DQ-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1945,7 +1838,6 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { ; AVX512DQ-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX512DQ-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload ; AVX512DQ-NEXT: addq $152, %rsp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 8 ; AVX512DQ-NEXT: retq %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) ret <8 x i64> %a diff --git a/llvm/test/CodeGen/X86/vector-lrint-f16.ll b/llvm/test/CodeGen/X86/vector-lrint-f16.ll index 1316f808aa27e..fa3aeb09eae6f 100644 --- a/llvm/test/CodeGen/X86/vector-lrint-f16.ll +++ b/llvm/test/CodeGen/X86/vector-lrint-f16.ll @@ -8,7 +8,7 @@ ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefixes=X64-AVX-I32 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=X64-FP16-I32 -define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { +define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind { ; X86-AVX-I16-LABEL: lrint_v1f16: ; X86-AVX-I16: # %bb.0: ; X86-AVX-I16-NEXT: vcvtph2ps %xmm0, %xmm0 @@ -73,7 +73,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>) -define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { +define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind { ; X86-AVX-I16-LABEL: lrint_v2f16: ; X86-AVX-I16: # %bb.0: ; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1 @@ -250,7 +250,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>) -define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { +define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind { ; X86-AVX-I16-LABEL: lrint_v4f16: ; X86-AVX-I16: # %bb.0: ; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, 
%xmm1 @@ -455,7 +455,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>) -define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { +define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind { ; X86-AVX-I16-LABEL: lrint_v8f16: ; X86-AVX-I16: # %bb.0: ; X86-AVX-I16-NEXT: vpsrld $16, %xmm0, %xmm1 @@ -718,7 +718,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) -define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { +define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind { ; X86-AVX-I16-LABEL: lrint_v16f16: ; X86-AVX-I16: # %bb.0: ; X86-AVX-I16-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -1211,7 +1211,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) -define <32 x iXLen> @lrint_v32f32(<32 x half> %x) { +define <32 x iXLen> @lrint_v32f32(<32 x half> %x) nounwind { ; X86-AVX-I16-LABEL: lrint_v32f32: ; X86-AVX-I16: # %bb.0: ; X86-AVX-I16-NEXT: vextracti128 $1, %ymm0, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll index 8900e94c50305..b3e5a0929b7a5 100644 --- a/llvm/test/CodeGen/X86/vector-lrint.ll +++ b/llvm/test/CodeGen/X86/vector-lrint.ll @@ -12,25 +12,20 @@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512-i64 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512DQ-i64 -define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { +define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind { ; X86-I32-LABEL: lrint_v1f32: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %eax -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: flds {{[0-9]+}}(%esp) ; X86-I32-NEXT: fistpl (%esp) ; X86-I32-NEXT: movl (%esp), %eax ; X86-I32-NEXT: popl %ecx -; X86-I32-NEXT: .cfi_def_cfa_offset 4 ; X86-I32-NEXT: retl ; ; 
X86-I64-LABEL: lrint_v1f32: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: andl $-8, %esp ; X86-I64-NEXT: subl $8, %esp ; X86-I64-NEXT: flds 8(%ebp) @@ -39,7 +34,6 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-I64-NEXT: movl %ebp, %esp ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl ; ; X86-SSE2-LABEL: lrint_v1f32: @@ -66,11 +60,10 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) -define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { +define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind { ; X86-I32-LABEL: lrint_v2f32: ; X86-I32: # %bb.0: ; X86-I32-NEXT: subl $8, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: flds {{[0-9]+}}(%esp) ; X86-I32-NEXT: flds {{[0-9]+}}(%esp) ; X86-I32-NEXT: fistpl (%esp) @@ -78,22 +71,16 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { ; X86-I32-NEXT: movl (%esp), %eax ; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-I32-NEXT: addl $8, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 4 ; X86-I32-NEXT: retl ; ; X86-I64-LABEL: lrint_v2f32: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-8, %esp ; X86-I64-NEXT: subl $16, %esp -; X86-I64-NEXT: .cfi_offset %esi, -16 -; X86-I64-NEXT: .cfi_offset %edi, -12 ; X86-I64-NEXT: movl 8(%ebp), %eax ; X86-I64-NEXT: flds 16(%ebp) ; X86-I64-NEXT: flds 12(%ebp) @@ -111,7 +98,6 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { ; X86-I64-NEXT: popl %esi ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; 
X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v2f32: @@ -158,17 +144,12 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) -define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { +define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind { ; X86-I32-LABEL: lrint_v4f32: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %edi -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: pushl %esi -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: subl $16, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 28 -; X86-I32-NEXT: .cfi_offset %esi, -12 -; X86-I32-NEXT: .cfi_offset %edi, -8 ; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-I32-NEXT: flds {{[0-9]+}}(%esp) ; X86-I32-NEXT: flds {{[0-9]+}}(%esp) @@ -187,28 +168,19 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { ; X86-I32-NEXT: movl %edx, 4(%eax) ; X86-I32-NEXT: movl %ecx, (%eax) ; X86-I32-NEXT: addl $16, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: popl %esi -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: popl %edi -; X86-I32-NEXT: .cfi_def_cfa_offset 4 ; X86-I32-NEXT: retl $4 ; ; X86-I64-LABEL: lrint_v4f32: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %ebx ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-8, %esp ; X86-I64-NEXT: subl $56, %esp -; X86-I64-NEXT: .cfi_offset %esi, -20 -; X86-I64-NEXT: .cfi_offset %edi, -16 -; X86-I64-NEXT: .cfi_offset %ebx, -12 ; X86-I64-NEXT: movl 8(%ebp), %eax ; X86-I64-NEXT: flds 24(%ebp) ; X86-I64-NEXT: flds 20(%ebp) @@ -245,7 +217,6 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebx ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v4f32: @@ -308,23 +279,14 @@ define <4 x iXLen> 
@lrint_v4f32(<4 x float> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) -define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { +define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind { ; X86-I32-LABEL: lrint_v8f32: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %ebp -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: pushl %ebx -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: pushl %edi -; X86-I32-NEXT: .cfi_def_cfa_offset 16 ; X86-I32-NEXT: pushl %esi -; X86-I32-NEXT: .cfi_def_cfa_offset 20 ; X86-I32-NEXT: subl $40, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 60 -; X86-I32-NEXT: .cfi_offset %esi, -20 -; X86-I32-NEXT: .cfi_offset %edi, -16 -; X86-I32-NEXT: .cfi_offset %ebx, -12 -; X86-I32-NEXT: .cfi_offset %ebp, -8 ; X86-I32-NEXT: flds {{[0-9]+}}(%esp) ; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) ; X86-I32-NEXT: flds {{[0-9]+}}(%esp) @@ -363,32 +325,21 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { ; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-I32-NEXT: movl %ecx, (%eax) ; X86-I32-NEXT: addl $40, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 20 ; X86-I32-NEXT: popl %esi -; X86-I32-NEXT: .cfi_def_cfa_offset 16 ; X86-I32-NEXT: popl %edi -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: popl %ebx -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: popl %ebp -; X86-I32-NEXT: .cfi_def_cfa_offset 4 ; X86-I32-NEXT: retl $4 ; ; X86-I64-LABEL: lrint_v8f32: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %ebx ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-8, %esp ; X86-I64-NEXT: subl $120, %esp -; X86-I64-NEXT: .cfi_offset %esi, -20 -; X86-I64-NEXT: .cfi_offset %edi, -16 -; X86-I64-NEXT: .cfi_offset %ebx, -12 ; X86-I64-NEXT: flds 12(%ebp) ; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-I64-NEXT: flds 16(%ebp) @@ -465,7 
+416,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebx ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v8f32: @@ -561,31 +511,26 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) -define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) { +define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) nounwind { %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) ret <16 x iXLen> %a } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { +define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind { ; X86-I32-LABEL: lrint_v1f64: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %eax -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) ; X86-I32-NEXT: fistpl (%esp) ; X86-I32-NEXT: movl (%esp), %eax ; X86-I32-NEXT: popl %ecx -; X86-I32-NEXT: .cfi_def_cfa_offset 4 ; X86-I32-NEXT: retl ; ; X86-I64-LABEL: lrint_v1f64: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: andl $-8, %esp ; X86-I64-NEXT: subl $8, %esp ; X86-I64-NEXT: fldl 8(%ebp) @@ -594,7 +539,6 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { ; X86-I64-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-I64-NEXT: movl %ebp, %esp ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl ; ; X86-SSE2-LABEL: lrint_v1f64: @@ -621,11 +565,10 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) -define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { +define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind { ; X86-I32-LABEL: lrint_v2f64: ; X86-I32: # %bb.0: ; X86-I32-NEXT: subl $8, %esp -; 
X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) ; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) ; X86-I32-NEXT: fistpl (%esp) @@ -633,22 +576,16 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { ; X86-I32-NEXT: movl (%esp), %eax ; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-I32-NEXT: addl $8, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 4 ; X86-I32-NEXT: retl ; ; X86-I64-LABEL: lrint_v2f64: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-8, %esp ; X86-I64-NEXT: subl $16, %esp -; X86-I64-NEXT: .cfi_offset %esi, -16 -; X86-I64-NEXT: .cfi_offset %edi, -12 ; X86-I64-NEXT: movl 8(%ebp), %eax ; X86-I64-NEXT: fldl 20(%ebp) ; X86-I64-NEXT: fldl 12(%ebp) @@ -666,7 +603,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { ; X86-I64-NEXT: popl %esi ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v2f64: @@ -713,17 +649,12 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) -define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { +define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind { ; X86-I32-LABEL: lrint_v4f64: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %edi -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: pushl %esi -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: subl $16, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 28 -; X86-I32-NEXT: .cfi_offset %esi, -12 -; X86-I32-NEXT: .cfi_offset %edi, -8 ; X86-I32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) ; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) @@ -742,28 +673,19 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { ; X86-I32-NEXT: movl %edx, 4(%eax) ; X86-I32-NEXT: movl %ecx, (%eax) ; 
X86-I32-NEXT: addl $16, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: popl %esi -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: popl %edi -; X86-I32-NEXT: .cfi_def_cfa_offset 4 ; X86-I32-NEXT: retl $4 ; ; X86-I64-LABEL: lrint_v4f64: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %ebx ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-8, %esp ; X86-I64-NEXT: subl $56, %esp -; X86-I64-NEXT: .cfi_offset %esi, -20 -; X86-I64-NEXT: .cfi_offset %edi, -16 -; X86-I64-NEXT: .cfi_offset %ebx, -12 ; X86-I64-NEXT: movl 8(%ebp), %eax ; X86-I64-NEXT: fldl 36(%ebp) ; X86-I64-NEXT: fldl 28(%ebp) @@ -800,7 +722,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebx ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v4f64: @@ -867,23 +788,14 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) -define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { +define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind { ; X86-I32-LABEL: lrint_v8f64: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %ebp -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: pushl %ebx -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: pushl %edi -; X86-I32-NEXT: .cfi_def_cfa_offset 16 ; X86-I32-NEXT: pushl %esi -; X86-I32-NEXT: .cfi_def_cfa_offset 20 ; X86-I32-NEXT: subl $40, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 60 -; X86-I32-NEXT: .cfi_offset %esi, -20 -; X86-I32-NEXT: .cfi_offset %edi, -16 -; X86-I32-NEXT: .cfi_offset %ebx, -12 -; X86-I32-NEXT: .cfi_offset %ebp, -8 ; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) ; X86-I32-NEXT: fistpl {{[0-9]+}}(%esp) ; X86-I32-NEXT: fldl {{[0-9]+}}(%esp) @@ -922,32 +834,21 @@ define <8 x 
iXLen> @lrint_v8f64(<8 x double> %x) { ; X86-I32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-I32-NEXT: movl %ecx, (%eax) ; X86-I32-NEXT: addl $40, %esp -; X86-I32-NEXT: .cfi_def_cfa_offset 20 ; X86-I32-NEXT: popl %esi -; X86-I32-NEXT: .cfi_def_cfa_offset 16 ; X86-I32-NEXT: popl %edi -; X86-I32-NEXT: .cfi_def_cfa_offset 12 ; X86-I32-NEXT: popl %ebx -; X86-I32-NEXT: .cfi_def_cfa_offset 8 ; X86-I32-NEXT: popl %ebp -; X86-I32-NEXT: .cfi_def_cfa_offset 4 ; X86-I32-NEXT: retl $4 ; ; X86-I64-LABEL: lrint_v8f64: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %ebx ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-8, %esp ; X86-I64-NEXT: subl $120, %esp -; X86-I64-NEXT: .cfi_offset %esi, -20 -; X86-I64-NEXT: .cfi_offset %edi, -16 -; X86-I64-NEXT: .cfi_offset %ebx, -12 ; X86-I64-NEXT: fldl 12(%ebp) ; X86-I64-NEXT: fistpll {{[0-9]+}}(%esp) ; X86-I64-NEXT: fldl 20(%ebp) @@ -1024,16 +925,12 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebx ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v8f64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 ; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp ; X86-SSE2-NEXT: andl $-16, %esp ; X86-SSE2-NEXT: subl $16, %esp ; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1 @@ -1044,7 +941,6 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 ; X86-SSE2-NEXT: retl ; ; X86-AVX1-LABEL: lrint_v8f64: @@ -1141,14 +1037,11 @@ define <8 x iXLen> @lrint_v8f64(<8 x 
double> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) -define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { +define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind { ; X86-I32-LABEL: lrint_v1fp128: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %ebp -; X86-I32-NEXT: .cfi_def_cfa_offset 8 -; X86-I32-NEXT: .cfi_offset %ebp, -8 ; X86-I32-NEXT: movl %esp, %ebp -; X86-I32-NEXT: .cfi_def_cfa_register %ebp ; X86-I32-NEXT: andl $-16, %esp ; X86-I32-NEXT: subl $16, %esp ; X86-I32-NEXT: pushl 20(%ebp) @@ -1159,16 +1052,12 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; X86-I32-NEXT: addl $16, %esp ; X86-I32-NEXT: movl %ebp, %esp ; X86-I32-NEXT: popl %ebp -; X86-I32-NEXT: .cfi_def_cfa %esp, 4 ; X86-I32-NEXT: retl ; ; X86-I64-LABEL: lrint_v1fp128: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: andl $-16, %esp ; X86-I64-NEXT: subl $16, %esp ; X86-I64-NEXT: pushl 20(%ebp) @@ -1179,16 +1068,12 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; X86-I64-NEXT: addl $16, %esp ; X86-I64-NEXT: movl %ebp, %esp ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl ; ; X86-SSE2-LABEL: lrint_v1fp128: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 ; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp ; X86-SSE2-NEXT: andl $-16, %esp ; X86-SSE2-NEXT: subl $16, %esp ; X86-SSE2-NEXT: pushl 20(%ebp) @@ -1199,16 +1084,12 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; X86-SSE2-NEXT: addl $16, %esp ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: lrint_v1fp128: ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: 
.cfi_offset %ebp, -8 ; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: .cfi_def_cfa_register %ebp ; X86-AVX-NEXT: andl $-16, %esp ; X86-AVX-NEXT: subl $32, %esp ; X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 @@ -1216,47 +1097,36 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; X86-AVX-NEXT: calll lrintl ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp -; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 ; X86-AVX-NEXT: retl ; ; X64-AVX-i32-LABEL: lrint_v1fp128: ; X64-AVX-i32: # %bb.0: ; X64-AVX-i32-NEXT: pushq %rax -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 ; X64-AVX-i32-NEXT: callq lrintl@PLT ; X64-AVX-i32-NEXT: popq %rcx -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 8 ; X64-AVX-i32-NEXT: retq ; ; X64-AVX-i64-LABEL: lrint_v1fp128: ; X64-AVX-i64: # %bb.0: ; X64-AVX-i64-NEXT: pushq %rax -; X64-AVX-i64-NEXT: .cfi_def_cfa_offset 16 ; X64-AVX-i64-NEXT: callq lrintl@PLT ; X64-AVX-i64-NEXT: popq %rcx -; X64-AVX-i64-NEXT: .cfi_def_cfa_offset 8 ; X64-AVX-i64-NEXT: retq %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x) ret <1 x iXLen> %a } declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>) -define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { +define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind { ; X86-I32-LABEL: lrint_v2fp128: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %ebp -; X86-I32-NEXT: .cfi_def_cfa_offset 8 -; X86-I32-NEXT: .cfi_offset %ebp, -8 ; X86-I32-NEXT: movl %esp, %ebp -; X86-I32-NEXT: .cfi_def_cfa_register %ebp ; X86-I32-NEXT: pushl %ebx ; X86-I32-NEXT: pushl %edi ; X86-I32-NEXT: pushl %esi ; X86-I32-NEXT: andl $-16, %esp ; X86-I32-NEXT: subl $16, %esp -; X86-I32-NEXT: .cfi_offset %esi, -20 -; X86-I32-NEXT: .cfi_offset %edi, -16 -; X86-I32-NEXT: .cfi_offset %ebx, -12 ; X86-I32-NEXT: movl 32(%ebp), %edi ; X86-I32-NEXT: movl 36(%ebp), %ebx ; X86-I32-NEXT: pushl 20(%ebp) @@ -1279,24 +1149,17 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; X86-I32-NEXT: popl %edi ; X86-I32-NEXT: popl %ebx ; X86-I32-NEXT: popl %ebp -; X86-I32-NEXT: 
.cfi_def_cfa %esp, 4 ; X86-I32-NEXT: retl ; ; X86-I64-LABEL: lrint_v2fp128: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %ebx ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-16, %esp ; X86-I64-NEXT: subl $16, %esp -; X86-I64-NEXT: .cfi_offset %esi, -20 -; X86-I64-NEXT: .cfi_offset %edi, -16 -; X86-I64-NEXT: .cfi_offset %ebx, -12 ; X86-I64-NEXT: movl 8(%ebp), %esi ; X86-I64-NEXT: pushl 24(%ebp) ; X86-I64-NEXT: pushl 20(%ebp) @@ -1322,24 +1185,17 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebx ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v2fp128: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 ; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp ; X86-SSE2-NEXT: pushl %ebx ; X86-SSE2-NEXT: pushl %edi ; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: andl $-16, %esp ; X86-SSE2-NEXT: subl $32, %esp -; X86-SSE2-NEXT: .cfi_offset %esi, -20 -; X86-SSE2-NEXT: .cfi_offset %edi, -16 -; X86-SSE2-NEXT: .cfi_offset %ebx, -12 ; X86-SSE2-NEXT: movl 12(%ebp), %edi ; X86-SSE2-NEXT: movl 16(%ebp), %ebx ; X86-SSE2-NEXT: movl 20(%ebp), %esi @@ -1365,16 +1221,12 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; X86-SSE2-NEXT: popl %edi ; X86-SSE2-NEXT: popl %ebx ; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: lrint_v2fp128: ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %ebp, -8 ; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: .cfi_def_cfa_register %ebp ; X86-AVX-NEXT: andl $-16, %esp ; X86-AVX-NEXT: subl $48, %esp ; 
X86-AVX-NEXT: vmovups 8(%ebp), %xmm0 @@ -1389,16 +1241,12 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; X86-AVX-NEXT: movl %ebp, %esp ; X86-AVX-NEXT: popl %ebp -; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 ; X86-AVX-NEXT: retl ; ; X64-AVX-i32-LABEL: lrint_v2fp128: ; X64-AVX-i32: # %bb.0: ; X64-AVX-i32-NEXT: pushq %rbx -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 ; X64-AVX-i32-NEXT: subq $16, %rsp -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 32 -; X64-AVX-i32-NEXT: .cfi_offset %rbx, -16 ; X64-AVX-i32-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; X64-AVX-i32-NEXT: vmovaps %xmm1, %xmm0 ; X64-AVX-i32-NEXT: callq lrintl@PLT @@ -1408,15 +1256,12 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; X64-AVX-i32-NEXT: vmovd %eax, %xmm0 ; X64-AVX-i32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 ; X64-AVX-i32-NEXT: addq $16, %rsp -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 ; X64-AVX-i32-NEXT: popq %rbx -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 8 ; X64-AVX-i32-NEXT: retq ; ; X64-AVX-i64-LABEL: lrint_v2fp128: ; X64-AVX-i64: # %bb.0: ; X64-AVX-i64-NEXT: subq $40, %rsp -; X64-AVX-i64-NEXT: .cfi_def_cfa_offset 48 ; X64-AVX-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-AVX-i64-NEXT: vmovaps %xmm1, %xmm0 ; X64-AVX-i64-NEXT: callq lrintl@PLT @@ -1428,29 +1273,22 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; X64-AVX-i64-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload ; X64-AVX-i64-NEXT: # xmm0 = xmm0[0],mem[0] ; X64-AVX-i64-NEXT: addq $40, %rsp -; X64-AVX-i64-NEXT: .cfi_def_cfa_offset 8 ; X64-AVX-i64-NEXT: retq %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x) ret <2 x iXLen> %a } declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>) -define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { +define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind { ; X86-I32-LABEL: lrint_v4fp128: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %ebp -; X86-I32-NEXT: .cfi_def_cfa_offset 8 -; 
X86-I32-NEXT: .cfi_offset %ebp, -8 ; X86-I32-NEXT: movl %esp, %ebp -; X86-I32-NEXT: .cfi_def_cfa_register %ebp ; X86-I32-NEXT: pushl %ebx ; X86-I32-NEXT: pushl %edi ; X86-I32-NEXT: pushl %esi ; X86-I32-NEXT: andl $-16, %esp ; X86-I32-NEXT: subl $16, %esp -; X86-I32-NEXT: .cfi_offset %esi, -20 -; X86-I32-NEXT: .cfi_offset %edi, -16 -; X86-I32-NEXT: .cfi_offset %ebx, -12 ; X86-I32-NEXT: movl 8(%ebp), %esi ; X86-I32-NEXT: movl 36(%ebp), %ebx ; X86-I32-NEXT: movl 40(%ebp), %edi @@ -1492,24 +1330,17 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; X86-I32-NEXT: popl %edi ; X86-I32-NEXT: popl %ebx ; X86-I32-NEXT: popl %ebp -; X86-I32-NEXT: .cfi_def_cfa %esp, 4 ; X86-I32-NEXT: retl $4 ; ; X86-I64-LABEL: lrint_v4fp128: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %ebx ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-16, %esp ; X86-I64-NEXT: subl $32, %esp -; X86-I64-NEXT: .cfi_offset %esi, -20 -; X86-I64-NEXT: .cfi_offset %edi, -16 -; X86-I64-NEXT: .cfi_offset %ebx, -12 ; X86-I64-NEXT: movl 8(%ebp), %esi ; X86-I64-NEXT: movl 36(%ebp), %edi ; X86-I64-NEXT: movl 40(%ebp), %ebx @@ -1561,24 +1392,17 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebx ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v4fp128: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 ; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp ; X86-SSE2-NEXT: pushl %ebx ; X86-SSE2-NEXT: pushl %edi ; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: andl $-16, %esp ; X86-SSE2-NEXT: subl $48, %esp -; X86-SSE2-NEXT: .cfi_offset %esi, -20 -; X86-SSE2-NEXT: .cfi_offset %edi, -16 -; 
X86-SSE2-NEXT: .cfi_offset %ebx, -12 ; X86-SSE2-NEXT: movl 48(%ebp), %edi ; X86-SSE2-NEXT: movl 52(%ebp), %ebx ; X86-SSE2-NEXT: pushl 36(%ebp) @@ -1623,22 +1447,16 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; X86-SSE2-NEXT: popl %edi ; X86-SSE2-NEXT: popl %ebx ; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: lrint_v4fp128: ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %ebp, -8 ; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: .cfi_def_cfa_register %ebp ; X86-AVX-NEXT: pushl %edi ; X86-AVX-NEXT: pushl %esi ; X86-AVX-NEXT: andl $-16, %esp ; X86-AVX-NEXT: subl $32, %esp -; X86-AVX-NEXT: .cfi_offset %esi, -16 -; X86-AVX-NEXT: .cfi_offset %edi, -12 ; X86-AVX-NEXT: vmovups 40(%ebp), %xmm0 ; X86-AVX-NEXT: vmovups %xmm0, (%esp) ; X86-AVX-NEXT: calll lrintl @@ -1663,16 +1481,12 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; X86-AVX-NEXT: popl %esi ; X86-AVX-NEXT: popl %edi ; X86-AVX-NEXT: popl %ebp -; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 ; X86-AVX-NEXT: retl ; ; X64-AVX-i32-LABEL: lrint_v4fp128: ; X64-AVX-i32: # %bb.0: ; X64-AVX-i32-NEXT: pushq %rbx -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 ; X64-AVX-i32-NEXT: subq $48, %rsp -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 64 -; X64-AVX-i32-NEXT: .cfi_offset %rbx, -16 ; X64-AVX-i32-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-AVX-i32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-AVX-i32-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill @@ -1694,15 +1508,12 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; X64-AVX-i32-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; X64-AVX-i32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; X64-AVX-i32-NEXT: addq $48, %rsp -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 16 ; X64-AVX-i32-NEXT: popq %rbx -; X64-AVX-i32-NEXT: .cfi_def_cfa_offset 8 ; X64-AVX-i32-NEXT: retq ; ; X64-AVX1-i64-LABEL: lrint_v4fp128: 
; X64-AVX1-i64: # %bb.0: ; X64-AVX1-i64-NEXT: subq $72, %rsp -; X64-AVX1-i64-NEXT: .cfi_def_cfa_offset 80 ; X64-AVX1-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill ; X64-AVX1-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-AVX1-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1727,13 +1538,11 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; X64-AVX1-i64-NEXT: # xmm0 = xmm0[0],mem[0] ; X64-AVX1-i64-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload ; X64-AVX1-i64-NEXT: addq $72, %rsp -; X64-AVX1-i64-NEXT: .cfi_def_cfa_offset 8 ; X64-AVX1-i64-NEXT: retq ; ; AVX512-i64-LABEL: lrint_v4fp128: ; AVX512-i64: # %bb.0: ; AVX512-i64-NEXT: subq $72, %rsp -; AVX512-i64-NEXT: .cfi_def_cfa_offset 80 ; AVX512-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill ; AVX512-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1758,13 +1567,11 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; AVX512-i64-NEXT: # xmm0 = xmm0[0],mem[0] ; AVX512-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX512-i64-NEXT: addq $72, %rsp -; AVX512-i64-NEXT: .cfi_def_cfa_offset 8 ; AVX512-i64-NEXT: retq ; ; AVX512DQ-i64-LABEL: lrint_v4fp128: ; AVX512DQ-i64: # %bb.0: ; AVX512DQ-i64-NEXT: subq $72, %rsp -; AVX512DQ-i64-NEXT: .cfi_def_cfa_offset 80 ; AVX512DQ-i64-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill ; AVX512DQ-i64-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512DQ-i64-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -1789,29 +1596,22 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; AVX512DQ-i64-NEXT: # xmm0 = xmm0[0],mem[0] ; AVX512DQ-i64-NEXT: vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX512DQ-i64-NEXT: addq $72, %rsp -; AVX512DQ-i64-NEXT: .cfi_def_cfa_offset 8 ; AVX512DQ-i64-NEXT: retq %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> 
%x) ret <4 x iXLen> %a } declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) -define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { +define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind { ; X86-I32-LABEL: lrint_v8fp128: ; X86-I32: # %bb.0: ; X86-I32-NEXT: pushl %ebp -; X86-I32-NEXT: .cfi_def_cfa_offset 8 -; X86-I32-NEXT: .cfi_offset %ebp, -8 ; X86-I32-NEXT: movl %esp, %ebp -; X86-I32-NEXT: .cfi_def_cfa_register %ebp ; X86-I32-NEXT: pushl %ebx ; X86-I32-NEXT: pushl %edi ; X86-I32-NEXT: pushl %esi ; X86-I32-NEXT: andl $-16, %esp ; X86-I32-NEXT: subl $32, %esp -; X86-I32-NEXT: .cfi_offset %esi, -20 -; X86-I32-NEXT: .cfi_offset %edi, -16 -; X86-I32-NEXT: .cfi_offset %ebx, -12 ; X86-I32-NEXT: movl 8(%ebp), %esi ; X86-I32-NEXT: movl 36(%ebp), %ebx ; X86-I32-NEXT: movl 40(%ebp), %edi @@ -1889,24 +1689,17 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; X86-I32-NEXT: popl %edi ; X86-I32-NEXT: popl %ebx ; X86-I32-NEXT: popl %ebp -; X86-I32-NEXT: .cfi_def_cfa %esp, 4 ; X86-I32-NEXT: retl $4 ; ; X86-I64-LABEL: lrint_v8fp128: ; X86-I64: # %bb.0: ; X86-I64-NEXT: pushl %ebp -; X86-I64-NEXT: .cfi_def_cfa_offset 8 -; X86-I64-NEXT: .cfi_offset %ebp, -8 ; X86-I64-NEXT: movl %esp, %ebp -; X86-I64-NEXT: .cfi_def_cfa_register %ebp ; X86-I64-NEXT: pushl %ebx ; X86-I64-NEXT: pushl %edi ; X86-I64-NEXT: pushl %esi ; X86-I64-NEXT: andl $-16, %esp ; X86-I64-NEXT: subl $64, %esp -; X86-I64-NEXT: .cfi_offset %esi, -20 -; X86-I64-NEXT: .cfi_offset %edi, -16 -; X86-I64-NEXT: .cfi_offset %ebx, -12 ; X86-I64-NEXT: movl 8(%ebp), %esi ; X86-I64-NEXT: movl 36(%ebp), %edi ; X86-I64-NEXT: movl 40(%ebp), %ebx @@ -2006,24 +1799,17 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; X86-I64-NEXT: popl %edi ; X86-I64-NEXT: popl %ebx ; X86-I64-NEXT: popl %ebp -; X86-I64-NEXT: .cfi_def_cfa %esp, 4 ; X86-I64-NEXT: retl $4 ; ; X86-SSE2-LABEL: lrint_v8fp128: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 ; 
X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp ; X86-SSE2-NEXT: pushl %ebx ; X86-SSE2-NEXT: pushl %edi ; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: andl $-16, %esp ; X86-SSE2-NEXT: subl $64, %esp -; X86-SSE2-NEXT: .cfi_offset %esi, -20 -; X86-SSE2-NEXT: .cfi_offset %edi, -16 -; X86-SSE2-NEXT: .cfi_offset %ebx, -12 ; X86-SSE2-NEXT: movl 108(%ebp), %esi ; X86-SSE2-NEXT: movl 112(%ebp), %edi ; X86-SSE2-NEXT: movl 116(%ebp), %ebx @@ -2109,24 +1895,17 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; X86-SSE2-NEXT: popl %edi ; X86-SSE2-NEXT: popl %ebx ; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 ; X86-SSE2-NEXT: retl ; ; X86-AVX1-LABEL: lrint_v8fp128: ; X86-AVX1: # %bb.0: ; X86-AVX1-NEXT: pushl %ebp -; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX1-NEXT: .cfi_offset %ebp, -8 ; X86-AVX1-NEXT: movl %esp, %ebp -; X86-AVX1-NEXT: .cfi_def_cfa_register %ebp ; X86-AVX1-NEXT: pushl %ebx ; X86-AVX1-NEXT: pushl %edi ; X86-AVX1-NEXT: pushl %esi ; X86-AVX1-NEXT: andl $-16, %esp ; X86-AVX1-NEXT: subl $80, %esp -; X86-AVX1-NEXT: .cfi_offset %esi, -20 -; X86-AVX1-NEXT: .cfi_offset %edi, -16 -; X86-AVX1-NEXT: .cfi_offset %ebx, -12 ; X86-AVX1-NEXT: vmovups 40(%ebp), %xmm0 ; X86-AVX1-NEXT: vmovups %xmm0, (%esp) ; X86-AVX1-NEXT: calll lrintl @@ -2175,16 +1954,12 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; X86-AVX1-NEXT: popl %edi ; X86-AVX1-NEXT: popl %ebx ; X86-AVX1-NEXT: popl %ebp -; X86-AVX1-NEXT: .cfi_def_cfa %esp, 4 ; X86-AVX1-NEXT: retl ; ; X64-AVX1-i32-LABEL: lrint_v8fp128: ; X64-AVX1-i32: # %bb.0: ; X64-AVX1-i32-NEXT: pushq %rbx -; X64-AVX1-i32-NEXT: .cfi_def_cfa_offset 16 ; X64-AVX1-i32-NEXT: subq $112, %rsp -; X64-AVX1-i32-NEXT: .cfi_def_cfa_offset 128 -; X64-AVX1-i32-NEXT: .cfi_offset %rbx, -16 ; X64-AVX1-i32-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-AVX1-i32-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-AVX1-i32-NEXT: vmovaps %xmm4, (%rsp) # 16-byte Spill @@ 
-2229,15 +2004,12 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; X64-AVX1-i32-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload ; X64-AVX1-i32-NEXT: addq $112, %rsp -; X64-AVX1-i32-NEXT: .cfi_def_cfa_offset 16 ; X64-AVX1-i32-NEXT: popq %rbx -; X64-AVX1-i32-NEXT: .cfi_def_cfa_offset 8 ; X64-AVX1-i32-NEXT: retq ; ; X64-AVX1-i64-LABEL: lrint_v8fp128: ; X64-AVX1-i64: # %bb.0: ; X64-AVX1-i64-NEXT: subq $152, %rsp -; X64-AVX1-i64-NEXT: .cfi_def_cfa_offset 160 ; X64-AVX1-i64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-AVX1-i64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-AVX1-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -2289,13 +2061,11 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; X64-AVX1-i64-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload ; X64-AVX1-i64-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; X64-AVX1-i64-NEXT: addq $152, %rsp -; X64-AVX1-i64-NEXT: .cfi_def_cfa_offset 8 ; X64-AVX1-i64-NEXT: retq ; ; AVX512-i64-LABEL: lrint_v8fp128: ; AVX512-i64: # %bb.0: ; AVX512-i64-NEXT: subq $152, %rsp -; AVX512-i64-NEXT: .cfi_def_cfa_offset 160 ; AVX512-i64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; AVX512-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512-i64-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -2347,13 +2117,11 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; AVX512-i64-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX512-i64-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload ; AVX512-i64-NEXT: addq $152, %rsp -; AVX512-i64-NEXT: .cfi_def_cfa_offset 8 ; AVX512-i64-NEXT: retq ; ; AVX512DQ-i64-LABEL: lrint_v8fp128: ; AVX512DQ-i64: # %bb.0: ; AVX512DQ-i64-NEXT: subq $152, %rsp -; AVX512DQ-i64-NEXT: 
.cfi_def_cfa_offset 160 ; AVX512DQ-i64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill ; AVX512DQ-i64-NEXT: vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX512DQ-i64-NEXT: vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -2405,7 +2173,6 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { ; AVX512DQ-i64-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload ; AVX512DQ-i64-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload ; AVX512DQ-i64-NEXT: addq $152, %rsp -; AVX512DQ-i64-NEXT: .cfi_def_cfa_offset 8 ; AVX512DQ-i64-NEXT: retq %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x) ret <8 x iXLen> %a From 5255ea8fbd982e5f7ec237ee8d2ffa0454331e27 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 9 Aug 2025 23:05:31 -0500 Subject: [PATCH 6/8] specify the type for intrinsic calls --- llvm/test/CodeGen/ARM/llrint-conv.ll | 12 +++++------ llvm/test/CodeGen/AVR/llrint.ll | 12 +++++------ llvm/test/CodeGen/Mips/llrint-conv.ll | 22 ++++++++++---------- llvm/test/CodeGen/PowerPC/llrint-conv.ll | 26 ++++++++++++------------ llvm/test/CodeGen/X86/llrint-conv.ll | 16 +++++++-------- 5 files changed, 44 insertions(+), 44 deletions(-) diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll index f0fb2e7543be6..749ee00a3c68e 100644 --- a/llvm/test/CodeGen/ARM/llrint-conv.ll +++ b/llvm/test/CodeGen/ARM/llrint-conv.ll @@ -7,7 +7,7 @@ ; HARDFP: bl llrintf define i64 @testmsxh_builtin(half %x) { entry: - %0 = tail call i64 @llvm.llrint.f16(half %x) + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) ret i64 %0 } @@ -17,7 +17,7 @@ entry: ; HARDFP: bl llrintf define i64 @testmsxs_builtin(float %x) { entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) ret i64 %0 } @@ -27,7 +27,7 @@ entry: ; HARDFP: bl llrint define i64 @testmsxd_builtin(double %x) { entry: - %0 = tail call i64 @llvm.llrint.f64(double 
%x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) ret i64 %0 } @@ -38,9 +38,9 @@ entry: ; HARDFP: bl llrintl define i64 @testmsxq_builtin(fp128 %x) { entry: - %0 = tail call i64 @llvm.llrint.f128(fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x) ret i64 %0 } -declare i64 @llvm.llrint.f32(float) nounwind readnone -declare i64 @llvm.llrint.f64(double) nounwind readnone +declare i64 @llvm.llrint.i64.f32(float) nounwind readnone +declare i64 @llvm.llrint.i64.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/AVR/llrint.ll b/llvm/test/CodeGen/AVR/llrint.ll index c55664f2d7353..2980879bb6e3e 100644 --- a/llvm/test/CodeGen/AVR/llrint.ll +++ b/llvm/test/CodeGen/AVR/llrint.ll @@ -4,7 +4,7 @@ ; FIXME: crash "Input type needs to be promoted!" ; define i64 @testmsxh_builtin(half %x) { ; entry: -; %0 = tail call i64 @llvm.llrint.f16(half %x) +; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) ; ret i64 %0 ; } @@ -14,7 +14,7 @@ define i64 @testmsxs_builtin(float %x) { ; CHECK-NEXT: call llrintf ; CHECK-NEXT: ret entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) ret i64 %0 } @@ -24,7 +24,7 @@ define i64 @testmsxd_builtin(double %x) { ; CHECK-NEXT: call llrint ; CHECK-NEXT: ret entry: - %0 = tail call i64 @llvm.llrint.f64(double %x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) ret i64 %0 } @@ -35,9 +35,9 @@ define i64 @testmsxq_builtin(fp128 %x) { ; CHECK-NEXT: call llrintl ; CHECK-NEXT: ret entry: - %0 = tail call i64 @llvm.llrint.fp128(fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.fp128(fp128 %x) ret i64 %0 } -declare i64 @llvm.llrint.f32(float) nounwind readnone -declare i64 @llvm.llrint.f64(double) nounwind readnone +declare i64 @llvm.llrint.i64.f32(float) nounwind readnone +declare i64 @llvm.llrint.i64.f64(double) nounwind readnone diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll index ee3c0d99253a6..592d40c0f65aa 100644 --- 
a/llvm/test/CodeGen/Mips/llrint-conv.ll +++ b/llvm/test/CodeGen/Mips/llrint-conv.ll @@ -4,14 +4,14 @@ ; FIXME: crash ; define signext i32 @testmswh(half %x) { ; entry: -; %0 = tail call i64 @llvm.llrint.f16(half %x) +; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) ; %conv = trunc i64 %0 to i32 ; ret i32 %conv ; } ; define i64 @testmsxh(half %x) { ; entry: -; %0 = tail call i64 @llvm.llrint.f16(half %x) +; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) ; ret i64 %0 ; } @@ -19,7 +19,7 @@ define signext i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: ; CHECK: jal llrintf entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -28,7 +28,7 @@ define i64 @testmsxs(float %x) { ; CHECK-LABEL: testmsxs: ; CHECK: jal llrintf entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) ret i64 %0 } @@ -36,7 +36,7 @@ define signext i32 @testmswd(double %x) { ; CHECK-LABEL: testmswd: ; CHECK: jal llrint entry: - %0 = tail call i64 @llvm.llrint.f64(double %x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -45,7 +45,7 @@ define i64 @testmsxd(double %x) { ; CHECK-LABEL: testmsxd: ; CHECK: jal llrint entry: - %0 = tail call i64 @llvm.llrint.f64(double %x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) ret i64 %0 } @@ -53,7 +53,7 @@ define signext i32 @testmswl(fp128 %x) { ; CHECK-LABEL: testmswl: ; CHECK: jal llrintl entry: - %0 = tail call i64 @llvm.llrint.f128(fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -62,10 +62,10 @@ define i64 @testmsll(fp128 %x) { ; CHECK-LABEL: testmsll: ; CHECK: jal llrintl entry: - %0 = tail call i64 @llvm.llrint.f128(fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x) ret i64 %0 } -declare i64 @llvm.llrint.f32(float) nounwind readnone -declare i64 @llvm.llrint.f64(double) 
nounwind readnone -declare i64 @llvm.llrint.f128(fp128) nounwind readnone +declare i64 @llvm.llrint.i64.f32(float) nounwind readnone +declare i64 @llvm.llrint.i64.f64(double) nounwind readnone +declare i64 @llvm.llrint.i64.f128(fp128) nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/llrint-conv.ll b/llvm/test/CodeGen/PowerPC/llrint-conv.ll index dcd3bd25a83c5..8e49ddcc6355f 100644 --- a/llvm/test/CodeGen/PowerPC/llrint-conv.ll +++ b/llvm/test/CodeGen/PowerPC/llrint-conv.ll @@ -4,14 +4,14 @@ ; FIXME: crash "Input type needs to be promoted!" ; define signext i32 @testmswh(half %x) { ; entry: -; %0 = tail call i64 @llvm.llrint.f16(half %x) +; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) ; %conv = trunc i64 %0 to i32 ; ret i32 %conv ; } ; define i64 @testmsxh(half %x) { ; entry: -; %0 = tail call i64 @llvm.llrint.f16(half %x) +; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) ; ret i64 %0 ; } @@ -19,7 +19,7 @@ ; CHECK: bl llrintf define signext i32 @testmsws(float %x) { entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -28,7 +28,7 @@ entry: ; CHECK: bl llrintf define i64 @testmsxs(float %x) { entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) ret i64 %0 } @@ -36,7 +36,7 @@ entry: ; CHECK: bl llrint define signext i32 @testmswd(double %x) { entry: - %0 = tail call i64 @llvm.llrint.f64(double %x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -45,7 +45,7 @@ entry: ; CHECK: bl llrint define i64 @testmsxd(double %x) { entry: - %0 = tail call i64 @llvm.llrint.f64(double %x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) ret i64 %0 } @@ -53,7 +53,7 @@ entry: ; CHECK: bl llrintl define signext i32 @testmswl(ppc_fp128 %x) { entry: - %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.ppcf128(ppc_fp128 %x) 
%conv = trunc i64 %0 to i32 ret i32 %conv } @@ -62,7 +62,7 @@ entry: ; CHECK: bl llrintl define i64 @testmsll(ppc_fp128 %x) { entry: - %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.ppcf128(ppc_fp128 %x) ret i64 %0 } @@ -70,7 +70,7 @@ entry: ; CHECK: bl llrintf128 define signext i32 @testmswq(fp128 %x) { entry: - %0 = tail call i64 @llvm.llrint.f128(fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -79,10 +79,10 @@ entry: ; CHECK: bl llrintf128 define i64 @testmslq(fp128 %x) { entry: - %0 = tail call i64 @llvm.llrint.f128(fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x) ret i64 %0 } -declare i64 @llvm.llrint.f32(float) nounwind readnone -declare i64 @llvm.llrint.f64(double) nounwind readnone -declare i64 @llvm.llrint.ppcf128(ppc_fp128) nounwind readnone +declare i64 @llvm.llrint.i64.f32(float) nounwind readnone +declare i64 @llvm.llrint.i64.f64(double) nounwind readnone +declare i64 @llvm.llrint.i64.ppcf128(ppc_fp128) nounwind readnone diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll index d3eca5197a94b..7bcf573118538 100644 --- a/llvm/test/CodeGen/X86/llrint-conv.ll +++ b/llvm/test/CodeGen/X86/llrint-conv.ll @@ -42,7 +42,7 @@ define i64 @testmsxh(half %x) nounwind { ; X64-SSE-NEXT: popq %rcx ; X64-SSE-NEXT: retq entry: - %0 = tail call i64 @llvm.llrint.f16(half %x) + %0 = tail call i64 @llvm.llrint.i64.f16(half %x) ret i64 %0 } @@ -103,7 +103,7 @@ define i64 @testmsxs(float %x) nounwind { ; X64-AVX-NEXT: vcvtss2si %xmm0, %rax ; X64-AVX-NEXT: retq entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) ret i64 %0 } @@ -164,7 +164,7 @@ define i64 @testmsxd(double %x) nounwind { ; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax ; X64-AVX-NEXT: retq entry: - %0 = tail call i64 @llvm.llrint.f64(double %x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) ret i64 %0 } @@ 
-190,7 +190,7 @@ define i64 @testmsll(x86_fp80 %x) nounwind { ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax ; X64-NEXT: retq entry: - %0 = tail call i64 @llvm.llrint.f80(x86_fp80 %x) + %0 = tail call i64 @llvm.llrint.i64.f80(x86_fp80 %x) ret i64 %0 } @@ -245,10 +245,10 @@ define i64 @testmslq(fp128 %x) nounwind { ; X64: # %bb.0: # %entry ; X64-NEXT: jmp llrintl@PLT # TAILCALL entry: - %0 = tail call i64 @llvm.llrint.fp128(fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.fp128(fp128 %x) ret i64 %0 } -declare i64 @llvm.llrint.f32(float) nounwind readnone -declare i64 @llvm.llrint.f64(double) nounwind readnone -declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone +declare i64 @llvm.llrint.i64.f32(float) nounwind readnone +declare i64 @llvm.llrint.i64.f64(double) nounwind readnone +declare i64 @llvm.llrint.i64.f80(x86_fp80) nounwind readnone From 81edb7c48e5855c4ff9ce6df50a2173228c5c1a1 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 9 Aug 2025 23:22:36 -0500 Subject: [PATCH 7/8] get rid of excessively large <32 x *> and <16 x fp128> tests on arm --- llvm/test/CodeGen/ARM/vector-llrint.ll | 10652 ++----------------- llvm/test/CodeGen/ARM/vector-lrint.ll | 12382 +++-------------------- 2 files changed, 2158 insertions(+), 20876 deletions(-) diff --git a/llvm/test/CodeGen/ARM/vector-llrint.ll b/llvm/test/CodeGen/ARM/vector-llrint.ll index 870947fac063e..5f4e39125da12 100644 --- a/llvm/test/CodeGen/ARM/vector-llrint.ll +++ b/llvm/test/CodeGen/ARM/vector-llrint.ll @@ -1,13 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-NEON -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon 
| FileCheck %s --check-prefix=LE-NEON -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON +; RUN: llc %s -o - -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefixes=LE +; RUN: llc %s -o - -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=LE +; RUN: llc %s -o - -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefixes=BE +; RUN: llc %s -o - -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ; LE-LABEL: llrint_v1i64_v1f16: @@ -23,19 +19,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ; LE-NEXT: vmov.32 d0[1], r1 ; LE-NEXT: pop {r11, pc} ; -; LE-NEON-LABEL: llrint_v1i64_v1f16: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r11, lr} -; LE-NEON-NEXT: push {r11, lr} -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_f2h -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d0[0], r0 -; LE-NEON-NEXT: vmov.32 d0[1], r1 -; LE-NEON-NEXT: pop {r11, pc} -; ; BE-LABEL: llrint_v1i64_v1f16: ; BE: @ %bb.0: ; BE-NEXT: .save {r11, lr} @@ -49,20 +32,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ; BE-NEXT: vmov.32 d16[1], r1 ; BE-NEXT: vrev64.32 d0, d16 ; BE-NEXT: pop {r11, pc} -; -; BE-NEON-LABEL: llrint_v1i64_v1f16: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r11, lr} -; BE-NEON-NEXT: push {r11, lr} -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: bl __aeabi_f2h -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; 
BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d0, d16 -; BE-NEON-NEXT: pop {r11, pc} %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x) ret <1 x i64> %a } @@ -94,31 +63,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; LE-NEXT: vpop {d8, d9} ; LE-NEXT: pop {r4, r5, r11, pc} ; -; LE-NEON-LABEL: llrint_v1i64_v2f16: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r11, lr} -; LE-NEON-NEXT: .vsave {d8, d9} -; LE-NEON-NEXT: vpush {d8, d9} -; LE-NEON-NEXT: vmov r0, s1 -; LE-NEON-NEXT: vmov.f32 s16, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: vmov r0, s16 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: vmov.32 d9[0], r4 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vmov.32 d9[1], r5 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vorr q0, q4, q4 -; LE-NEON-NEXT: vpop {d8, d9} -; LE-NEON-NEXT: pop {r4, r5, r11, pc} -; ; BE-LABEL: llrint_v1i64_v2f16: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r11, lr} @@ -144,32 +88,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; BE-NEXT: vrev64.32 d0, d16 ; BE-NEXT: vpop {d8} ; BE-NEXT: pop {r4, r5, r11, pc} -; -; BE-NEON-LABEL: llrint_v1i64_v2f16: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r11, lr} -; BE-NEON-NEXT: .vsave {d8} -; BE-NEON-NEXT: vpush {d8} -; BE-NEON-NEXT: vmov r0, s1 -; BE-NEON-NEXT: vmov.f32 s16, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: vmov r0, s16 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d8[0], r4 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d8[1], r5 -; 
BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d1, d8 -; BE-NEON-NEXT: vrev64.32 d0, d16 -; BE-NEON-NEXT: vpop {d8} -; BE-NEON-NEXT: pop {r4, r5, r11, pc} %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) ret <2 x i64> %a } @@ -222,52 +140,6 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; LE-NEXT: vpop {d12, d13} ; LE-NEXT: pop {r4, r5, r6, r7, r11, pc} ; -; LE-NEON-LABEL: llrint_v4i64_v4f16: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} -; LE-NEON-NEXT: .vsave {d12, d13} -; LE-NEON-NEXT: vpush {d12, d13} -; LE-NEON-NEXT: .vsave {d8, d9, d10} -; LE-NEON-NEXT: vpush {d8, d9, d10} -; LE-NEON-NEXT: vmov r0, s1 -; LE-NEON-NEXT: vmov.f32 s16, s3 -; LE-NEON-NEXT: vmov.f32 s20, s2 -; LE-NEON-NEXT: vmov.f32 s18, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: vmov r0, s18 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: vmov r0, s16 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r7 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: vmov r0, s20 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: vmov.32 d13[0], r5 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vmov.32 d13[1], r4 -; LE-NEON-NEXT: vmov.32 d9[1], r6 -; LE-NEON-NEXT: vmov.32 d12[1], r7 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vorr q0, q6, q6 -; LE-NEON-NEXT: vorr q1, q4, q4 -; LE-NEON-NEXT: vpop {d8, d9, d10} -; LE-NEON-NEXT: vpop {d12, d13} -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} -; ; BE-LABEL: llrint_v4i64_v4f16: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r6, r7, r11, lr} @@ -312,51 +184,6 @@ define <4 
x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; BE-NEXT: vrev64.32 d2, d16 ; BE-NEXT: vpop {d8, d9, d10} ; BE-NEXT: pop {r4, r5, r6, r7, r11, pc} -; -; BE-NEON-LABEL: llrint_v4i64_v4f16: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} -; BE-NEON-NEXT: .vsave {d8, d9, d10} -; BE-NEON-NEXT: vpush {d8, d9, d10} -; BE-NEON-NEXT: vmov r0, s1 -; BE-NEON-NEXT: vmov.f32 s16, s3 -; BE-NEON-NEXT: vmov.f32 s18, s2 -; BE-NEON-NEXT: vmov.f32 s20, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: mov r5, r0 -; BE-NEON-NEXT: vmov r0, s20 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r7, r0 -; BE-NEON-NEXT: vmov r0, s16 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov s0, r7 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: vmov r0, s18 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d9[0], r5 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: vmov.32 d8[1], r6 -; BE-NEON-NEXT: vmov.32 d10[1], r7 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d1, d9 -; BE-NEON-NEXT: vrev64.32 d3, d8 -; BE-NEON-NEXT: vrev64.32 d0, d10 -; BE-NEON-NEXT: vrev64.32 d2, d16 -; BE-NEON-NEXT: vpop {d8, d9, d10} -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x) ret <4 x i64> %a } @@ -452,95 +279,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; LE-NEXT: add sp, sp, #4 ; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-NEON-LABEL: llrint_v8i64_v8f16: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, 
r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #8 -; LE-NEON-NEXT: sub sp, sp, #8 -; LE-NEON-NEXT: vmov r0, s1 -; LE-NEON-NEXT: vstr s6, [sp, #4] @ 4-byte Spill -; LE-NEON-NEXT: vmov.f32 s16, s7 -; LE-NEON-NEXT: vmov.f32 s18, s5 -; LE-NEON-NEXT: vmov.f32 s20, s4 -; LE-NEON-NEXT: vmov.f32 s22, s3 -; LE-NEON-NEXT: vmov.f32 s24, s2 -; LE-NEON-NEXT: vmov.f32 s26, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: mov r9, r0 -; LE-NEON-NEXT: vmov r0, s26 -; LE-NEON-NEXT: str r1, [sp] @ 4-byte Spill -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: mov r10, r0 -; LE-NEON-NEXT: vmov r0, s22 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: vmov r0, s24 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: vmov r0, s18 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: mov r6, r0 -; LE-NEON-NEXT: vmov r0, s20 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: vmov r0, s16 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r4 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r6 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r7 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r5 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r10 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vldr s0, [sp, #4] @ 4-byte Reload -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; 
LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: vmov.32 d9[0], r9 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d15[1], r5 -; LE-NEON-NEXT: vmov.32 d9[1], r0 -; LE-NEON-NEXT: vmov.32 d13[1], r6 -; LE-NEON-NEXT: vmov.32 d11[1], r11 -; LE-NEON-NEXT: vmov.32 d8[1], r4 -; LE-NEON-NEXT: vmov.32 d14[1], r7 -; LE-NEON-NEXT: vorr q0, q4, q4 -; LE-NEON-NEXT: vmov.32 d12[1], r8 -; LE-NEON-NEXT: vorr q1, q7, q7 -; LE-NEON-NEXT: vmov.32 d10[1], r1 -; LE-NEON-NEXT: vorr q2, q6, q6 -; LE-NEON-NEXT: vorr q3, q5, q5 -; LE-NEON-NEXT: add sp, sp, #8 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; ; BE-LABEL: llrint_v8i64_v8f16: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -632,98 +370,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; BE-NEXT: add sp, sp, #4 ; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v8i64_v8f16: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} -; BE-NEON-NEXT: .pad #8 -; BE-NEON-NEXT: sub sp, sp, #8 -; BE-NEON-NEXT: vmov r0, s1 -; BE-NEON-NEXT: vmov.f32 s18, s7 -; BE-NEON-NEXT: vmov.f32 s16, s6 -; BE-NEON-NEXT: vmov.f32 s20, s5 -; BE-NEON-NEXT: vmov.f32 s22, s4 -; BE-NEON-NEXT: vmov.f32 s24, s3 -; BE-NEON-NEXT: vmov.f32 s26, s2 -; BE-NEON-NEXT: vmov.f32 s28, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: mov r9, r0 -; BE-NEON-NEXT: vmov r0, s28 -; BE-NEON-NEXT: str r1, 
[sp, #4] @ 4-byte Spill -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r10, r0 -; BE-NEON-NEXT: vmov r0, s24 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r5, r0 -; BE-NEON-NEXT: vmov r0, s26 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r7, r0 -; BE-NEON-NEXT: vmov r0, s20 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r6, r0 -; BE-NEON-NEXT: vmov r0, s22 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: vmov r0, s18 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov s0, r4 -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov s0, r6 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov s0, r7 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov s0, r5 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov s0, r10 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: vmov r0, s16 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d8[0], r9 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; BE-NEON-NEXT: vmov.32 d13[1], r5 -; BE-NEON-NEXT: vmov.32 d8[1], r0 -; BE-NEON-NEXT: vmov.32 d11[1], r6 -; BE-NEON-NEXT: vmov.32 d9[1], r11 -; BE-NEON-NEXT: vmov.32 d14[1], r4 -; BE-NEON-NEXT: vmov.32 d12[1], r7 -; BE-NEON-NEXT: vmov.32 d10[1], r8 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d1, d8 -; BE-NEON-NEXT: vrev64.32 d3, d13 -; BE-NEON-NEXT: vrev64.32 d5, d11 -; BE-NEON-NEXT: vrev64.32 d7, d9 -; BE-NEON-NEXT: vrev64.32 d0, d14 -; BE-NEON-NEXT: vrev64.32 d2, d12 -; BE-NEON-NEXT: vrev64.32 d4, 
d10 -; BE-NEON-NEXT: vrev64.32 d6, d16 -; BE-NEON-NEXT: add sp, sp, #8 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a } @@ -929,205 +575,6 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; LE-NEXT: add sp, sp, #4 ; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-NEON-LABEL: llrint_v16i64_v16f16: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #120 -; LE-NEON-NEXT: sub sp, sp, #120 -; LE-NEON-NEXT: mov r11, r0 -; LE-NEON-NEXT: vmov r0, s7 -; LE-NEON-NEXT: vstr s15, [sp, #24] @ 4-byte Spill -; LE-NEON-NEXT: vmov.f32 s23, s13 -; LE-NEON-NEXT: vstr s14, [sp, #100] @ 4-byte Spill -; LE-NEON-NEXT: vmov.f32 s25, s12 -; LE-NEON-NEXT: vmov.f32 s27, s11 -; LE-NEON-NEXT: vstr s10, [sp, #104] @ 4-byte Spill -; LE-NEON-NEXT: vstr s9, [sp, #108] @ 4-byte Spill -; LE-NEON-NEXT: vmov.f32 s24, s8 -; LE-NEON-NEXT: vmov.f32 s19, s6 -; LE-NEON-NEXT: vmov.f32 s29, s5 -; LE-NEON-NEXT: vmov.f32 s17, s4 -; LE-NEON-NEXT: vmov.f32 s16, s3 -; LE-NEON-NEXT: vmov.f32 s21, s2 -; LE-NEON-NEXT: vmov.f32 s26, s1 -; LE-NEON-NEXT: vmov.f32 s18, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: vmov r0, s25 -; LE-NEON-NEXT: str r1, [sp, #56] @ 4-byte Spill -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: vmov r0, s27 -; LE-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; 
LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: mov r6, r0 -; LE-NEON-NEXT: vmov r0, s29 -; LE-NEON-NEXT: str r1, [sp, #112] @ 4-byte Spill -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: vmov r0, s23 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: vmov.32 d17[0], r6 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: mov r6, r0 -; LE-NEON-NEXT: vmov r0, s17 -; LE-NEON-NEXT: vmov r8, s21 -; LE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill -; LE-NEON-NEXT: vmov r10, s19 -; LE-NEON-NEXT: vmov.32 d10[0], r5 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vmov.32 d11[0], r6 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: mov r0, r10 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: vmov.32 d11[0], r7 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: mov r0, r8 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: mov r6, r0 -; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d11[1], r0 -; LE-NEON-NEXT: vmov r0, s18 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: vmov r0, s16 -; LE-NEON-NEXT: vmov.32 d10[1], r7 -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: vmov.32 d15[1], r4 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: vmov r0, s26 -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vmov r8, s24 -; LE-NEON-NEXT: vmov.32 d14[1], r9 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov s24, r5 -; 
LE-NEON-NEXT: vldr s0, [sp, #24] @ 4-byte Reload -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: vmov r7, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s24 -; LE-NEON-NEXT: vmov s22, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s22 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: vmov s24, r6 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s24 -; LE-NEON-NEXT: vmov s22, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s22 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: mov r0, r8 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: vmov.32 d14[1], r5 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload -; LE-NEON-NEXT: vmov s20, r0 -; LE-NEON-NEXT: vmov.32 d13[1], r6 -; LE-NEON-NEXT: vmov r4, s0 -; LE-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s20 -; LE-NEON-NEXT: vmov s16, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: vmov s18, r7 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: mov r0, r4 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: vmov s16, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: mov r4, 
r1 -; LE-NEON-NEXT: vmov.32 d11[1], r6 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: vmov.32 d10[1], r4 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: vmov.32 d16[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vmov.32 d19[1], r0 -; LE-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d21[1], r10 -; LE-NEON-NEXT: vmov.32 d18[1], r0 -; LE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d12[1], r5 -; LE-NEON-NEXT: vmov.32 d17[1], r0 -; LE-NEON-NEXT: add r0, r11, #64 -; LE-NEON-NEXT: vmov.32 d16[1], r1 -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEON-NEXT: vmov.32 d20[1], r9 -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128] -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]! -; LE-NEON-NEXT: vst1.64 {d20, d21}, [r11:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
-; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] -; LE-NEON-NEXT: add sp, sp, #120 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; ; BE-LABEL: llrint_v16i64_v16f16: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -1337,3057 +784,296 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-NEXT: add sp, sp, #4 ; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v16i64_v16f16: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #112 -; BE-NEON-NEXT: sub sp, sp, #112 -; BE-NEON-NEXT: mov r11, r0 -; BE-NEON-NEXT: vmov r0, s14 -; BE-NEON-NEXT: vmov.f32 s17, s15 -; BE-NEON-NEXT: vstr s13, [sp, #52] @ 4-byte Spill -; BE-NEON-NEXT: vmov.f32 s21, s12 -; BE-NEON-NEXT: vstr s10, [sp, #68] @ 4-byte Spill -; BE-NEON-NEXT: vmov.f32 s23, s11 -; BE-NEON-NEXT: vstr s7, [sp, #72] @ 4-byte Spill -; BE-NEON-NEXT: vmov.f32 s19, s9 -; BE-NEON-NEXT: vstr s4, [sp, #28] @ 4-byte Spill -; BE-NEON-NEXT: vmov.f32 s26, s8 -; BE-NEON-NEXT: vmov.f32 s24, s6 -; BE-NEON-NEXT: vmov.f32 s18, s5 -; BE-NEON-NEXT: vmov.f32 s25, s3 -; BE-NEON-NEXT: vmov.f32 s16, s2 -; BE-NEON-NEXT: vmov.f32 s27, s1 -; BE-NEON-NEXT: vmov.f32 s29, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: mov r8, r0 -; BE-NEON-NEXT: vmov r0, s29 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r9, r0 -; BE-NEON-NEXT: vmov r0, s27 -; BE-NEON-NEXT: bl __aeabi_h2f -; 
BE-NEON-NEXT: mov r7, r0 -; BE-NEON-NEXT: vmov r0, s21 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r6, r0 -; BE-NEON-NEXT: vmov r0, s25 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r5, r0 -; BE-NEON-NEXT: vmov r0, s23 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov s0, r5 -; BE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill -; BE-NEON-NEXT: vstr d16, [sp, #96] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov s0, r6 -; BE-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill -; BE-NEON-NEXT: vstr d16, [sp, #80] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov s0, r7 -; BE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill -; BE-NEON-NEXT: vstr d16, [sp, #56] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov s0, r9 -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vmov r0, s17 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d10[0], r8 -; BE-NEON-NEXT: vmov r6, s19 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: mov r0, r6 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r6, r0 -; BE-NEON-NEXT: vmov r0, s18 -; BE-NEON-NEXT: vmov.32 d10[1], r4 -; BE-NEON-NEXT: vstr d10, [sp, #40] @ 8-byte Spill -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: vmov r0, s16 -; BE-NEON-NEXT: vmov.32 d11[1], r7 -; BE-NEON-NEXT: vstr d11, [sp, #32] @ 8-byte Spill -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.32 d15[1], r5 -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vstr d15, [sp, #16] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vldr s0, [sp, #28] @ 4-byte Reload -; BE-NEON-NEXT: vmov r5, s26 -; 
BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov s26, r4 -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d14[1], r10 -; BE-NEON-NEXT: vmov r4, s24 -; BE-NEON-NEXT: vstr d16, [sp] @ 8-byte Spill -; BE-NEON-NEXT: vstr d14, [sp, #8] @ 8-byte Spill -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s26 -; BE-NEON-NEXT: vmov s22, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s22 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: vmov s24, r6 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: mov r0, r4 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s24 -; BE-NEON-NEXT: vmov s22, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s22 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: vmov.32 d14[1], r6 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #52] @ 4-byte Reload -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: vmov.32 d13[1], r7 -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #68] @ 4-byte Reload -; BE-NEON-NEXT: vmov s20, r0 -; BE-NEON-NEXT: vmov.32 d11[1], r6 -; BE-NEON-NEXT: vmov r7, s0 -; BE-NEON-NEXT: vldr s0, [sp, #72] @ 4-byte Reload -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s20 -; BE-NEON-NEXT: vmov s16, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: vmov s18, r4 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: mov r0, r7 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s18 -; BE-NEON-NEXT: vmov s16, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, 
s16 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: vmov.32 d15[1], r4 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d24[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload -; BE-NEON-NEXT: vldr d23, [sp, #56] @ 8-byte Reload -; BE-NEON-NEXT: vldr d20, [sp, #8] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d23[1], r0 -; BE-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload -; BE-NEON-NEXT: vldr d22, [sp, #80] @ 8-byte Reload -; BE-NEON-NEXT: vldr d26, [sp, #16] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d21, d20 -; BE-NEON-NEXT: vmov.32 d22[1], r0 -; BE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; BE-NEON-NEXT: vldr d30, [sp] @ 8-byte Reload -; BE-NEON-NEXT: vldr d25, [sp, #96] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d20, d26 -; BE-NEON-NEXT: vldr d26, [sp, #32] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d10[1], r5 -; BE-NEON-NEXT: vmov.32 d12[1], r9 -; BE-NEON-NEXT: vldr d28, [sp, #40] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d27, d26 -; BE-NEON-NEXT: vmov.32 d25[1], r0 -; BE-NEON-NEXT: add r0, r11, #64 -; BE-NEON-NEXT: vmov.32 d30[1], r8 -; BE-NEON-NEXT: vmov.32 d9[1], r6 -; BE-NEON-NEXT: vrev64.32 d26, d28 -; BE-NEON-NEXT: vrev64.32 d29, d10 -; BE-NEON-NEXT: vmov.32 d24[1], r1 -; BE-NEON-NEXT: vrev64.32 d1, d12 -; BE-NEON-NEXT: vrev64.32 d28, d23 -; BE-NEON-NEXT: vrev64.32 d23, d22 -; BE-NEON-NEXT: vrev64.32 d22, d30 -; BE-NEON-NEXT: vrev64.32 d31, d25 -; BE-NEON-NEXT: vrev64.32 d0, d9 -; BE-NEON-NEXT: vrev64.32 d30, d24 -; BE-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]! -; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 d19, d13 -; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128] -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r11:128]! -; BE-NEON-NEXT: vrev64.32 d18, d14 -; BE-NEON-NEXT: vst1.64 {d22, d23}, [r11:128]! -; BE-NEON-NEXT: vrev64.32 d17, d15 -; BE-NEON-NEXT: vrev64.32 d16, d11 -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r11:128]! 
-; BE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] -; BE-NEON-NEXT: add sp, sp, #112 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a } declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) -define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { -; LE-LABEL: llrint_v32i64_v32f16: +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +; LE-LABEL: llrint_v1i64_v1f32: ; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: .pad #4 -; LE-NEXT: sub sp, sp, #4 -; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #248 -; LE-NEXT: sub sp, sp, #248 -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: mov r11, r0 -; LE-NEXT: vstr s15, [sp, #176] @ 4-byte Spill -; LE-NEXT: vmov.f32 s19, s14 -; LE-NEXT: ldrh r0, [lr, #132] -; LE-NEXT: vmov.f32 s17, s11 -; LE-NEXT: vstr s13, [sp, #196] @ 4-byte Spill -; LE-NEXT: vstr s12, [sp, #112] @ 4-byte Spill -; LE-NEXT: vstr s10, [sp, #136] @ 4-byte Spill -; LE-NEXT: vstr s9, [sp, #160] @ 4-byte Spill -; LE-NEXT: vstr s8, [sp, #200] @ 4-byte Spill -; LE-NEXT: vstr s7, [sp, #100] @ 4-byte Spill -; LE-NEXT: vstr s6, [sp, #116] @ 4-byte Spill -; LE-NEXT: vstr s5, [sp, #76] @ 4-byte Spill -; LE-NEXT: vstr s4, [sp, #120] @ 4-byte Spill -; LE-NEXT: vstr s3, [sp, #156] @ 4-byte Spill -; LE-NEXT: vstr s2, [sp, #192] @ 4-byte Spill -; LE-NEXT: vstr s1, [sp, #104] @ 4-byte Spill -; LE-NEXT: vstr s0, [sp, #108] @ 4-byte Spill -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov s0, r0 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: str r0, [sp, #52] @ 4-byte Spill -; LE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; LE-NEXT: ldrh r0, [lr, #108] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: 
str r0, [sp, #32] @ 4-byte Spill -; LE-NEXT: ldrh r0, [lr, #96] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: mov r5, r0 -; LE-NEXT: ldrh r0, [lr, #100] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: mov r7, r0 -; LE-NEXT: ldrh r0, [lr, #156] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: mov r6, r0 -; LE-NEXT: ldrh r0, [lr, #152] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: mov r4, r0 -; LE-NEXT: ldrh r0, [lr, #148] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov s0, r0 +; LE-NEXT: .save {r11, lr} +; LE-NEXT: push {r11, lr} ; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r4 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vmov.32 d0[0], r0 +; LE-NEXT: vmov.32 d0[1], r1 +; LE-NEXT: pop {r11, pc} +; +; BE-LABEL: llrint_v1i64_v1f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r11, lr} +; BE-NEXT: push {r11, lr} +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: pop {r11, pc} + %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) + +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +; LE-LABEL: llrint_v2i64_v2f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, lr} +; LE-NEXT: push {r4, lr} +; LE-NEXT: .vsave {d10, d11} +; LE-NEXT: vpush {d10, d11} +; LE-NEXT: .vsave {d8} +; LE-NEXT: vpush {d8} +; LE-NEXT: vmov.f64 d8, d0 +; LE-NEXT: vmov.f32 s0, s17 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r6 +; LE-NEXT: vmov.f32 s0, s16 ; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r7 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r5 -; LE-NEXT: mov r7, r1 ; LE-NEXT: vmov.32 d11[0], r0 ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #256 ; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: mov r5, r1 -; LE-NEXT: ldrh r0, [lr, #144] -; LE-NEXT: bl 
__aeabi_h2f -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: mov r10, r0 -; LE-NEXT: vmov.32 d11[1], r7 -; LE-NEXT: ldrh r0, [lr, #104] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov.32 d10[1], r5 -; LE-NEXT: add lr, sp, #80 -; LE-NEXT: mov r7, r0 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: ldrh r0, [lr, #124] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: mov r5, r0 -; LE-NEXT: vmov.32 d15[1], r6 -; LE-NEXT: ldrh r0, [lr, #120] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov.32 d14[1], r4 -; LE-NEXT: add lr, sp, #16 -; LE-NEXT: mov r6, r0 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: ldrh r0, [lr, #116] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: vorr q5, q6, q6 -; LE-NEXT: mov r4, r0 -; LE-NEXT: ldrh r0, [lr, #112] -; LE-NEXT: vmov.32 d11[1], r8 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov s0, r0 +; LE-NEXT: vmov.32 d11[1], r4 +; LE-NEXT: vmov.32 d10[1], r1 +; LE-NEXT: vorr q0, q5, q5 +; LE-NEXT: vpop {d8} +; LE-NEXT: vpop {d10, d11} +; LE-NEXT: pop {r4, pc} +; +; BE-LABEL: llrint_v2i64_v2f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, lr} +; BE-NEXT: push {r4, lr} +; BE-NEXT: .vsave {d10, d11} +; BE-NEXT: vpush {d10, d11} +; BE-NEXT: .vsave {d8} +; BE-NEXT: vpush {d8} +; BE-NEXT: vrev64.32 d8, d0 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vrev64.32 q0, q5 +; BE-NEXT: vpop {d8} +; BE-NEXT: vpop {d10, d11} +; BE-NEXT: pop {r4, pc} + %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) + +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +; LE-LABEL: llrint_v4i64_v4f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, lr} +; LE-NEXT: push {r4, 
r5, r6, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; LE-NEXT: vorr q5, q0, q0 +; LE-NEXT: vmov.f32 s0, s23 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r4 -; LE-NEXT: str r1, [sp, #12] @ 4-byte Spill +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s21 +; LE-NEXT: mov r5, r1 ; LE-NEXT: vmov.32 d12[0], r0 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r6 -; LE-NEXT: add lr, sp, #216 +; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: mov r6, r1 ; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: str r1, [sp, #8] @ 4-byte Spill -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill ; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r5 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: vmov.32 d13[1], r6 +; LE-NEXT: vmov.32 d9[1], r4 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q0, q6, q6 +; LE-NEXT: vorr q1, q4, q4 +; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; LE-NEXT: pop {r4, r5, r6, pc} +; +; BE-LABEL: llrint_v4i64_v4f32: +; BE: @ %bb.0: +; BE-NEXT: .save {r4, r5, r6, lr} +; BE-NEXT: push {r4, r5, r6, lr} +; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; BE-NEXT: vrev64.32 d8, d1 +; BE-NEXT: vrev64.32 d9, d0 +; BE-NEXT: vmov.f32 s0, s17 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s18 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: vmov.32 d13[1], r6 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d12[1], r5 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vrev64.32 q0, q6 +; BE-NEXT: vrev64.32 q1, q5 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; BE-NEXT: pop 
{r4, r5, r6, pc} + %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +; LE-LABEL: llrint_v8i64_v8f32: +; LE: @ %bb.0: +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-NEXT: .pad #40 +; LE-NEXT: sub sp, sp, #40 +; LE-NEXT: vorr q6, q1, q1 +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vorr q7, q0, q0 +; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill +; LE-NEXT: vmov.f32 s0, s27 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r7 -; LE-NEXT: add lr, sp, #232 -; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vmov.f32 s0, s24 ; LE-NEXT: mov r8, r1 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: vmov.32 d9[0], r0 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r10 +; LE-NEXT: vmov.f32 s0, s25 ; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #256 ; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: mov r5, r1 -; LE-NEXT: ldrh r0, [lr, #140] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov s0, r0 -; LE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload -; LE-NEXT: vmov.32 d10[1], r5 -; LE-NEXT: add lr, sp, #32 -; LE-NEXT: vmov s16, r0 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #256 -; LE-NEXT: mov r6, r1 +; LE-NEXT: vorr q6, q7, q7 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: vmov.f32 s0, s26 +; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s27 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: bl llrintf +; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: mov r4, r1 ; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: ldrh r1, [lr, #128] -; LE-NEXT: mov r0, r1 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: 
vmov.f32 s0, s16 -; LE-NEXT: vmov s18, r0 ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #256 +; LE-NEXT: add lr, sp, #8 ; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload -; LE-NEXT: ldrh r0, [lr, #136] -; LE-NEXT: vmov.32 d15[1], r6 -; LE-NEXT: vmov.32 d11[0], r1 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov.f32 s0, s18 -; LE-NEXT: vmov s16, r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s16 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: ldr r0, [sp, #56] @ 4-byte Reload -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d11[1], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d13[1], r5 -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; LE-NEXT: vmov.32 d12[1], r9 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: vmov r0, s19 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #232 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: vmov.32 d13[1], r8 -; LE-NEXT: vmov.32 d12[1], r4 -; LE-NEXT: vmov.32 d10[1], r6 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #216 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d17[1], r2 -; LE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; LE-NEXT: vmov.32 d14[1], r1 -; LE-NEXT: add r1, r11, #192 -; LE-NEXT: vmov.32 d16[1], r2 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #32 -; LE-NEXT: vst1.64 {d10, d11}, [r1:128]! -; LE-NEXT: vst1.64 {d14, d15}, [r1:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #16 -; LE-NEXT: vst1.64 {d16, d17}, [r1:128]! 
-; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r1:128] -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: str r0, [sp, #52] @ 4-byte Spill -; LE-NEXT: vmov r0, s17 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #76] @ 4-byte Reload -; LE-NEXT: mov r10, r0 -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #100] @ 4-byte Reload -; LE-NEXT: mov r4, r0 -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #104] @ 4-byte Reload -; LE-NEXT: mov r7, r0 -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #108] @ 4-byte Reload -; LE-NEXT: mov r5, r0 -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #112] @ 4-byte Reload -; LE-NEXT: mov r6, r0 -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov s0, r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r6 -; LE-NEXT: str r1, [sp, #112] @ 4-byte Spill -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r5 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r7 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r4 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov s0, r10 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vldr s0, [sp, #116] @ 4-byte Reload -; LE-NEXT: mov r6, r0 -; LE-NEXT: str r1, [sp, #108] @ 4-byte Spill -; LE-NEXT: vmov.32 d11[1], r5 -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov s0, r0 -; LE-NEXT: vmov.32 d13[1], r4 -; LE-NEXT: bl llrintf -; LE-NEXT: vldr s0, [sp, #120] @ 4-byte Reload -; LE-NEXT: mov r4, r0 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d9[1], r8 -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #136] @ 4-byte Reload -; LE-NEXT: vmov.32 d10[0], r4 -; LE-NEXT: vmov r7, s0 -; LE-NEXT: vmov s0, r0 -; LE-NEXT: bl 
llrintf -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: add r10, r11, #128 -; LE-NEXT: mov r0, r7 -; LE-NEXT: vmov.32 d10[1], r5 -; LE-NEXT: vmov.32 d12[1], r1 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #120 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #80 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vmov.32 d13[0], r6 -; LE-NEXT: vst1.64 {d16, d17}, [r10:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r10:128]! -; LE-NEXT: vldr s0, [sp, #156] @ 4-byte Reload -; LE-NEXT: vmov r4, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #160] @ 4-byte Reload -; LE-NEXT: mov r5, r0 -; LE-NEXT: ldr r0, [sp, #52] @ 4-byte Reload -; LE-NEXT: vmov.32 d8[1], r9 -; LE-NEXT: vmov r7, s0 -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vldr s0, [sp, #176] @ 4-byte Reload -; LE-NEXT: vmov s20, r0 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov.f32 s0, s20 -; LE-NEXT: vmov s18, r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s18 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: vmov s16, r5 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: mov r0, r7 -; LE-NEXT: mov r5, r1 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov.f32 s0, s16 -; LE-NEXT: vmov s18, r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s18 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d11[1], r5 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: mov r0, r4 -; LE-NEXT: mov r5, r1 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #196] @ 4-byte Reload -; LE-NEXT: mov r7, r0 -; LE-NEXT: vmov.32 d10[1], r6 -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vldr s0, [sp, #192] @ 4-byte Reload -; LE-NEXT: vmov s16, r0 -; 
LE-NEXT: vmov.32 d13[1], r5 -; LE-NEXT: vmov r6, s0 -; LE-NEXT: vldr s0, [sp, #200] @ 4-byte Reload -; LE-NEXT: vmov r0, s0 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov.f32 s0, s16 -; LE-NEXT: vmov s18, r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s18 -; LE-NEXT: add lr, sp, #200 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov s16, r7 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: mov r0, r6 -; LE-NEXT: mov r5, r1 -; LE-NEXT: bl __aeabi_h2f -; LE-NEXT: vmov.f32 s0, s16 -; LE-NEXT: vmov s18, r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s18 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: vmov.32 d12[1], r5 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #200 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: ldr r0, [sp, #112] @ 4-byte Reload -; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vmov.32 d19[1], r4 -; LE-NEXT: vmov.32 d18[1], r0 -; LE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #216 -; LE-NEXT: vmov.32 d17[1], r0 -; LE-NEXT: add r0, r11, #64 -; LE-NEXT: vmov.32 d16[1], r8 -; LE-NEXT: vorr q10, q8, q8 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #232 -; LE-NEXT: vmov.32 d15[1], r6 -; LE-NEXT: vst1.64 {d16, d17}, [r10:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vmov.32 d14[1], r1 -; LE-NEXT: vst1.64 {d16, d17}, [r10:128] -; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEXT: vst1.64 {d20, d21}, [r0:128]! -; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEXT: vst1.64 {d10, d11}, [r0:128] -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #120 -; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! -; LE-NEXT: vst1.64 {d14, d15}, [r11:128]! 
-; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r11:128] -; LE-NEXT: add sp, sp, #248 -; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: add sp, sp, #4 -; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-NEON-LABEL: llrint_v32i64_v32f16: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #248 -; LE-NEON-NEXT: sub sp, sp, #248 -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r11, r0 -; LE-NEON-NEXT: vstr s15, [sp, #176] @ 4-byte Spill -; LE-NEON-NEXT: vmov.f32 s19, s14 -; LE-NEON-NEXT: ldrh r0, [lr, #132] -; LE-NEON-NEXT: vmov.f32 s17, s11 -; LE-NEON-NEXT: vstr s13, [sp, #196] @ 4-byte Spill -; LE-NEON-NEXT: vstr s12, [sp, #112] @ 4-byte Spill -; LE-NEON-NEXT: vstr s10, [sp, #136] @ 4-byte Spill -; LE-NEON-NEXT: vstr s9, [sp, #160] @ 4-byte Spill -; LE-NEON-NEXT: vstr s8, [sp, #200] @ 4-byte Spill -; LE-NEON-NEXT: vstr s7, [sp, #100] @ 4-byte Spill -; LE-NEON-NEXT: vstr s6, [sp, #116] @ 4-byte Spill -; LE-NEON-NEXT: vstr s5, [sp, #76] @ 4-byte Spill -; LE-NEON-NEXT: vstr s4, [sp, #120] @ 4-byte Spill -; LE-NEON-NEXT: vstr s3, [sp, #156] @ 4-byte Spill -; LE-NEON-NEXT: vstr s2, [sp, #192] @ 4-byte Spill -; LE-NEON-NEXT: vstr s1, [sp, #104] @ 4-byte Spill -; LE-NEON-NEXT: vstr s0, [sp, #108] @ 4-byte Spill -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: str r0, [sp, #52] @ 4-byte Spill -; LE-NEON-NEXT: str r1, [sp, #56] @ 4-byte Spill -; LE-NEON-NEXT: ldrh r0, [lr, #108] -; LE-NEON-NEXT: bl 
__aeabi_h2f -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: str r0, [sp, #32] @ 4-byte Spill -; LE-NEON-NEXT: ldrh r0, [lr, #96] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: ldrh r0, [lr, #100] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: ldrh r0, [lr, #156] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r6, r0 -; LE-NEON-NEXT: ldrh r0, [lr, #152] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: ldrh r0, [lr, #148] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r4 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r6 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r7 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r5 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: ldrh r0, [lr, #144] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r10, r0 -; LE-NEON-NEXT: vmov.32 d11[1], r7 -; LE-NEON-NEXT: ldrh r0, [lr, #104] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.32 d10[1], r5 -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: ldrh r0, [lr, #124] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: ldrh r0, [lr, #120] -; LE-NEON-NEXT: bl __aeabi_h2f -; 
LE-NEON-NEXT: vmov.32 d14[1], r4 -; LE-NEON-NEXT: add lr, sp, #16 -; LE-NEON-NEXT: mov r6, r0 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: ldrh r0, [lr, #116] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: vorr q5, q6, q6 -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: ldrh r0, [lr, #112] -; LE-NEON-NEXT: vmov.32 d11[1], r8 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r4 -; LE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r6 -; LE-NEON-NEXT: add lr, sp, #216 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r5 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r7 -; LE-NEON-NEXT: add lr, sp, #232 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r10 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: ldrh r0, [lr, #140] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: ldr r0, [sp, #32] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d10[1], r5 -; LE-NEON-NEXT: add lr, sp, #32 -; LE-NEON-NEXT: vmov s16, r0 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: ldrh r1, [lr, #128] -; LE-NEON-NEXT: mov r0, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s16 -; 
LE-NEON-NEXT: vmov s18, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #256 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: ldr r1, [sp, #52] @ 4-byte Reload -; LE-NEON-NEXT: ldrh r0, [lr, #136] -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: vmov.32 d11[0], r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: vmov s16, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d11[1], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d13[1], r5 -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d12[1], r9 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: vmov r0, s19 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #232 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d13[1], r8 -; LE-NEON-NEXT: vmov.32 d12[1], r4 -; LE-NEON-NEXT: vmov.32 d10[1], r6 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #216 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d17[1], r2 -; LE-NEON-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d14[1], r1 -; LE-NEON-NEXT: add r1, r11, #192 -; LE-NEON-NEXT: vmov.32 d16[1], r2 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #32 -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r1:128]! -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r1:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #16 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r1:128]! 
-; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r1:128] -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: str r0, [sp, #52] @ 4-byte Spill -; LE-NEON-NEXT: vmov r0, s17 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #76] @ 4-byte Reload -; LE-NEON-NEXT: mov r10, r0 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #100] @ 4-byte Reload -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #108] @ 4-byte Reload -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #112] @ 4-byte Reload -; LE-NEON-NEXT: mov r6, r0 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r6 -; LE-NEON-NEXT: str r1, [sp, #112] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r5 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r7 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r4 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov s0, r10 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vldr s0, [sp, #116] @ 4-byte Reload -; LE-NEON-NEXT: mov r6, r0 -; LE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d11[1], r5 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: vmov.32 d13[1], r4 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vldr s0, [sp, #120] 
@ 4-byte Reload -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d9[1], r8 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #136] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d10[0], r4 -; LE-NEON-NEXT: vmov r7, s0 -; LE-NEON-NEXT: vmov s0, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: add r10, r11, #128 -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: vmov.32 d10[1], r5 -; LE-NEON-NEXT: vmov.32 d12[1], r1 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vmov.32 d13[0], r6 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! 
-; LE-NEON-NEXT: vldr s0, [sp, #156] @ 4-byte Reload -; LE-NEON-NEXT: vmov r4, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #160] @ 4-byte Reload -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: ldr r0, [sp, #52] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d8[1], r9 -; LE-NEON-NEXT: vmov r7, s0 -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vldr s0, [sp, #176] @ 4-byte Reload -; LE-NEON-NEXT: vmov s20, r0 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s20 -; LE-NEON-NEXT: vmov s18, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: vmov s16, r5 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: vmov s18, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d11[1], r5 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: mov r0, r4 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #196] @ 4-byte Reload -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: vmov.32 d10[1], r6 -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vldr s0, [sp, #192] @ 4-byte Reload -; LE-NEON-NEXT: vmov s16, r0 -; LE-NEON-NEXT: vmov.32 d13[1], r5 -; LE-NEON-NEXT: vmov r6, s0 -; LE-NEON-NEXT: vldr s0, [sp, #200] @ 4-byte Reload -; LE-NEON-NEXT: vmov r0, s0 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: vmov s18, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: add lr, sp, #200 -; LE-NEON-NEXT: 
vmov.32 d15[0], r0 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov s16, r7 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: mov r0, r6 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: bl __aeabi_h2f -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: vmov s18, r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: vmov.32 d12[1], r5 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #200 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vmov.32 d19[1], r4 -; LE-NEON-NEXT: vmov.32 d18[1], r0 -; LE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #216 -; LE-NEON-NEXT: vmov.32 d17[1], r0 -; LE-NEON-NEXT: add r0, r11, #64 -; LE-NEON-NEXT: vmov.32 d16[1], r8 -; LE-NEON-NEXT: vorr q10, q8, q8 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #232 -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vmov.32 d14[1], r1 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128] -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128] -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
-; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] -; LE-NEON-NEXT: add sp, sp, #248 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-LABEL: llrint_v32i64_v32f16: -; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: .pad #4 -; BE-NEXT: sub sp, sp, #4 -; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #176 -; BE-NEXT: sub sp, sp, #176 -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r10, r0 -; BE-NEXT: vstr s15, [sp, #112] @ 4-byte Spill -; BE-NEXT: ldrh r0, [lr, #74] -; BE-NEXT: vstr s14, [sp, #80] @ 4-byte Spill -; BE-NEXT: vstr s13, [sp, #48] @ 4-byte Spill -; BE-NEXT: vstr s12, [sp, #148] @ 4-byte Spill -; BE-NEXT: vstr s11, [sp, #76] @ 4-byte Spill -; BE-NEXT: vstr s10, [sp, #152] @ 4-byte Spill -; BE-NEXT: vstr s9, [sp, #156] @ 4-byte Spill -; BE-NEXT: vstr s8, [sp, #120] @ 4-byte Spill -; BE-NEXT: vstr s7, [sp, #136] @ 4-byte Spill -; BE-NEXT: vstr s6, [sp, #132] @ 4-byte Spill -; BE-NEXT: vstr s5, [sp, #144] @ 4-byte Spill -; BE-NEXT: vstr s4, [sp, #64] @ 4-byte Spill -; BE-NEXT: vstr s3, [sp, #104] @ 4-byte Spill -; BE-NEXT: vstr s2, [sp, #88] @ 4-byte Spill -; BE-NEXT: vstr s1, [sp, #56] @ 4-byte Spill -; BE-NEXT: vstr s0, [sp, #96] @ 4-byte Spill -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r9, r0 -; BE-NEXT: mov r8, r1 -; BE-NEXT: ldrh r0, [lr, #62] -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r6, r0 -; BE-NEXT: ldrh r0, [lr, #58] -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r7, r0 -; BE-NEXT: ldrh r0, [lr, #66] -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r4, r0 -; BE-NEXT: 
ldrh r0, [lr, #54] -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r5, r0 -; BE-NEXT: ldrh r0, [lr, #50] -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov s0, r5 -; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-NEXT: vstr d16, [sp, #168] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov s0, r4 -; BE-NEXT: str r1, [sp, #40] @ 4-byte Spill -; BE-NEXT: vstr d16, [sp, #160] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov s0, r7 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vstr d16, [sp, #32] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov s0, r6 -; BE-NEXT: mov r11, r1 -; BE-NEXT: vstr d16, [sp, #24] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: mov r6, r1 -; BE-NEXT: ldrh r0, [lr, #34] -; BE-NEXT: vstr d16, [sp, #16] @ 8-byte Spill -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d8[0], r9 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: ldrh r1, [lr, #38] -; BE-NEXT: mov r0, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov.32 d8[1], r8 -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vstr d8, [sp, #8] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: ldrh r1, [lr, #26] -; BE-NEXT: mov r0, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d12[1], r7 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: ldrh r1, [lr, #30] -; BE-NEXT: mov r0, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d13[1], r5 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: ldrh r1, [lr, #78] -; 
BE-NEXT: mov r0, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d9[1], r7 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldrh r1, [lr, #82] -; BE-NEXT: mov r0, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d15[1], r5 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: ldrh r1, [lr, #86] -; BE-NEXT: mov r0, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d14[1], r7 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: ldrh r1, [lr, #70] -; BE-NEXT: mov r0, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d8[1], r5 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: ldrh r1, [lr, #46] -; BE-NEXT: mov r0, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d10[1], r7 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d25[0], r0 -; BE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; BE-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; BE-NEXT: vldr d24, [sp, #160] @ 8-byte Reload -; BE-NEXT: vldr s0, [sp, #48] @ 4-byte Reload -; BE-NEXT: vmov.32 d24[1], r0 -; BE-NEXT: vmov r0, s0 -; BE-NEXT: vldr d26, [sp, #16] @ 8-byte Reload -; BE-NEXT: vstr d24, [sp, #160] @ 8-byte Spill -; BE-NEXT: vldr d24, [sp, #8] @ 8-byte Reload -; BE-NEXT: vrev64.32 d23, d14 -; BE-NEXT: vldr d29, [sp, #24] @ 8-byte Reload -; BE-NEXT: vrev64.32 d22, d24 -; BE-NEXT: vldr d24, [sp, #168] @ 8-byte Reload -; BE-NEXT: vmov.32 d26[1], r6 -; BE-NEXT: vldr d28, [sp, #32] @ 8-byte Reload -; BE-NEXT: vmov.32 d25[1], r1 -; BE-NEXT: add r1, r10, #192 -; BE-NEXT: vmov.32 d29[1], r11 -; BE-NEXT: add r11, r10, #128 -; BE-NEXT: vmov.32 d24[1], r2 -; BE-NEXT: vmov.32 d11[1], r5 -; BE-NEXT: vmov.32 d28[1], r4 -; BE-NEXT: vrev64.32 d27, d26 -; 
BE-NEXT: vstr d24, [sp, #168] @ 8-byte Spill -; BE-NEXT: vstr d25, [sp, #48] @ 8-byte Spill -; BE-NEXT: vrev64.32 d25, d11 -; BE-NEXT: vrev64.32 d26, d29 -; BE-NEXT: vrev64.32 d24, d28 -; BE-NEXT: vst1.64 {d26, d27}, [r1:128]! -; BE-NEXT: vst1.64 {d24, d25}, [r1:128]! -; BE-NEXT: vrev64.32 d21, d10 -; BE-NEXT: vrev64.32 d19, d15 -; BE-NEXT: vrev64.32 d17, d13 -; BE-NEXT: vrev64.32 d20, d8 -; BE-NEXT: vst1.64 {d22, d23}, [r1:128]! -; BE-NEXT: vrev64.32 d18, d9 -; BE-NEXT: vrev64.32 d16, d12 -; BE-NEXT: vst1.64 {d20, d21}, [r1:128] -; BE-NEXT: vst1.64 {d18, d19}, [r11:128]! -; BE-NEXT: vst1.64 {d16, d17}, [r11:128]! -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #256 -; BE-NEXT: mov r7, r0 -; BE-NEXT: mov r8, r1 -; BE-NEXT: ldrh r0, [lr, #42] -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vldr s0, [sp, #56] @ 4-byte Reload -; BE-NEXT: mov r4, r0 -; BE-NEXT: vmov r0, s0 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov s0, r4 -; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vldr s0, [sp, #64] @ 4-byte Reload -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov r2, s0 -; BE-NEXT: vldr s0, [sp, #80] @ 4-byte Reload -; BE-NEXT: vmov.32 d16[1], r1 -; BE-NEXT: vmov r4, s0 -; BE-NEXT: vldr s0, [sp, #76] @ 4-byte Reload -; BE-NEXT: vstr d16, [sp, #80] @ 8-byte Spill -; BE-NEXT: vmov r5, s0 -; BE-NEXT: mov r0, r2 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: mov r0, r4 -; BE-NEXT: mov r9, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov s0, r0 -; BE-NEXT: vmov.32 d8[0], r7 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: mov r0, r5 -; BE-NEXT: mov r6, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vldr s0, [sp, #88] @ 4-byte Reload -; BE-NEXT: mov r4, r0 -; BE-NEXT: vmov.32 d8[1], r8 -; BE-NEXT: vmov r7, s0 -; BE-NEXT: vldr s0, [sp, #96] @ 4-byte Reload -; 
BE-NEXT: vstr d8, [sp, #88] @ 8-byte Spill -; BE-NEXT: vmov r0, s0 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vldr s0, [sp, #104] @ 4-byte Reload -; BE-NEXT: vmov s19, r0 -; BE-NEXT: vmov.32 d12[1], r6 -; BE-NEXT: vmov r5, s0 -; BE-NEXT: vldr s0, [sp, #112] @ 4-byte Reload -; BE-NEXT: vstr d12, [sp, #104] @ 8-byte Spill -; BE-NEXT: vmov r0, s0 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov.f32 s0, s19 -; BE-NEXT: vmov s30, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s30 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: vmov s17, r4 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: mov r0, r5 -; BE-NEXT: mov r4, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vmov s30, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s30 -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: str r1, [sp, #76] @ 4-byte Spill -; BE-NEXT: vmov.32 d12[1], r4 -; BE-NEXT: vstr d16, [sp, #64] @ 8-byte Spill -; BE-NEXT: vstr d12, [sp, #112] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: mov r0, r7 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vmov.32 d9[1], r6 -; BE-NEXT: vstr d16, [sp, #56] @ 8-byte Spill -; BE-NEXT: vstr d9, [sp, #96] @ 8-byte Spill -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vldr s0, [sp, #120] @ 4-byte Reload -; BE-NEXT: mov r5, r0 -; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; BE-NEXT: vmov r7, s0 -; BE-NEXT: vldr s0, [sp, #132] @ 4-byte Reload -; BE-NEXT: vmov.32 d10[1], r0 -; BE-NEXT: vmov r0, s0 -; BE-NEXT: vstr d10, [sp, #120] @ 8-byte Spill -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vldr s0, [sp, #136] @ 4-byte Reload -; BE-NEXT: vmov s26, r0 -; BE-NEXT: vmov.32 d11[1], r9 -; BE-NEXT: vmov r4, s0 -; BE-NEXT: vldr s0, [sp, #144] @ 4-byte Reload -; BE-NEXT: vstr d11, [sp, #136] @ 8-byte Spill -; BE-NEXT: vmov r0, s0 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov.f32 s0, s26 -; BE-NEXT: vmov s22, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s22 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d13[0], r0 -; 
BE-NEXT: vmov s24, r5 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: mov r0, r4 -; BE-NEXT: mov r5, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov.f32 s0, s24 -; BE-NEXT: vmov s22, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s22 -; BE-NEXT: mov r9, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: vmov.32 d14[1], r5 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: mov r0, r7 -; BE-NEXT: mov r5, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vldr s0, [sp, #148] @ 4-byte Reload -; BE-NEXT: mov r7, r0 -; BE-NEXT: vmov.32 d13[1], r6 -; BE-NEXT: vmov r0, s0 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vldr s0, [sp, #152] @ 4-byte Reload -; BE-NEXT: vmov s20, r0 -; BE-NEXT: vmov.32 d11[1], r5 -; BE-NEXT: vmov r4, s0 -; BE-NEXT: vldr s0, [sp, #156] @ 4-byte Reload -; BE-NEXT: vmov r0, s0 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov.f32 s0, s20 -; BE-NEXT: vmov s16, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: vmov s18, r7 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: mov r0, r4 -; BE-NEXT: mov r5, r1 -; BE-NEXT: bl __aeabi_h2f -; BE-NEXT: vmov.f32 s0, s18 -; BE-NEXT: vmov s16, r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: vmov.32 d15[1], r5 -; BE-NEXT: bl llrintf -; BE-NEXT: vldr d16, [sp, #160] @ 8-byte Reload -; BE-NEXT: vldr d20, [sp, #136] @ 8-byte Reload -; BE-NEXT: vrev64.32 d19, d14 -; BE-NEXT: vrev64.32 d31, d16 -; BE-NEXT: vldr d16, [sp, #168] @ 8-byte Reload -; BE-NEXT: vrev64.32 d18, d20 -; BE-NEXT: vldr d20, [sp, #120] @ 8-byte Reload -; BE-NEXT: vldr d22, [sp, #96] @ 8-byte Reload -; BE-NEXT: vmov.32 d28[0], r0 -; BE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload -; BE-NEXT: vrev64.32 d21, d20 -; BE-NEXT: vrev64.32 d30, d16 -; BE-NEXT: vldr d16, [sp, #48] @ 8-byte Reload -; BE-NEXT: vldr d23, [sp, #64] @ 8-byte Reload -; BE-NEXT: vrev64.32 d20, d22 -; BE-NEXT: vldr d22, 
[sp, #112] @ 8-byte Reload -; BE-NEXT: vrev64.32 d1, d16 -; BE-NEXT: vldr d16, [sp, #80] @ 8-byte Reload -; BE-NEXT: vmov.32 d23[1], r0 -; BE-NEXT: add r0, r10, #64 -; BE-NEXT: vrev64.32 d25, d22 -; BE-NEXT: vldr d22, [sp, #104] @ 8-byte Reload -; BE-NEXT: vmov.32 d9[1], r4 -; BE-NEXT: vrev64.32 d0, d16 -; BE-NEXT: vmov.32 d28[1], r1 -; BE-NEXT: vldr d29, [sp, #56] @ 8-byte Reload -; BE-NEXT: vrev64.32 d3, d15 -; BE-NEXT: vrev64.32 d24, d22 -; BE-NEXT: vldr d22, [sp, #88] @ 8-byte Reload -; BE-NEXT: vmov.32 d10[1], r6 -; BE-NEXT: vrev64.32 d5, d23 -; BE-NEXT: vst1.64 {d0, d1}, [r11:128]! -; BE-NEXT: vrev64.32 d2, d9 -; BE-NEXT: vrev64.32 d27, d22 -; BE-NEXT: vmov.32 d29[1], r8 -; BE-NEXT: vrev64.32 d4, d28 -; BE-NEXT: vst1.64 {d30, d31}, [r11:128] -; BE-NEXT: vst1.64 {d2, d3}, [r0:128]! -; BE-NEXT: vmov.32 d12[1], r9 -; BE-NEXT: vrev64.32 d26, d10 -; BE-NEXT: vst1.64 {d4, d5}, [r0:128]! -; BE-NEXT: vrev64.32 d23, d29 -; BE-NEXT: vst1.64 {d26, d27}, [r0:128]! -; BE-NEXT: vrev64.32 d22, d12 -; BE-NEXT: vst1.64 {d24, d25}, [r0:128] -; BE-NEXT: vst1.64 {d20, d21}, [r10:128]! -; BE-NEXT: vst1.64 {d22, d23}, [r10:128]! -; BE-NEXT: vrev64.32 d17, d11 -; BE-NEXT: vrev64.32 d16, d13 -; BE-NEXT: vst1.64 {d18, d19}, [r10:128]! 
-; BE-NEXT: vst1.64 {d16, d17}, [r10:128] -; BE-NEXT: add sp, sp, #176 -; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: add sp, sp, #4 -; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v32i64_v32f16: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #176 -; BE-NEON-NEXT: sub sp, sp, #176 -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r10, r0 -; BE-NEON-NEXT: vstr s15, [sp, #112] @ 4-byte Spill -; BE-NEON-NEXT: ldrh r0, [lr, #74] -; BE-NEON-NEXT: vstr s14, [sp, #80] @ 4-byte Spill -; BE-NEON-NEXT: vstr s13, [sp, #48] @ 4-byte Spill -; BE-NEON-NEXT: vstr s12, [sp, #148] @ 4-byte Spill -; BE-NEON-NEXT: vstr s11, [sp, #76] @ 4-byte Spill -; BE-NEON-NEXT: vstr s10, [sp, #152] @ 4-byte Spill -; BE-NEON-NEXT: vstr s9, [sp, #156] @ 4-byte Spill -; BE-NEON-NEXT: vstr s8, [sp, #120] @ 4-byte Spill -; BE-NEON-NEXT: vstr s7, [sp, #136] @ 4-byte Spill -; BE-NEON-NEXT: vstr s6, [sp, #132] @ 4-byte Spill -; BE-NEON-NEXT: vstr s5, [sp, #144] @ 4-byte Spill -; BE-NEON-NEXT: vstr s4, [sp, #64] @ 4-byte Spill -; BE-NEON-NEXT: vstr s3, [sp, #104] @ 4-byte Spill -; BE-NEON-NEXT: vstr s2, [sp, #88] @ 4-byte Spill -; BE-NEON-NEXT: vstr s1, [sp, #56] @ 4-byte Spill -; BE-NEON-NEXT: vstr s0, [sp, #96] @ 4-byte Spill -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r9, r0 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: ldrh r0, [lr, #62] -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r6, r0 -; BE-NEON-NEXT: ldrh r0, [lr, #58] -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: 
mov r7, r0 -; BE-NEON-NEXT: ldrh r0, [lr, #66] -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: ldrh r0, [lr, #54] -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r5, r0 -; BE-NEON-NEXT: ldrh r0, [lr, #50] -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov s0, r5 -; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-NEON-NEXT: vstr d16, [sp, #168] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov s0, r4 -; BE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill -; BE-NEON-NEXT: vstr d16, [sp, #160] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov s0, r7 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vstr d16, [sp, #32] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov s0, r6 -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: vstr d16, [sp, #24] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: ldrh r0, [lr, #34] -; BE-NEON-NEXT: vstr d16, [sp, #16] @ 8-byte Spill -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d8[0], r9 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: ldrh r1, [lr, #38] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.32 d8[1], r8 -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vstr d8, [sp, #8] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: ldrh r1, [lr, #26] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov 
s0, r0 -; BE-NEON-NEXT: vmov.32 d12[1], r7 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: ldrh r1, [lr, #30] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d13[1], r5 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: ldrh r1, [lr, #78] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d9[1], r7 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldrh r1, [lr, #82] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d15[1], r5 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: ldrh r1, [lr, #86] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d14[1], r7 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: ldrh r1, [lr, #70] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d8[1], r5 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: ldrh r1, [lr, #46] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d10[1], r7 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d25[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; BE-NEON-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; BE-NEON-NEXT: vldr d24, [sp, #160] @ 8-byte Reload -; BE-NEON-NEXT: 
vldr s0, [sp, #48] @ 4-byte Reload -; BE-NEON-NEXT: vmov.32 d24[1], r0 -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: vldr d26, [sp, #16] @ 8-byte Reload -; BE-NEON-NEXT: vstr d24, [sp, #160] @ 8-byte Spill -; BE-NEON-NEXT: vldr d24, [sp, #8] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d23, d14 -; BE-NEON-NEXT: vldr d29, [sp, #24] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d22, d24 -; BE-NEON-NEXT: vldr d24, [sp, #168] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d26[1], r6 -; BE-NEON-NEXT: vldr d28, [sp, #32] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d25[1], r1 -; BE-NEON-NEXT: add r1, r10, #192 -; BE-NEON-NEXT: vmov.32 d29[1], r11 -; BE-NEON-NEXT: add r11, r10, #128 -; BE-NEON-NEXT: vmov.32 d24[1], r2 -; BE-NEON-NEXT: vmov.32 d11[1], r5 -; BE-NEON-NEXT: vmov.32 d28[1], r4 -; BE-NEON-NEXT: vrev64.32 d27, d26 -; BE-NEON-NEXT: vstr d24, [sp, #168] @ 8-byte Spill -; BE-NEON-NEXT: vstr d25, [sp, #48] @ 8-byte Spill -; BE-NEON-NEXT: vrev64.32 d25, d11 -; BE-NEON-NEXT: vrev64.32 d26, d29 -; BE-NEON-NEXT: vrev64.32 d24, d28 -; BE-NEON-NEXT: vst1.64 {d26, d27}, [r1:128]! -; BE-NEON-NEXT: vst1.64 {d24, d25}, [r1:128]! -; BE-NEON-NEXT: vrev64.32 d21, d10 -; BE-NEON-NEXT: vrev64.32 d19, d15 -; BE-NEON-NEXT: vrev64.32 d17, d13 -; BE-NEON-NEXT: vrev64.32 d20, d8 -; BE-NEON-NEXT: vst1.64 {d22, d23}, [r1:128]! -; BE-NEON-NEXT: vrev64.32 d18, d9 -; BE-NEON-NEXT: vrev64.32 d16, d12 -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r1:128] -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r11:128]! -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
-; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #256 -; BE-NEON-NEXT: mov r7, r0 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: ldrh r0, [lr, #42] -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #56] @ 4-byte Reload -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov s0, r4 -; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vldr s0, [sp, #64] @ 4-byte Reload -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov r2, s0 -; BE-NEON-NEXT: vldr s0, [sp, #80] @ 4-byte Reload -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vmov r4, s0 -; BE-NEON-NEXT: vldr s0, [sp, #76] @ 4-byte Reload -; BE-NEON-NEXT: vstr d16, [sp, #80] @ 8-byte Spill -; BE-NEON-NEXT: vmov r5, s0 -; BE-NEON-NEXT: mov r0, r2 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: mov r0, r4 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov s0, r0 -; BE-NEON-NEXT: vmov.32 d8[0], r7 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #88] @ 4-byte Reload -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: vmov.32 d8[1], r8 -; BE-NEON-NEXT: vmov r7, s0 -; BE-NEON-NEXT: vldr s0, [sp, #96] @ 4-byte Reload -; BE-NEON-NEXT: vstr d8, [sp, #88] @ 8-byte Spill -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #104] @ 4-byte Reload -; BE-NEON-NEXT: vmov s19, r0 -; BE-NEON-NEXT: vmov.32 d12[1], r6 -; BE-NEON-NEXT: vmov r5, s0 -; BE-NEON-NEXT: vldr s0, [sp, #112] @ 4-byte Reload -; BE-NEON-NEXT: vstr d12, [sp, #104] @ 8-byte Spill -; BE-NEON-NEXT: vmov r0, 
s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s19 -; BE-NEON-NEXT: vmov s30, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s30 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: vmov s17, r4 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vmov s30, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s30 -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d12[1], r4 -; BE-NEON-NEXT: vstr d16, [sp, #64] @ 8-byte Spill -; BE-NEON-NEXT: vstr d12, [sp, #112] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: mov r0, r7 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d9[1], r6 -; BE-NEON-NEXT: vstr d16, [sp, #56] @ 8-byte Spill -; BE-NEON-NEXT: vstr d9, [sp, #96] @ 8-byte Spill -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #120] @ 4-byte Reload -; BE-NEON-NEXT: mov r5, r0 -; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; BE-NEON-NEXT: vmov r7, s0 -; BE-NEON-NEXT: vldr s0, [sp, #132] @ 4-byte Reload -; BE-NEON-NEXT: vmov.32 d10[1], r0 -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #136] @ 4-byte Reload -; BE-NEON-NEXT: vmov s26, r0 -; BE-NEON-NEXT: vmov.32 d11[1], r9 -; BE-NEON-NEXT: vmov r4, s0 -; BE-NEON-NEXT: vldr s0, [sp, #144] @ 4-byte Reload -; BE-NEON-NEXT: vstr d11, [sp, #136] @ 8-byte Spill -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s26 -; BE-NEON-NEXT: vmov s22, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s22 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: vmov s24, r5 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: 
vmov.32 d14[0], r0 -; BE-NEON-NEXT: mov r0, r4 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s24 -; BE-NEON-NEXT: vmov s22, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s22 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: vmov.32 d14[1], r5 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: mov r0, r7 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #148] @ 4-byte Reload -; BE-NEON-NEXT: mov r7, r0 -; BE-NEON-NEXT: vmov.32 d13[1], r6 -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vldr s0, [sp, #152] @ 4-byte Reload -; BE-NEON-NEXT: vmov s20, r0 -; BE-NEON-NEXT: vmov.32 d11[1], r5 -; BE-NEON-NEXT: vmov r4, s0 -; BE-NEON-NEXT: vldr s0, [sp, #156] @ 4-byte Reload -; BE-NEON-NEXT: vmov r0, s0 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s20 -; BE-NEON-NEXT: vmov s16, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: vmov s18, r7 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: mov r0, r4 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: bl __aeabi_h2f -; BE-NEON-NEXT: vmov.f32 s0, s18 -; BE-NEON-NEXT: vmov s16, r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: vmov.32 d15[1], r5 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vldr d16, [sp, #160] @ 8-byte Reload -; BE-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d19, d14 -; BE-NEON-NEXT: vrev64.32 d31, d16 -; BE-NEON-NEXT: vldr d16, [sp, #168] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d18, d20 -; BE-NEON-NEXT: vldr d20, [sp, #120] @ 8-byte Reload -; BE-NEON-NEXT: vldr d22, [sp, #96] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d28[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #76] @ 
4-byte Reload -; BE-NEON-NEXT: vrev64.32 d21, d20 -; BE-NEON-NEXT: vrev64.32 d30, d16 -; BE-NEON-NEXT: vldr d16, [sp, #48] @ 8-byte Reload -; BE-NEON-NEXT: vldr d23, [sp, #64] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d20, d22 -; BE-NEON-NEXT: vldr d22, [sp, #112] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d1, d16 -; BE-NEON-NEXT: vldr d16, [sp, #80] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d23[1], r0 -; BE-NEON-NEXT: add r0, r10, #64 -; BE-NEON-NEXT: vrev64.32 d25, d22 -; BE-NEON-NEXT: vldr d22, [sp, #104] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: vrev64.32 d0, d16 -; BE-NEON-NEXT: vmov.32 d28[1], r1 -; BE-NEON-NEXT: vldr d29, [sp, #56] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d3, d15 -; BE-NEON-NEXT: vrev64.32 d24, d22 -; BE-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d10[1], r6 -; BE-NEON-NEXT: vrev64.32 d5, d23 -; BE-NEON-NEXT: vst1.64 {d0, d1}, [r11:128]! -; BE-NEON-NEXT: vrev64.32 d2, d9 -; BE-NEON-NEXT: vrev64.32 d27, d22 -; BE-NEON-NEXT: vmov.32 d29[1], r8 -; BE-NEON-NEXT: vrev64.32 d4, d28 -; BE-NEON-NEXT: vst1.64 {d30, d31}, [r11:128] -; BE-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]! -; BE-NEON-NEXT: vmov.32 d12[1], r9 -; BE-NEON-NEXT: vrev64.32 d26, d10 -; BE-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 d23, d29 -; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 d22, d12 -; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128] -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]! -; BE-NEON-NEXT: vst1.64 {d22, d23}, [r10:128]! -; BE-NEON-NEXT: vrev64.32 d17, d11 -; BE-NEON-NEXT: vrev64.32 d16, d13 -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]! 
-; BE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128] -; BE-NEON-NEXT: add sp, sp, #176 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) - ret <32 x i64> %a -} -declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) - -define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { -; LE-LABEL: llrint_v1i64_v1f32: -; LE: @ %bb.0: -; LE-NEXT: .save {r11, lr} -; LE-NEXT: push {r11, lr} -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d0[0], r0 -; LE-NEXT: vmov.32 d0[1], r1 -; LE-NEXT: pop {r11, pc} -; -; LE-NEON-LABEL: llrint_v1i64_v1f32: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r11, lr} -; LE-NEON-NEXT: push {r11, lr} -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d0[0], r0 -; LE-NEON-NEXT: vmov.32 d0[1], r1 -; LE-NEON-NEXT: pop {r11, pc} -; -; BE-LABEL: llrint_v1i64_v1f32: -; BE: @ %bb.0: -; BE-NEXT: .save {r11, lr} -; BE-NEXT: push {r11, lr} -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov.32 d16[1], r1 -; BE-NEXT: vrev64.32 d0, d16 -; BE-NEXT: pop {r11, pc} -; -; BE-NEON-LABEL: llrint_v1i64_v1f32: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r11, lr} -; BE-NEON-NEXT: push {r11, lr} -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d0, d16 -; BE-NEON-NEXT: pop {r11, pc} - %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) - ret <1 x i64> %a -} -declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) - -define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { -; LE-LABEL: llrint_v2i64_v2f32: -; LE: @ %bb.0: -; LE-NEXT: .save {r4, lr} -; LE-NEXT: push {r4, lr} -; LE-NEXT: .vsave {d10, d11} -; LE-NEXT: vpush {d10, d11} -; LE-NEXT: .vsave {d8} -; LE-NEXT: vpush {d8} -; LE-NEXT: vmov.f64 d8, d0 -; LE-NEXT: vmov.f32 s0, s17 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s16 -; LE-NEXT: mov r4, r1 -; 
LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: vmov.32 d11[1], r4 -; LE-NEXT: vmov.32 d10[1], r1 -; LE-NEXT: vorr q0, q5, q5 -; LE-NEXT: vpop {d8} -; LE-NEXT: vpop {d10, d11} -; LE-NEXT: pop {r4, pc} -; -; LE-NEON-LABEL: llrint_v2i64_v2f32: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, lr} -; LE-NEON-NEXT: push {r4, lr} -; LE-NEON-NEXT: .vsave {d10, d11} -; LE-NEON-NEXT: vpush {d10, d11} -; LE-NEON-NEXT: .vsave {d8} -; LE-NEON-NEXT: vpush {d8} -; LE-NEON-NEXT: vmov.f64 d8, d0 -; LE-NEON-NEXT: vmov.f32 s0, s17 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: vmov.32 d11[1], r4 -; LE-NEON-NEXT: vmov.32 d10[1], r1 -; LE-NEON-NEXT: vorr q0, q5, q5 -; LE-NEON-NEXT: vpop {d8} -; LE-NEON-NEXT: vpop {d10, d11} -; LE-NEON-NEXT: pop {r4, pc} -; -; BE-LABEL: llrint_v2i64_v2f32: -; BE: @ %bb.0: -; BE-NEXT: .save {r4, lr} -; BE-NEXT: push {r4, lr} -; BE-NEXT: .vsave {d10, d11} -; BE-NEXT: vpush {d10, d11} -; BE-NEXT: .vsave {d8} -; BE-NEXT: vpush {d8} -; BE-NEXT: vrev64.32 d8, d0 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: vmov.32 d11[1], r4 -; BE-NEXT: vmov.32 d10[1], r1 -; BE-NEXT: vrev64.32 q0, q5 -; BE-NEXT: vpop {d8} -; BE-NEXT: vpop {d10, d11} -; BE-NEXT: pop {r4, pc} -; -; BE-NEON-LABEL: llrint_v2i64_v2f32: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, lr} -; BE-NEON-NEXT: push {r4, lr} -; BE-NEON-NEXT: .vsave {d10, d11} -; BE-NEON-NEXT: vpush {d10, d11} -; BE-NEON-NEXT: .vsave {d8} -; BE-NEON-NEXT: vpush {d8} -; BE-NEON-NEXT: vrev64.32 d8, d0 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; 
BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vrev64.32 q0, q5 -; BE-NEON-NEXT: vpop {d8} -; BE-NEON-NEXT: vpop {d10, d11} -; BE-NEON-NEXT: pop {r4, pc} - %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) - ret <2 x i64> %a -} -declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) - -define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { -; LE-LABEL: llrint_v4i64_v4f32: -; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, lr} -; LE-NEXT: push {r4, r5, r6, lr} -; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; LE-NEXT: vorr q5, q0, q0 -; LE-NEXT: vmov.f32 s0, s23 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s20 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s21 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s22 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: vmov.32 d13[1], r6 -; LE-NEXT: vmov.32 d9[1], r4 -; LE-NEXT: vmov.32 d12[1], r5 -; LE-NEXT: vmov.32 d8[1], r1 -; LE-NEXT: vorr q0, q6, q6 -; LE-NEXT: vorr q1, q4, q4 -; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; LE-NEXT: pop {r4, r5, r6, pc} -; -; LE-NEON-LABEL: llrint_v4i64_v4f32: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, lr} -; LE-NEON-NEXT: push {r4, r5, r6, lr} -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; LE-NEON-NEXT: vorr q5, q0, q0 -; LE-NEON-NEXT: vmov.f32 s0, s23 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s20 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s21 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s22 -; LE-NEON-NEXT: 
mov r6, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vmov.32 d13[1], r6 -; LE-NEON-NEXT: vmov.32 d9[1], r4 -; LE-NEON-NEXT: vmov.32 d12[1], r5 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vorr q0, q6, q6 -; LE-NEON-NEXT: vorr q1, q4, q4 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; LE-NEON-NEXT: pop {r4, r5, r6, pc} -; -; BE-LABEL: llrint_v4i64_v4f32: -; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, lr} -; BE-NEXT: push {r4, r5, r6, lr} -; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; BE-NEXT: vrev64.32 d8, d1 -; BE-NEXT: vrev64.32 d9, d0 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s18 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s19 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: vmov.32 d13[1], r6 -; BE-NEXT: vmov.32 d11[1], r4 -; BE-NEXT: vmov.32 d12[1], r5 -; BE-NEXT: vmov.32 d10[1], r1 -; BE-NEXT: vrev64.32 q0, q6 -; BE-NEXT: vrev64.32 q1, q5 -; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; BE-NEXT: pop {r4, r5, r6, pc} -; -; BE-NEON-LABEL: llrint_v4i64_v4f32: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, lr} -; BE-NEON-NEXT: push {r4, r5, r6, lr} -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; BE-NEON-NEXT: vrev64.32 d8, d1 -; BE-NEON-NEXT: vrev64.32 d9, d0 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s18 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s19 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 
s0, s16 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: vmov.32 d13[1], r6 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vmov.32 d12[1], r5 -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vrev64.32 q0, q6 -; BE-NEON-NEXT: vrev64.32 q1, q5 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; BE-NEON-NEXT: pop {r4, r5, r6, pc} - %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) - ret <4 x i64> %a -} -declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) - -define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { -; LE-LABEL: llrint_v8i64_v8f32: -; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #40 -; LE-NEXT: sub sp, sp, #40 -; LE-NEXT: vorr q6, q1, q1 -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vorr q7, q0, q0 -; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-NEXT: vmov.f32 s0, s27 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s24 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s25 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vorr q6, q7, q7 -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: mov r10, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: vmov.f32 s0, s26 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s27 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s24 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s1 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: mov 
r6, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s2 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: vmov.32 d13[1], r6 -; LE-NEXT: vmov.32 d15[1], r4 -; LE-NEXT: vmov.32 d11[1], r10 -; LE-NEXT: vmov.32 d9[1], r8 -; LE-NEXT: vmov.32 d12[1], r5 -; LE-NEXT: vmov.32 d14[1], r7 -; LE-NEXT: vorr q0, q6, q6 -; LE-NEXT: vmov.32 d10[1], r9 -; LE-NEXT: vorr q1, q7, q7 -; LE-NEXT: vmov.32 d8[1], r1 -; LE-NEXT: vorr q2, q5, q5 -; LE-NEXT: vorr q3, q4, q4 -; LE-NEXT: add sp, sp, #40 -; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; LE-NEON-LABEL: llrint_v8i64_v8f32: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #40 -; LE-NEON-NEXT: sub sp, sp, #40 -; LE-NEON-NEXT: vorr q6, q1, q1 -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vorr q7, q0, q0 -; LE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-NEON-NEXT: vmov.f32 s0, s27 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s24 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s25 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vorr q6, q7, q7 -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: vmov.f32 s0, s26 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s27 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s24 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: bl llrintf -; 
LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s1 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s2 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vmov.32 d13[1], r6 -; LE-NEON-NEXT: vmov.32 d15[1], r4 -; LE-NEON-NEXT: vmov.32 d11[1], r10 -; LE-NEON-NEXT: vmov.32 d9[1], r8 -; LE-NEON-NEXT: vmov.32 d12[1], r5 -; LE-NEON-NEXT: vmov.32 d14[1], r7 -; LE-NEON-NEXT: vorr q0, q6, q6 -; LE-NEON-NEXT: vmov.32 d10[1], r9 -; LE-NEON-NEXT: vorr q1, q7, q7 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vorr q2, q5, q5 -; LE-NEON-NEXT: vorr q3, q4, q4 -; LE-NEON-NEXT: add sp, sp, #40 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-LABEL: llrint_v8i64_v8f32: -; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #32 -; BE-NEXT: sub sp, sp, #32 -; BE-NEXT: vorr q4, q1, q1 -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vorr q5, q0, q0 -; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-NEXT: vrev64.32 d12, d8 -; BE-NEXT: vmov.f32 s0, s25 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s24 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vrev64.32 d0, d11 -; BE-NEXT: mov r9, r1 -; BE-NEXT: vrev64.32 d8, d9 -; BE-NEXT: vorr d9, d0, d0 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: vstr d8, [sp, #24] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: mov r10, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, 
sp, #8 -; BE-NEXT: vmov.f32 s0, s19 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d16 -; BE-NEXT: vstr d8, [sp, #8] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vldr d0, [sp, #8] @ 8-byte Reload -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: vmov.f32 s0, s1 -; BE-NEXT: bl llrintf -; BE-NEXT: vldr d0, [sp, #24] @ 8-byte Reload -; BE-NEXT: mov r6, r1 -; BE-NEXT: @ kill: def $s0 killed $s0 killed $d0 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: vmov.32 d9[1], r6 -; BE-NEXT: vmov.32 d11[1], r4 -; BE-NEXT: vmov.32 d15[1], r8 -; BE-NEXT: vmov.32 d13[1], r7 -; BE-NEXT: vmov.32 d8[1], r5 -; BE-NEXT: vmov.32 d10[1], r10 -; BE-NEXT: vmov.32 d14[1], r9 -; BE-NEXT: vmov.32 d12[1], r1 -; BE-NEXT: vrev64.32 q0, q4 -; BE-NEXT: vrev64.32 q1, q5 -; BE-NEXT: vrev64.32 q2, q7 -; BE-NEXT: vrev64.32 q3, q6 -; BE-NEXT: add sp, sp, #32 -; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-NEON-LABEL: llrint_v8i64_v8f32: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #32 -; BE-NEON-NEXT: sub sp, sp, #32 -; BE-NEON-NEXT: vorr q4, q1, q1 -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vorr q5, q0, q0 -; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-NEON-NEXT: vrev64.32 d12, d8 -; BE-NEON-NEXT: vmov.f32 s0, s25 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s24 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vrev64.32 d0, d11 -; BE-NEON-NEXT: mov r9, r1 -; 
BE-NEON-NEXT: vrev64.32 d8, d9 -; BE-NEON-NEXT: vorr d9, d0, d0 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vmov.f32 s0, s19 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d16 -; BE-NEON-NEXT: vstr d8, [sp, #8] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vldr d0, [sp, #8] @ 8-byte Reload -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: vmov.f32 s0, s1 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vldr d0, [sp, #24] @ 8-byte Reload -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: vmov.32 d9[1], r6 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vmov.32 d15[1], r8 -; BE-NEON-NEXT: vmov.32 d13[1], r7 -; BE-NEON-NEXT: vmov.32 d8[1], r5 -; BE-NEON-NEXT: vmov.32 d10[1], r10 -; BE-NEON-NEXT: vmov.32 d14[1], r9 -; BE-NEON-NEXT: vmov.32 d12[1], r1 -; BE-NEON-NEXT: vrev64.32 q0, q4 -; BE-NEON-NEXT: vrev64.32 q1, q5 -; BE-NEON-NEXT: vrev64.32 q2, q7 -; BE-NEON-NEXT: vrev64.32 q3, q6 -; BE-NEON-NEXT: add sp, sp, #32 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} - %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) - ret <8 x i64> %a -} -declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) - -define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { -; LE-LABEL: llrint_v16i64_v16f32: -; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, 
r7, r8, r9, r10, r11, lr} -; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: .pad #4 -; LE-NEXT: sub sp, sp, #4 -; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #160 -; LE-NEXT: sub sp, sp, #160 -; LE-NEXT: add lr, sp, #112 -; LE-NEXT: vorr q5, q3, q3 -; LE-NEXT: vorr q6, q0, q0 -; LE-NEXT: mov r4, r0 -; LE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-NEXT: add lr, sp, #48 -; LE-NEXT: vorr q7, q1, q1 -; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEXT: vmov.f32 s0, s23 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s24 -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: str r1, [sp, #108] @ 4-byte Spill -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s25 -; LE-NEXT: str r1, [sp, #84] @ 4-byte Spill -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s28 -; LE-NEXT: add lr, sp, #128 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: str r1, [sp, #44] @ 4-byte Spill -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s29 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s30 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s31 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #112 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s29 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s22 -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: mov r11, r1 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: vmov.32 d13[1], r7 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: vmov.f32 s0, s21 -; LE-NEXT: vmov.32 d12[1], r5 -; LE-NEXT: str r1, [sp, #40] @ 4-byte Spill 
-; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d16[0], r0 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s20 -; LE-NEXT: mov r10, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vmov.32 d9[1], r6 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s31 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d8[1], r9 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #64 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #128 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; LE-NEXT: mov r9, r1 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: add lr, sp, #48 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s27 -; LE-NEXT: vmov.32 d11[1], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s26 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; LE-NEXT: add lr, sp, #128 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d10[1], r0 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; LE-NEXT: mov r5, r1 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d17[1], r0 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #112 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s1 ; LE-NEXT: bl llrintf ; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vmov.f32 s0, s22 ; LE-NEXT: mov r6, r1 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d16[0], r0 -; LE-NEXT: vmov.32 d17[1], r11 -; LE-NEXT: vorr q6, q8, q8 +; LE-NEXT: 
vmov.32 d13[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s2 ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #144 ; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEXT: add lr, sp, #128 -; LE-NEXT: vmov.32 d9[1], r9 -; LE-NEXT: vmov.32 d12[1], r6 -; LE-NEXT: vmov.32 d19[1], r10 +; LE-NEXT: vmov.32 d13[1], r6 +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: vmov.32 d11[1], r10 +; LE-NEXT: vmov.32 d9[1], r8 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: vorr q0, q6, q6 +; LE-NEXT: vmov.32 d10[1], r9 +; LE-NEXT: vorr q1, q7, q7 ; LE-NEXT: vmov.32 d8[1], r1 -; LE-NEXT: vmov.32 d16[1], r0 -; LE-NEXT: add r0, r4, #64 -; LE-NEXT: vmov.32 d18[1], r8 -; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEXT: vmov.32 d15[1], r7 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #64 -; LE-NEXT: vmov.32 d14[1], r5 -; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-NEXT: vst1.64 {d14, d15}, [r4:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! 
-; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r4:128] -; LE-NEXT: add sp, sp, #160 +; LE-NEXT: vorr q2, q5, q5 +; LE-NEXT: vorr q3, q4, q4 +; LE-NEXT: add sp, sp, #40 ; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: add sp, sp, #4 -; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-NEON-LABEL: llrint_v16i64_v16f32: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #160 -; LE-NEON-NEXT: sub sp, sp, #160 -; LE-NEON-NEXT: add lr, sp, #112 -; LE-NEON-NEXT: vorr q5, q3, q3 -; LE-NEON-NEXT: vorr q6, q0, q0 -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #48 -; LE-NEON-NEXT: vorr q7, q1, q1 -; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEON-NEXT: vmov.f32 s0, s23 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s24 -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s25 -; LE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s28 -; LE-NEON-NEXT: add lr, sp, #128 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s29 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s30 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: bl 
llrintf -; LE-NEON-NEXT: vmov.f32 s0, s31 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #112 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s29 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s22 -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: vmov.32 d13[1], r7 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: vmov.f32 s0, s21 -; LE-NEON-NEXT: vmov.32 d12[1], r5 -; LE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d16[0], r0 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s20 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vmov.32 d9[1], r6 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s31 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d8[1], r9 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #64 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #128 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #48 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s27 -; LE-NEON-NEXT: vmov.32 d11[1], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s26 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: ldr r0, 
[sp, #84] @ 4-byte Reload -; LE-NEON-NEXT: add lr, sp, #128 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d10[1], r0 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d17[1], r0 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #112 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s20 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vmov.f32 s0, s22 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d16[0], r0 -; LE-NEON-NEXT: vmov.32 d17[1], r11 -; LE-NEON-NEXT: vorr q6, q8, q8 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #128 -; LE-NEON-NEXT: vmov.32 d9[1], r9 -; LE-NEON-NEXT: vmov.32 d12[1], r6 -; LE-NEON-NEXT: vmov.32 d19[1], r10 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vmov.32 d16[1], r0 -; LE-NEON-NEXT: add r0, r4, #64 -; LE-NEON-NEXT: vmov.32 d18[1], r8 -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEON-NEXT: vmov.32 d15[1], r7 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #64 -; LE-NEON-NEXT: vmov.32 d14[1], r5 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r4:128]! 
-; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] -; LE-NEON-NEXT: add sp, sp, #160 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; -; BE-LABEL: llrint_v16i64_v16f32: +; BE-LABEL: llrint_v8i64_v8f32: ; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: .pad #4 -; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #144 -; BE-NEXT: sub sp, sp, #144 -; BE-NEXT: vorr q6, q3, q3 -; BE-NEXT: add lr, sp, #112 -; BE-NEXT: vorr q7, q0, q0 -; BE-NEXT: mov r4, r0 -; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-NEXT: add lr, sp, #96 -; BE-NEXT: vrev64.32 d8, d13 -; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vrev64.32 d8, d14 -; BE-NEXT: add lr, sp, #128 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: str r1, [sp, #92] @ 4-byte Spill -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vrev64.32 d9, d12 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEXT: vstr d9, [sp, #64] @ 8-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s19 -; BE-NEXT: mov r9, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: str r1, [sp, #84] @ 4-byte Spill -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: vrev64.32 d9, d15 -; BE-NEXT: bl 
llrintf -; BE-NEXT: vmov.f32 s0, s18 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s19 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vldr d0, [sp, #64] @ 8-byte Reload -; BE-NEXT: mov r7, r1 -; BE-NEXT: @ kill: def $s0 killed $s0 killed $d0 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add lr, sp, #40 -; BE-NEXT: str r1, [sp, #60] @ 4-byte Spill -; BE-NEXT: vmov.32 d15[1], r7 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEXT: add lr, sp, #96 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d16 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vmov.32 d14[1], r5 -; BE-NEXT: add lr, sp, #64 -; BE-NEXT: mov r10, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: mov r11, r1 -; BE-NEXT: vmov.32 d13[1], r6 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEXT: add lr, sp, #96 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d17 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vmov.32 d12[1], r9 -; BE-NEXT: add lr, sp, #96 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: .pad #32 +; BE-NEXT: sub sp, sp, #32 +; BE-NEXT: vorr q4, q1, q1 ; BE-NEXT: add lr, sp, #8 -; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-NEXT: mov r9, r1 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEXT: add lr, sp, #112 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #128 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d16 -; BE-NEXT: vmov.32 d11[1], r0 -; BE-NEXT: vmov.f32 s0, s17 
+; BE-NEXT: vorr q5, q0, q0 +; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-NEXT: vrev64.32 d12, d8 +; BE-NEXT: vmov.f32 s0, s25 ; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: vmov.f32 s0, s24 +; BE-NEXT: mov r8, r1 ; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: ldr r0, [sp, #92] @ 4-byte Reload -; BE-NEXT: add lr, sp, #128 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d10[1], r0 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vrev64.32 d0, d11 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vrev64.32 d8, d9 +; BE-NEXT: vorr d9, d0, d0 ; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; BE-NEXT: mov r5, r1 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #40 -; BE-NEXT: vrev64.32 d8, d17 -; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEXT: vstr d8, [sp, #24] @ 8-byte Spill +; BE-NEXT: bl llrintf ; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vmov.32 d13[1], r0 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d16 +; BE-NEXT: vstr d8, [sp, #8] @ 8-byte Spill ; BE-NEXT: bl llrintf ; BE-NEXT: vmov.f32 s0, s16 +; BE-NEXT: mov r4, r1 ; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: ldr r0, [sp, #60] @ 4-byte Reload +; BE-NEXT: bl llrintf +; BE-NEXT: vldr d0, [sp, #8] @ 8-byte Reload +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: vmov.f32 s0, s1 +; BE-NEXT: bl llrintf +; BE-NEXT: vldr d0, [sp, #24] @ 8-byte Reload ; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d12[1], r0 +; BE-NEXT: @ kill: def $s0 killed $s0 killed $d0 +; BE-NEXT: vmov.32 d9[0], r0 ; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add r0, r4, #64 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #8 
-; BE-NEXT: vmov.32 d17[1], r10 -; BE-NEXT: vmov.32 d16[1], r11 -; BE-NEXT: vorr q12, q8, q8 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #128 -; BE-NEXT: vmov.32 d15[1], r7 -; BE-NEXT: vmov.32 d11[1], r6 -; BE-NEXT: vmov.32 d14[1], r5 -; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-NEXT: add lr, sp, #96 -; BE-NEXT: vmov.32 d10[1], r1 -; BE-NEXT: vmov.32 d17[1], r8 -; BE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-NEXT: add lr, sp, #64 -; BE-NEXT: vmov.32 d16[1], r9 -; BE-NEXT: vrev64.32 q14, q7 -; BE-NEXT: vorr q13, q8, q8 -; BE-NEXT: vrev64.32 q15, q5 -; BE-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-NEXT: vrev64.32 q8, q6 -; BE-NEXT: vst1.64 {d28, d29}, [r0:128]! -; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-NEXT: vrev64.32 q9, q9 -; BE-NEXT: vrev64.32 q10, q10 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vrev64.32 q11, q11 -; BE-NEXT: vrev64.32 q12, q12 -; BE-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-NEXT: vst1.64 {d20, d21}, [r4:128]! -; BE-NEXT: vst1.64 {d22, d23}, [r4:128]! -; BE-NEXT: vrev64.32 q13, q13 -; BE-NEXT: vst1.64 {d24, d25}, [r4:128]! 
-; BE-NEXT: vst1.64 {d26, d27}, [r4:128] -; BE-NEXT: add sp, sp, #144 +; BE-NEXT: vmov.32 d12[0], r0 +; BE-NEXT: vmov.32 d9[1], r6 +; BE-NEXT: vmov.32 d11[1], r4 +; BE-NEXT: vmov.32 d15[1], r8 +; BE-NEXT: vmov.32 d13[1], r7 +; BE-NEXT: vmov.32 d8[1], r5 +; BE-NEXT: vmov.32 d10[1], r10 +; BE-NEXT: vmov.32 d14[1], r9 +; BE-NEXT: vmov.32 d12[1], r1 +; BE-NEXT: vrev64.32 q0, q4 +; BE-NEXT: vrev64.32 q1, q5 +; BE-NEXT: vrev64.32 q2, q7 +; BE-NEXT: vrev64.32 q3, q6 +; BE-NEXT: add sp, sp, #32 ; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: add sp, sp, #4 -; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v16i64_v16f32: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #144 -; BE-NEON-NEXT: sub sp, sp, #144 -; BE-NEON-NEXT: vorr q6, q3, q3 -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: vorr q7, q0, q0 -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #96 -; BE-NEON-NEXT: vrev64.32 d8, d13 -; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vrev64.32 d8, d14 -; BE-NEON-NEXT: add lr, sp, #128 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vrev64.32 d9, d12 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: vstr d9, [sp, #64] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s19 -; BE-NEON-NEXT: mov r9, r1 -; 
BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: vrev64.32 d9, d15 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s18 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s19 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vldr d0, [sp, #64] @ 8-byte Reload -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add lr, sp, #40 -; BE-NEON-NEXT: str r1, [sp, #60] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d15[1], r7 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #96 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d16 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d14[1], r5 -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: vmov.32 d13[1], r6 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #96 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d17 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d12[1], r9 -; BE-NEON-NEXT: add lr, sp, #96 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d10[0], 
r0 -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #128 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d16 -; BE-NEON-NEXT: vmov.32 d11[1], r0 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload -; BE-NEON-NEXT: add lr, sp, #128 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d10[1], r0 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #40 -; BE-NEON-NEXT: vrev64.32 d8, d17 -; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vmov.32 d13[1], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #60] @ 4-byte Reload -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d12[1], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add r0, r4, #64 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vmov.32 d17[1], r10 -; BE-NEON-NEXT: vmov.32 d16[1], r11 -; BE-NEON-NEXT: vorr q12, q8, q8 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #128 -; BE-NEON-NEXT: vmov.32 d15[1], r7 -; BE-NEON-NEXT: vmov.32 d11[1], r6 -; BE-NEON-NEXT: vmov.32 d14[1], r5 -; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-NEON-NEXT: add 
lr, sp, #96 -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vmov.32 d17[1], r8 -; BE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: vmov.32 d16[1], r9 -; BE-NEON-NEXT: vrev64.32 q14, q7 -; BE-NEON-NEXT: vorr q13, q8, q8 -; BE-NEON-NEXT: vrev64.32 q15, q5 -; BE-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 q8, q6 -; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! -; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 q9, q9 -; BE-NEON-NEXT: vrev64.32 q10, q10 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 q11, q11 -; BE-NEON-NEXT: vrev64.32 q12, q12 -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]! -; BE-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]! -; BE-NEON-NEXT: vrev64.32 q13, q13 -; BE-NEON-NEXT: vst1.64 {d24, d25}, [r4:128]! -; BE-NEON-NEXT: vst1.64 {d26, d27}, [r4:128] -; BE-NEON-NEXT: add sp, sp, #144 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) - ret <16 x i64> %a +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a } -declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) +declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) -define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { -; LE-LABEL: llrint_v32i64_v32f32: +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +; LE-LABEL: llrint_v16i64_v16f32: ; LE: @ %bb.0: ; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -4395,625 +1081,155 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { ; LE-NEXT: sub sp, sp, #4 ; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-NEXT: vpush {d8, d9, d10, 
d11, d12, d13, d14, d15} -; LE-NEXT: .pad #184 -; LE-NEXT: sub sp, sp, #184 -; LE-NEXT: add lr, sp, #152 -; LE-NEXT: vorr q7, q3, q3 -; LE-NEXT: vorr q4, q2, q2 -; LE-NEXT: mov r5, r0 -; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-NEXT: add lr, sp, #88 +; LE-NEXT: .pad #160 +; LE-NEXT: sub sp, sp, #160 +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: vorr q5, q3, q3 +; LE-NEXT: vorr q6, q0, q0 +; LE-NEXT: mov r4, r0 +; LE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-NEXT: add lr, sp, #48 +; LE-NEXT: vorr q7, q1, q1 ; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEXT: vmov.f32 s0, s3 -; LE-NEXT: str r0, [sp, #68] @ 4-byte Spill +; LE-NEXT: vmov.f32 s0, s23 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s18 -; LE-NEXT: add lr, sp, #168 +; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: add lr, sp, #144 ; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; LE-NEXT: str r1, [sp, #108] @ 4-byte Spill ; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s16 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s17 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s19 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s31 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s30 -; LE-NEXT: str r1, [sp, #8] @ 4-byte Spill -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: vmov.32 d11[1], r7 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s29 +; LE-NEXT: vmov.f32 s0, s25 +; LE-NEXT: str r1, [sp, #84] @ 4-byte Spill ; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: str r1, [sp, #12] @ 4-byte Spill -; LE-NEXT: vmov.32 d13[1], r4 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: add r0, sp, #320 -; LE-NEXT: add lr, sp, #120 -; LE-NEXT: mov r11, 
r1 -; LE-NEXT: vld1.64 {d0, d1}, [r0] -; LE-NEXT: add r0, sp, #304 -; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: vld1.64 {d0, d1}, [r0] -; LE-NEXT: add r0, sp, #336 -; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEXT: add lr, sp, #32 -; LE-NEXT: vld1.64 {d0, d1}, [r0] -; LE-NEXT: add r0, sp, #288 -; LE-NEXT: vmov.32 d12[1], r6 -; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEXT: add lr, sp, #48 -; LE-NEXT: vld1.64 {d0, d1}, [r0] -; LE-NEXT: vmov.32 d10[1], r8 -; LE-NEXT: add r8, r5, #64 -; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEXT: add lr, sp, #152 -; LE-NEXT: vst1.64 {d12, d13}, [r8:128]! -; LE-NEXT: vst1.64 {d10, d11}, [r8:128]! -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s27 ; LE-NEXT: bl llrintf ; LE-NEXT: vmov.f32 s0, s28 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s26 +; LE-NEXT: vmov.f32 s0, s29 ; LE-NEXT: mov r9, r1 ; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: vmov.32 d11[1], r4 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; LE-NEXT: mov r10, r1 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #168 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s26 -; LE-NEXT: vmov.32 d11[1], r0 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s25 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: add lr, sp, #168 -; LE-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; LE-NEXT: mov r7, r1 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: vorr q5, q6, q6 -; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-NEXT: vmov.32 d15[1], r0 
-; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s20 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d14[1], r0 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #152 +; LE-NEXT: vmov.f32 s0, s30 ; LE-NEXT: mov r6, r1 -; LE-NEXT: vorr q7, q6, q6 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: vmov.32 d9[1], r11 -; LE-NEXT: vmov.f32 s0, s25 +; LE-NEXT: vmov.32 d9[0], r0 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s24 +; LE-NEXT: vmov.f32 s0, s31 ; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: vmov.32 d8[1], r9 +; LE-NEXT: vmov.32 d12[0], r0 ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: mov r11, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d16[1], r10 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #120 -; LE-NEXT: vst1.64 {d8, d9}, [r8:128]! 
-; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s1 +; LE-NEXT: add lr, sp, #112 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s29 ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #152 +; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: add lr, sp, #24 ; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: mov r10, r1 +; LE-NEXT: mov r11, r1 ; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: vst1.64 {d16, d17}, [r8:128] -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s19 +; LE-NEXT: vmov.32 d13[1], r7 ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #168 -; LE-NEXT: vmov.f32 s0, s18 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vmov.f32 s0, s21 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: str r1, [sp, #40] @ 4-byte Spill ; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d16[1], r7 +; LE-NEXT: vmov.32 d16[0], r0 ; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s17 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d15[1], r4 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s16 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vmov.32 d14[1], r6 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d12[0], r0 ; LE-NEXT: add lr, sp, #88 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d11[1], r5 -; LE-NEXT: vmov.32 d10[1], r11 -; LE-NEXT: ldr r11, [sp, #68] @ 4-byte Reload ; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #16 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #32 -; LE-NEXT: vst1.64 {d14, d15}, [r11:128]! 
-; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s23 ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #152 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: add lr, sp, #120 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: @ kill: def $s0 killed $s0 killed $q0 -; LE-NEXT: vmov.32 d13[1], r10 +; LE-NEXT: vmov.f32 s0, s20 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: vmov.32 d9[1], r6 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s22 +; LE-NEXT: vmov.f32 s0, s31 ; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #152 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-NEXT: vmov.32 d15[1], r8 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s21 -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: add lr, sp, #72 +; LE-NEXT: add lr, sp, #8 ; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: vmov.32 d8[1], r9 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: add lr, sp, #64 ; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s20 -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: mov r7, r1 +; LE-NEXT: add lr, sp, #128 ; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: vmov.32 d13[1], r9 -; LE-NEXT: bl llrintf -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: add lr, sp, #32 +; LE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload ; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d12[1], r6 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #120 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s19 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: add lr, sp, #48 +; 
LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.f32 s0, s27 +; LE-NEXT: vmov.32 d11[1], r0 ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s18 -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.f32 s0, s26 ; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: vmov.32 d13[1], r4 +; LE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d10[1], r0 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #152 -; LE-NEXT: mov r4, r1 ; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; LE-NEXT: mov r5, r1 ; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d16[1], r5 +; LE-NEXT: vmov.32 d17[1], r0 ; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #168 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #48 -; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; LE-NEXT: add lr, sp, #112 ; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s21 -; LE-NEXT: bl llrintf ; LE-NEXT: vmov.f32 s0, s20 -; LE-NEXT: vmov.32 d12[1], r8 -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill ; LE-NEXT: bl llrintf -; LE-NEXT: vmov.f32 s0, s23 -; LE-NEXT: add lr, sp, #32 +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vmov.f32 s0, s22 ; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: vmov.32 d13[1], r7 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #48 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: vmov.f32 s0, s2 -; LE-NEXT: vmov.32 d12[1], r9 -; LE-NEXT: bl llrintf -; LE-NEXT: add lr, sp, #16 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: vmov.32 d11[1], r7 -; LE-NEXT: vst1.64 {d16, d17}, [r11:128]! ; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #152 -; LE-NEXT: vmov.32 d15[1], r10 -; LE-NEXT: vst1.64 {d16, d17}, [r11:128] -; LE-NEXT: vmov.32 d10[1], r1 -; LE-NEXT: ldr r1, [sp, #68] @ 4-byte Reload +; LE-NEXT: vmov.32 d16[0], r0 +; LE-NEXT: vmov.32 d17[1], r11 +; LE-NEXT: vorr q6, q8, q8 +; LE-NEXT: bl llrintf +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload ; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add r0, r1, #192 -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: vmov.32 d14[1], r4 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEXT: vst1.64 {d14, d15}, [r0:128]! 
+; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #128 +; LE-NEXT: vmov.32 d9[1], r9 +; LE-NEXT: vmov.32 d12[1], r6 +; LE-NEXT: vmov.32 d19[1], r10 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vmov.32 d16[1], r0 +; LE-NEXT: add r0, r4, #64 +; LE-NEXT: vmov.32 d18[1], r8 ; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEXT: vmov.32 d9[1], r5 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: vmov.32 d8[1], r6 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEXT: add r0, r1, #128 ; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: vmov.32 d15[1], r7 ; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEXT: add lr, sp, #64 +; LE-NEXT: vmov.32 d14[1], r5 +; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-NEXT: vst1.64 {d14, d15}, [r4:128]! ; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEXT: add sp, sp, #184 +; LE-NEXT: add lr, sp, #88 +; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! 
+; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-NEXT: add sp, sp, #160 ; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-NEXT: add sp, sp, #4 ; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-NEON-LABEL: llrint_v32i64_v32f32: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #184 -; LE-NEON-NEXT: sub sp, sp, #184 -; LE-NEON-NEXT: add lr, sp, #152 -; LE-NEON-NEXT: vorr q7, q3, q3 -; LE-NEON-NEXT: vorr q4, q2, q2 -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEON-NEXT: vmov.f32 s0, s3 -; LE-NEON-NEXT: str r0, [sp, #68] @ 4-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: add lr, sp, #168 -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: str r1, [sp, #16] @ 4-byte Spill -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s17 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s19 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s31 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s30 -; LE-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: vmov.32 d11[1], r7 -; LE-NEON-NEXT: bl llrintf -; 
LE-NEON-NEXT: vmov.f32 s0, s29 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d13[1], r4 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: add r0, sp, #320 -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0] -; LE-NEON-NEXT: add r0, sp, #304 -; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0] -; LE-NEON-NEXT: add r0, sp, #336 -; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #32 -; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0] -; LE-NEON-NEXT: add r0, sp, #288 -; LE-NEON-NEXT: vmov.32 d12[1], r6 -; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #48 -; LE-NEON-NEXT: vld1.64 {d0, d1}, [r0] -; LE-NEON-NEXT: vmov.32 d10[1], r8 -; LE-NEON-NEXT: add r8, r5, #64 -; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #152 -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r8:128]! -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r8:128]! 
-; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s27 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s28 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s26 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vmov.32 d11[1], r4 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #168 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s26 -; LE-NEON-NEXT: vmov.32 d11[1], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s25 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: add lr, sp, #168 -; LE-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vorr q5, q6, q6 -; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d15[1], r0 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s20 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d14[1], r0 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #152 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vorr q7, q6, q6 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d9[1], r11 -; LE-NEON-NEXT: vmov.f32 s0, s25 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s24 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; 
LE-NEON-NEXT: vmov.32 d8[1], r9 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d16[1], r10 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vst1.64 {d8, d9}, [r8:128]! -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s1 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #152 -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128] -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s19 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #168 -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d16[1], r7 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s17 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d15[1], r4 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s16 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vmov.32 d14[1], r6 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d11[1], r5 -; LE-NEON-NEXT: vmov.32 d10[1], r11 -; LE-NEON-NEXT: ldr r11, [sp, #68] @ 4-byte Reload -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #16 -; LE-NEON-NEXT: vstmia lr, 
{d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #32 -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]! -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s23 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #152 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: @ kill: def $s0 killed $s0 killed $q0 -; LE-NEON-NEXT: vmov.32 d13[1], r10 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s22 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #152 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d15[1], r8 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s21 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d14[1], r7 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s20 -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d13[1], r9 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: add lr, sp, #32 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d12[1], r6 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s19 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s18 -; LE-NEON-NEXT: add lr, sp, 
#72 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d13[1], r4 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #152 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d16[1], r5 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #168 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #48 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s21 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s20 -; LE-NEON-NEXT: vmov.32 d12[1], r8 -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: vmov.f32 s0, s23 -; LE-NEON-NEXT: add lr, sp, #32 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d13[1], r7 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #48 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: vmov.f32 s0, s2 -; LE-NEON-NEXT: vmov.32 d12[1], r9 -; LE-NEON-NEXT: bl llrintf -; LE-NEON-NEXT: add lr, sp, #16 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: vmov.32 d11[1], r7 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
-; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #152 -; LE-NEON-NEXT: vmov.32 d15[1], r10 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] -; LE-NEON-NEXT: vmov.32 d10[1], r1 -; LE-NEON-NEXT: ldr r1, [sp, #68] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add r0, r1, #192 -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: vmov.32 d14[1], r4 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEON-NEXT: vmov.32 d9[1], r5 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vmov.32 d8[1], r6 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEON-NEXT: add r0, r1, #128 -; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEON-NEXT: add sp, sp, #184 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-LABEL: llrint_v32i64_v32f32: +; BE-LABEL: llrint_v16i64_v16f32: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -5021,655 +1237,171 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { ; BE-NEXT: sub sp, sp, #4 ; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #256 -; BE-NEXT: sub sp, sp, #256 -; BE-NEXT: add lr, sp, #208 -; BE-NEXT: str r0, [sp, #156] @ 4-byte Spill -; BE-NEXT: add r0, sp, #408 -; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-NEXT: add lr, sp, #120 -; BE-NEXT: vld1.64 {d10, d11}, [r0] -; BE-NEXT: add r0, sp, #392 +; BE-NEXT: .pad #144 +; BE-NEXT: sub sp, sp, #144 +; BE-NEXT: vorr q6, q3, q3 +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vorr q7, q0, q0 +; BE-NEXT: mov r4, r0 ; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-NEXT: add lr, sp, #160 +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vrev64.32 d8, d13 ; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-NEXT: add lr, sp, #176 -; BE-NEXT: vrev64.32 d8, d10 -; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-NEXT: add lr, sp, #136 ; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vld1.64 {d12, d13}, [r0] -; BE-NEXT: add r0, sp, #360 -; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEXT: add lr, sp, #192 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #376 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #40 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-NEXT: bl llrintf ; BE-NEXT: vmov.f32 s0, s16 ; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-NEXT: 
vmov.32 d15[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vrev64.32 d9, d11 -; BE-NEXT: add lr, sp, #240 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: str r1, [sp, #104] @ 4-byte Spill -; BE-NEXT: vmov.f32 s0, s18 -; BE-NEXT: vrev64.32 d8, d13 -; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s19 -; BE-NEXT: add lr, sp, #192 -; BE-NEXT: str r1, [sp, #72] @ 4-byte Spill -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vrev64.32 d10, d16 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s20 -; BE-NEXT: add lr, sp, #224 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s21 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: mov r9, r1 -; BE-NEXT: vmov.32 d15[1], r6 -; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEXT: add lr, sp, #192 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d17 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vmov.32 d14[1], r7 -; BE-NEXT: add lr, sp, #56 -; BE-NEXT: mov r10, r1 ; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill ; BE-NEXT: bl llrintf +; BE-NEXT: vrev64.32 d8, d14 +; BE-NEXT: add lr, sp, #128 ; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add lr, sp, #192 -; BE-NEXT: mov r11, r1 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEXT: add lr, sp, #40 -; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEXT: add lr, sp, #224 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d12 -; BE-NEXT: vmov.32 d11[1], 
r4 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: bl llrintf +; BE-NEXT: str r1, [sp, #92] @ 4-byte Spill ; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vmov.32 d10[1], r5 -; BE-NEXT: add lr, sp, #224 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vrev64.32 d9, d12 ; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: vstr d9, [sp, #64] @ 8-byte Spill ; BE-NEXT: bl llrintf -; BE-NEXT: vrev64.32 d8, d13 -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEXT: add lr, sp, #240 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vmov.32 d11[1], r0 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: ldr r0, [sp, #104] @ 4-byte Reload -; BE-NEXT: add lr, sp, #240 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d10[1], r0 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d12[0], r0 ; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #136 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #72] @ 4-byte Reload -; BE-NEXT: mov r6, r1 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d16 -; BE-NEXT: vmov.32 d13[1], r0 ; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r7, r1 +; BE-NEXT: str r1, [sp, #84] @ 4-byte Spill ; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: vmov.32 d12[1], r9 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #192 -; BE-NEXT: vmov.32 d15[1], r4 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vmov.32 d17[1], r10 -; BE-NEXT: vmov.32 d16[1], r11 -; BE-NEXT: vorr q9, q8, q8 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #192 -; BE-NEXT: vmov.32 d17[1], r8 -; 
BE-NEXT: vmov.32 d16[1], r5 -; BE-NEXT: vorr q10, q8, q8 -; BE-NEXT: vrev64.32 q8, q6 -; BE-NEXT: vmov.32 d14[1], r6 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #240 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: vrev64.32 q8, q8 -; BE-NEXT: vmov.32 d11[1], r7 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #224 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vmov.32 d10[1], r1 -; BE-NEXT: vrev64.32 q8, q8 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #56 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #136 -; BE-NEXT: vrev64.32 q8, q8 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #104 -; BE-NEXT: vrev64.32 q8, q9 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #88 -; BE-NEXT: vrev64.32 q8, q10 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #72 -; BE-NEXT: vrev64.32 q8, q7 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #208 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #56 -; BE-NEXT: vrev64.32 d8, d17 -; BE-NEXT: vrev64.32 q8, q5 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: vrev64.32 d9, d15 ; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.f32 s0, s18 +; BE-NEXT: mov r6, r1 ; BE-NEXT: vmov.32 d13[0], r0 ; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #120 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vmov.32 d13[1], r4 -; BE-NEXT: vrev64.32 d8, d10 -; BE-NEXT: vmov.32 d12[1], r1 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vrev64.32 q6, q6 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: vmov.32 d15[1], r1 -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r6, [sp, 
#156] @ 4-byte Reload -; BE-NEXT: vrev64.32 d8, d11 -; BE-NEXT: add r5, r6, #64 -; BE-NEXT: vmov.32 d14[1], r1 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vrev64.32 q8, q7 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: vmov.32 d15[1], r1 -; BE-NEXT: bl llrintf -; BE-NEXT: add lr, sp, #208 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-NEXT: vmov.32 d14[1], r1 -; BE-NEXT: vrev64.32 d8, d18 -; BE-NEXT: vrev64.32 q8, q7 -; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: vmov.f32 s0, s19 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d14[0], r0 ; BE-NEXT: bl llrintf -; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r4, r1 +; BE-NEXT: vldr d0, [sp, #64] @ 8-byte Reload +; BE-NEXT: mov r7, r1 +; BE-NEXT: @ kill: def $s0 killed $s0 killed $d0 ; BE-NEXT: vmov.32 d15[0], r0 ; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: add lr, sp, #160 -; BE-NEXT: vmov.32 d15[1], r4 -; BE-NEXT: vmov.32 d14[1], r1 -; BE-NEXT: vrev64.32 q8, q7 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d11 -; BE-NEXT: vst1.64 {d12, d13}, [r5:128] +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #40 +; BE-NEXT: str r1, [sp, #60] @ 4-byte Spill +; BE-NEXT: vmov.32 d15[1], r7 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d16 ; BE-NEXT: vmov.f32 s0, s17 ; BE-NEXT: bl llrintf ; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill ; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: add lr, sp, #208 -; BE-NEXT: vmov.32 d13[1], r4 -; BE-NEXT: vmov.32 d12[1], r1 -; BE-NEXT: vrev64.32 q8, q6 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #176 -; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEXT: vrev64.32 d8, d12 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vmov.32 d13[1], r6 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d17 ; BE-NEXT: vmov.f32 s0, s17 ; BE-NEXT: bl llrintf ; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: vmov.32 d15[1], r1 +; BE-NEXT: vmov.32 d12[1], r9 +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: mov r8, r1 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill ; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: mov r5, r6 -; BE-NEXT: vrev64.32 d8, d13 -; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload +; BE-NEXT: mov r9, r1 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vldmia lr, {d16, d17} @ 
16-byte Reload +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; BE-NEXT: vrev64.32 d8, d16 +; BE-NEXT: vmov.32 d11[1], r0 ; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vrev64.32 q8, q7 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! ; BE-NEXT: bl llrintf ; BE-NEXT: vmov.f32 s0, s16 ; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: vmov.32 d15[1], r1 +; BE-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d10[1], r0 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; BE-NEXT: bl llrintf +; BE-NEXT: add lr, sp, #112 ; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: vrev64.32 d8, d10 -; BE-NEXT: vmov.32 d14[1], r1 +; BE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-NEXT: mov r5, r1 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #40 +; BE-NEXT: vrev64.32 d8, d17 +; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload ; BE-NEXT: vmov.f32 s0, s17 -; BE-NEXT: vrev64.32 q8, q7 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: vmov.32 d13[1], r0 ; BE-NEXT: bl llrintf ; BE-NEXT: vmov.f32 s0, s16 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: ldr r0, [sp, #60] @ 4-byte Reload +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d12[1], r0 ; BE-NEXT: bl llrintf -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: add lr, sp, #208 -; BE-NEXT: add r0, r6, #192 -; BE-NEXT: vmov.32 d15[1], r4 -; BE-NEXT: vmov.32 d14[1], r1 -; BE-NEXT: vrev64.32 q8, q7 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #56 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128] -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #192 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #240 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #224 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #136 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-NEXT: add r0, r6, #128 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #104 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add r0, r4, #64 ; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #88 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.32 d17[1], r10 +; BE-NEXT: vmov.32 d16[1], r11 +; BE-NEXT: vorr q12, q8, q8 ; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #72 +; BE-NEXT: add lr, sp, #128 +; BE-NEXT: vmov.32 d15[1], r7 +; BE-NEXT: vmov.32 d11[1], r6 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-NEXT: add lr, sp, #96 +; BE-NEXT: vmov.32 d10[1], r1 +; BE-NEXT: vmov.32 d17[1], r8 +; BE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: vmov.32 d16[1], r9 +; BE-NEXT: vrev64.32 q14, q7 +; BE-NEXT: vorr q13, q8, q8 +; BE-NEXT: vrev64.32 q15, q5 +; BE-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-NEXT: vrev64.32 q8, q6 +; BE-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEXT: vrev64.32 q9, q9 +; BE-NEXT: vrev64.32 q10, q10 ; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-NEXT: add sp, sp, #256 +; BE-NEXT: vrev64.32 q11, q11 +; BE-NEXT: vrev64.32 q12, q12 +; BE-NEXT: vst1.64 {d18, d19}, [r0:128] +; BE-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-NEXT: vrev64.32 q13, q13 +; BE-NEXT: vst1.64 {d24, d25}, [r4:128]! 
+; BE-NEXT: vst1.64 {d26, d27}, [r4:128] +; BE-NEXT: add sp, sp, #144 ; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-NEXT: add sp, sp, #4 ; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v32i64_v32f32: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #256 -; BE-NEON-NEXT: sub sp, sp, #256 -; BE-NEON-NEXT: add lr, sp, #208 -; BE-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill -; BE-NEON-NEXT: add r0, sp, #408 -; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #120 -; BE-NEON-NEXT: vld1.64 {d10, d11}, [r0] -; BE-NEON-NEXT: add r0, sp, #392 -; BE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #160 -; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #176 -; BE-NEON-NEXT: vrev64.32 d8, d10 -; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #136 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vld1.64 {d12, d13}, [r0] -; BE-NEON-NEXT: add r0, sp, #360 -; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #192 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #376 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #40 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vrev64.32 d9, d11 -; BE-NEON-NEXT: add lr, sp, #240 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: str r1, [sp, #104] @ 4-byte 
Spill -; BE-NEON-NEXT: vmov.f32 s0, s18 -; BE-NEON-NEXT: vrev64.32 d8, d13 -; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s19 -; BE-NEON-NEXT: add lr, sp, #192 -; BE-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d10, d16 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s20 -; BE-NEON-NEXT: add lr, sp, #224 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s21 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vmov.32 d15[1], r6 -; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #192 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d17 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d14[1], r7 -; BE-NEON-NEXT: add lr, sp, #56 -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add lr, sp, #192 -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #40 -; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #224 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; 
BE-NEON-NEXT: vrev64.32 d8, d12 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d10[1], r5 -; BE-NEON-NEXT: add lr, sp, #224 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vrev64.32 d8, d13 -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #240 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d11[1], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload -; BE-NEON-NEXT: add lr, sp, #240 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d10[1], r0 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #136 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d16 -; BE-NEON-NEXT: vmov.32 d13[1], r0 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: vmov.32 d12[1], r9 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #192 -; BE-NEON-NEXT: vmov.32 d15[1], r4 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vmov.32 d17[1], r10 -; BE-NEON-NEXT: vmov.32 d16[1], r11 -; BE-NEON-NEXT: vorr q9, q8, q8 -; BE-NEON-NEXT: vldmia lr, {d16, 
d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #192 -; BE-NEON-NEXT: vmov.32 d17[1], r8 -; BE-NEON-NEXT: vmov.32 d16[1], r5 -; BE-NEON-NEXT: vorr q10, q8, q8 -; BE-NEON-NEXT: vrev64.32 q8, q6 -; BE-NEON-NEXT: vmov.32 d14[1], r6 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #240 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: vrev64.32 q8, q8 -; BE-NEON-NEXT: vmov.32 d11[1], r7 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #224 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q8 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #56 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #136 -; BE-NEON-NEXT: vrev64.32 q8, q8 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #104 -; BE-NEON-NEXT: vrev64.32 q8, q9 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #88 -; BE-NEON-NEXT: vrev64.32 q8, q10 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #72 -; BE-NEON-NEXT: vrev64.32 q8, q7 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #208 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #56 -; BE-NEON-NEXT: vrev64.32 d8, d17 -; BE-NEON-NEXT: vrev64.32 q8, q5 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #120 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d13[1], r4 -; BE-NEON-NEXT: vrev64.32 d8, d10 -; BE-NEON-NEXT: vmov.32 d12[1], 
r1 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vrev64.32 q6, q6 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vmov.32 d15[1], r1 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r6, [sp, #156] @ 4-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d11 -; BE-NEON-NEXT: add r5, r6, #64 -; BE-NEON-NEXT: vmov.32 d14[1], r1 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vrev64.32 q8, q7 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vmov.32 d15[1], r1 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: add lr, sp, #208 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d14[1], r1 -; BE-NEON-NEXT: vrev64.32 d8, d18 -; BE-NEON-NEXT: vrev64.32 q8, q7 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: add lr, sp, #160 -; BE-NEON-NEXT: vmov.32 d15[1], r4 -; BE-NEON-NEXT: vmov.32 d14[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q7 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d11 -; BE-NEON-NEXT: vst1.64 {d12, d13}, [r5:128] -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: add lr, sp, #208 -; BE-NEON-NEXT: vmov.32 d13[1], r4 -; BE-NEON-NEXT: vmov.32 d12[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q6 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #176 -; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 d8, d12 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vmov.32 d15[1], r1 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: mov r5, r6 -; BE-NEON-NEXT: vrev64.32 d8, d13 -; BE-NEON-NEXT: vmov.32 d14[1], r1 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vrev64.32 q8, q7 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vmov.32 d15[1], r1 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: vrev64.32 d8, d10 -; BE-NEON-NEXT: vmov.32 d14[1], r1 -; BE-NEON-NEXT: vmov.f32 s0, s17 -; BE-NEON-NEXT: vrev64.32 q8, q7 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.f32 s0, s16 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: bl llrintf -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: add lr, sp, #208 -; BE-NEON-NEXT: add r0, r6, #192 -; BE-NEON-NEXT: vmov.32 d15[1], r4 -; BE-NEON-NEXT: vmov.32 d14[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q7 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #56 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #192 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #240 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #224 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #136 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-NEON-NEXT: add r0, r6, #128 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #104 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #88 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #72 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-NEON-NEXT: add sp, sp, #256 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x) - ret <32 x i64> %a + %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a } -declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>) +declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; LE-LABEL: llrint_v1i64_v1f64: @@ -5681,15 +1413,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; LE-NEXT: vmov.32 d0[1], r1 ; LE-NEXT: pop {r11, pc} ; -; LE-NEON-LABEL: llrint_v1i64_v1f64: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r11, lr} -; LE-NEON-NEXT: push {r11, lr} -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d0[0], r0 -; LE-NEON-NEXT: vmov.32 d0[1], r1 -; LE-NEON-NEXT: pop {r11, pc} -; ; BE-LABEL: llrint_v1i64_v1f64: ; BE: @ %bb.0: ; BE-NEXT: .save {r11, lr} @@ -5699,16 +1422,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; BE-NEXT: vmov.32 d16[1], r1 ; BE-NEXT: vrev64.32 d0, d16 ; BE-NEXT: pop {r11, pc} -; -; BE-NEON-LABEL: llrint_v1i64_v1f64: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r11, lr} -; BE-NEON-NEXT: push {r11, lr} -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d0, d16 -; BE-NEON-NEXT: pop {r11, pc} %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x) ret <1 x i64> %a } @@ -5735,26 +1448,6 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; LE-NEXT: vpop {d8, d9, d10, d11} ; LE-NEXT: pop {r4, pc} ; -; LE-NEON-LABEL: llrint_v2i64_v2f64: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, lr} -; LE-NEON-NEXT: push {r4, lr} -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11} -; LE-NEON-NEXT: 
vpush {d8, d9, d10, d11} -; LE-NEON-NEXT: vorr q4, q0, q0 -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: vmov.32 d11[1], r4 -; LE-NEON-NEXT: vmov.32 d10[1], r1 -; LE-NEON-NEXT: vorr q0, q5, q5 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11} -; LE-NEON-NEXT: pop {r4, pc} -; ; BE-LABEL: llrint_v2i64_v2f64: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, lr} @@ -5774,26 +1467,6 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; BE-NEXT: vrev64.32 q0, q5 ; BE-NEXT: vpop {d8, d9, d10, d11} ; BE-NEXT: pop {r4, pc} -; -; BE-NEON-LABEL: llrint_v2i64_v2f64: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, lr} -; BE-NEON-NEXT: push {r4, lr} -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11} -; BE-NEON-NEXT: vorr q4, q0, q0 -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vrev64.32 q0, q5 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11} -; BE-NEON-NEXT: pop {r4, pc} %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) ret <2 x i64> %a } @@ -5832,38 +1505,6 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-NEXT: pop {r4, r5, r6, pc} ; -; LE-NEON-LABEL: llrint_v4i64_v4f64: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, lr} -; LE-NEON-NEXT: push {r4, r5, r6, lr} -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vorr q5, q1, q1 -; LE-NEON-NEXT: vorr q6, q0, q0 -; LE-NEON-NEXT: vorr d0, d11, d11 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr 
d0, d12, d12 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d13, d13 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d10, d10 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: vmov.32 d9[1], r4 -; LE-NEON-NEXT: vmov.32 d14[1], r5 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vorr q0, q7, q7 -; LE-NEON-NEXT: vorr q1, q4, q4 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: pop {r4, r5, r6, pc} -; ; BE-LABEL: llrint_v4i64_v4f64: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r6, lr} @@ -5883,1039 +1524,161 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; BE-NEXT: vmov.32 d14[0], r0 ; BE-NEXT: bl llrint ; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: vmov.32 d15[1], r6 -; BE-NEXT: vmov.32 d13[1], r4 -; BE-NEXT: vmov.32 d14[1], r5 -; BE-NEXT: vmov.32 d12[1], r1 -; BE-NEXT: vrev64.32 q0, q7 -; BE-NEXT: vrev64.32 q1, q6 -; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: pop {r4, r5, r6, pc} -; -; BE-NEON-LABEL: llrint_v4i64_v4f64: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, lr} -; BE-NEON-NEXT: push {r4, r5, r6, lr} -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vorr q4, q1, q1 -; BE-NEON-NEXT: vorr q5, q0, q0 -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d10, d10 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d11, d11 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; 
BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: vmov.32 d15[1], r6 -; BE-NEON-NEXT: vmov.32 d13[1], r4 -; BE-NEON-NEXT: vmov.32 d14[1], r5 -; BE-NEON-NEXT: vmov.32 d12[1], r1 -; BE-NEON-NEXT: vrev64.32 q0, q7 -; BE-NEON-NEXT: vrev64.32 q1, q6 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: pop {r4, r5, r6, pc} - %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) - ret <4 x i64> %a -} -declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) - -define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { -; LE-LABEL: llrint_v8i64_v8f64: -; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #40 -; LE-NEXT: sub sp, sp, #40 -; LE-NEXT: vorr q4, q0, q0 -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vorr d0, d7, d7 -; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-NEXT: vorr q7, q2, q2 -; LE-NEXT: vorr q6, q1, q1 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d14, d14 -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d15, d15 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d12, d12 -; LE-NEXT: mov r10, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d13, d13 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d8, d8 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte 
Reload -; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: vmov.32 d13[1], r6 -; LE-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload -; LE-NEXT: vmov.32 d15[1], r4 -; LE-NEXT: vmov.32 d11[1], r10 -; LE-NEXT: vmov.32 d6[0], r0 -; LE-NEXT: vmov.32 d12[1], r5 -; LE-NEXT: vmov.32 d14[1], r7 -; LE-NEXT: vorr q0, q6, q6 -; LE-NEXT: vmov.32 d10[1], r9 -; LE-NEXT: vorr q1, q7, q7 -; LE-NEXT: vmov.32 d7[1], r8 -; LE-NEXT: vorr q2, q5, q5 -; LE-NEXT: vmov.32 d6[1], r1 -; LE-NEXT: add sp, sp, #40 -; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; LE-NEON-LABEL: llrint_v8i64_v8f64: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #40 -; LE-NEON-NEXT: sub sp, sp, #40 -; LE-NEON-NEXT: vorr q4, q0, q0 -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vorr d0, d7, d7 -; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-NEON-NEXT: vorr q7, q2, q2 -; LE-NEON-NEXT: vorr q6, q1, q1 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d14, d14 -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d15, d15 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d12, d12 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d13, d13 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d9, 
d9 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: vmov.32 d13[1], r6 -; LE-NEON-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d15[1], r4 -; LE-NEON-NEXT: vmov.32 d11[1], r10 -; LE-NEON-NEXT: vmov.32 d6[0], r0 -; LE-NEON-NEXT: vmov.32 d12[1], r5 -; LE-NEON-NEXT: vmov.32 d14[1], r7 -; LE-NEON-NEXT: vorr q0, q6, q6 -; LE-NEON-NEXT: vmov.32 d10[1], r9 -; LE-NEON-NEXT: vorr q1, q7, q7 -; LE-NEON-NEXT: vmov.32 d7[1], r8 -; LE-NEON-NEXT: vorr q2, q5, q5 -; LE-NEON-NEXT: vmov.32 d6[1], r1 -; LE-NEON-NEXT: add sp, sp, #40 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-LABEL: llrint_v8i64_v8f64: -; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #40 -; BE-NEXT: sub sp, sp, #40 -; BE-NEXT: vorr q4, q0, q0 -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: vorr d0, d7, d7 -; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-NEXT: vorr q7, q2, q2 -; BE-NEXT: vorr q6, q1, q1 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d14, d14 -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vmov.32 d17[0], r0 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d15, d15 -; BE-NEXT: mov r9, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d12, d12 -; BE-NEXT: mov r10, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d13, d13 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d14[0], r0 -; 
BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: mov r4, r1 +; BE-NEXT: mov r6, r1 ; BE-NEXT: vmov.32 d15[0], r0 ; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d9, d9 -; BE-NEXT: mov r5, r1 ; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vmov.32 d13[1], r6 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vmov.32 d15[1], r4 -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov.32 d11[1], r10 -; BE-NEXT: vmov.32 d17[1], r8 -; BE-NEXT: vmov.32 d12[1], r5 -; BE-NEXT: vmov.32 d14[1], r7 -; BE-NEXT: vmov.32 d10[1], r9 -; BE-NEXT: vmov.32 d16[1], r1 -; BE-NEXT: vrev64.32 q0, q6 -; BE-NEXT: vrev64.32 q1, q7 -; BE-NEXT: vrev64.32 q2, q5 -; BE-NEXT: vrev64.32 q3, q8 -; BE-NEXT: add sp, sp, #40 +; BE-NEXT: vmov.32 d15[1], r6 +; BE-NEXT: vmov.32 d13[1], r4 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: vmov.32 d12[1], r1 +; BE-NEXT: vrev64.32 q0, q7 +; BE-NEXT: vrev64.32 q1, q6 ; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-NEON-LABEL: llrint_v8i64_v8f64: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #40 -; BE-NEON-NEXT: sub sp, sp, #40 -; BE-NEON-NEXT: vorr q4, q0, q0 -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: vorr d0, d7, d7 -; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-NEON-NEXT: vorr q7, q2, q2 -; BE-NEON-NEXT: vorr q6, q1, q1 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d14, d14 -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vmov.32 d17[0], r0 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vstmia 
lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d15, d15 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d12, d12 -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d13, d13 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vmov.32 d13[1], r6 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d15[1], r4 -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d11[1], r10 -; BE-NEON-NEXT: vmov.32 d17[1], r8 -; BE-NEON-NEXT: vmov.32 d12[1], r5 -; BE-NEON-NEXT: vmov.32 d14[1], r7 -; BE-NEON-NEXT: vmov.32 d10[1], r9 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 q0, q6 -; BE-NEON-NEXT: vrev64.32 q1, q7 -; BE-NEON-NEXT: vrev64.32 q2, q5 -; BE-NEON-NEXT: vrev64.32 q3, q8 -; BE-NEON-NEXT: add sp, sp, #40 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} - %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) - ret <8 x i64> %a +; BE-NEXT: pop {r4, r5, r6, pc} + %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a } -declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) +declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) -define <16 x i64> @llrint_v16f64(<16 x double> %x) { -; LE-LABEL: 
llrint_v16f64: +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +; LE-LABEL: llrint_v8i64_v8f64: ; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: .pad #4 -; LE-NEXT: sub sp, sp, #4 +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #176 -; LE-NEXT: sub sp, sp, #176 -; LE-NEXT: add lr, sp, #40 -; LE-NEXT: str r0, [sp, #140] @ 4-byte Spill -; LE-NEXT: add r0, sp, #312 -; LE-NEXT: vorr q6, q2, q2 +; LE-NEXT: .pad #40 +; LE-NEXT: sub sp, sp, #40 +; LE-NEXT: vorr q4, q0, q0 +; LE-NEXT: add lr, sp, #24 +; LE-NEXT: vorr d0, d7, d7 ; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: vorr q7, q1, q1 -; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: vorr d0, d1, d1 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #280 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #80 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #296 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #120 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #328 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-NEXT: vorr q7, q2, q2 +; LE-NEXT: vorr q6, q1, q1 ; LE-NEXT: bl llrint ; LE-NEXT: vorr d0, d14, d14 -; LE-NEXT: str r1, [sp, #116] @ 4-byte Spill -; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-NEXT: bl llrint ; LE-NEXT: vorr d0, d15, d15 -; LE-NEXT: str r1, [sp, #76] @ 4-byte Spill -; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: mov r9, 
r1 +; LE-NEXT: vmov.32 d10[0], r0 ; LE-NEXT: bl llrint ; LE-NEXT: vorr d0, d12, d12 -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: str r1, [sp, #72] @ 4-byte Spill -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d11[0], r0 ; LE-NEXT: bl llrint ; LE-NEXT: vorr d0, d13, d13 -; LE-NEXT: mov r6, r1 +; LE-NEXT: mov r7, r1 ; LE-NEXT: vmov.32 d14[0], r0 ; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vorr d0, d8, d8 ; LE-NEXT: mov r4, r1 ; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vorr d0, d8, d8 ; LE-NEXT: bl llrint ; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #96 ; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: add lr, sp, #40 -; LE-NEXT: mov r10, r1 -; LE-NEXT: vmov.32 d13[1], r5 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d8, d8 -; LE-NEXT: vmov.32 d12[1], r7 -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: vmov.32 d12[0], r0 ; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d10[0], r0 ; LE-NEXT: add lr, sp, #24 -; LE-NEXT: mov r11, r1 -; LE-NEXT: vmov.32 d15[1], r4 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vorr d0, d17, d17 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: vmov.32 d14[1], r6 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vstmia lr, {d14, 
d15} @ 16-byte Spill -; LE-NEXT: add lr, sp, #80 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vorr d0, d11, d11 -; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vorr d0, d10, d10 -; LE-NEXT: ldr r0, [sp, #72] @ 4-byte Reload -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d9[1], r0 -; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d8[1], r0 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: add lr, sp, #120 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vorr d0, d11, d11 -; LE-NEXT: bl llrint ; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: add lr, sp, #40 -; LE-NEXT: vorr d0, d10, d10 -; LE-NEXT: ldr r0, [sp, #116] @ 4-byte Reload -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d9[1], r0 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d12[0], r0 ; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload ; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEXT: vmov.32 d8[1], r10 ; LE-NEXT: bl llrint ; LE-NEXT: add lr, sp, #8 -; LE-NEXT: vmov.32 d15[1], r6 -; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vmov.32 d20[0], r0 -; LE-NEXT: vmov.32 d21[1], r8 -; LE-NEXT: vmov.32 d20[1], r1 -; LE-NEXT: ldr r1, [sp, #140] @ 4-byte Reload -; LE-NEXT: vmov.32 d13[1], r5 -; LE-NEXT: mov r0, r1 -; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vmov.32 d14[1], r4 -; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: vmov.32 d12[1], r7 -; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! 
-; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEXT: vmov.32 d17[1], r9 -; LE-NEXT: vst1.64 {d18, d19}, [r0:128] -; LE-NEXT: add r0, r1, #64 -; LE-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEXT: vmov.32 d16[1], r11 -; LE-NEXT: vst1.64 {d20, d21}, [r0:128]! -; LE-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEXT: add sp, sp, #176 +; LE-NEXT: vmov.32 d13[1], r6 +; LE-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: vmov.32 d11[1], r10 +; LE-NEXT: vmov.32 d6[0], r0 +; LE-NEXT: vmov.32 d12[1], r5 +; LE-NEXT: vmov.32 d14[1], r7 +; LE-NEXT: vorr q0, q6, q6 +; LE-NEXT: vmov.32 d10[1], r9 +; LE-NEXT: vorr q1, q7, q7 +; LE-NEXT: vmov.32 d7[1], r8 +; LE-NEXT: vorr q2, q5, q5 +; LE-NEXT: vmov.32 d6[1], r1 +; LE-NEXT: add sp, sp, #40 ; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: add sp, sp, #4 -; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-NEON-LABEL: llrint_v16f64: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #176 -; LE-NEON-NEXT: sub sp, sp, #176 -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: str r0, [sp, #140] @ 4-byte Spill -; LE-NEON-NEXT: add r0, sp, #312 -; LE-NEON-NEXT: vorr q6, q2, q2 -; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: vorr q7, q1, q1 -; LE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: vorr d0, d1, d1 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, sp, #280 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, 
sp, #296 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, sp, #328 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d14, d14 -; LE-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d15, d15 -; LE-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d12, d12 -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d13, d13 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d13[1], r5 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: 
vmov.32 d12[1], r7 -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vmov.32 d15[1], r4 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d17, d17 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: vmov.32 d14[1], r6 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d11, d11 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vorr d0, d10, d10 -; LE-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d9[1], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d8[1], r0 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d11, d11 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vorr d0, d10, d10 -; LE-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d9[1], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #144 -; 
LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEON-NEXT: vmov.32 d8[1], r10 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vmov.32 d20[0], r0 -; LE-NEON-NEXT: vmov.32 d21[1], r8 -; LE-NEON-NEXT: vmov.32 d20[1], r1 -; LE-NEON-NEXT: ldr r1, [sp, #140] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d13[1], r5 -; LE-NEON-NEXT: mov r0, r1 -; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vmov.32 d14[1], r4 -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: vmov.32 d12[1], r7 -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d17[1], r9 -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; LE-NEON-NEXT: add r0, r1, #64 -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEON-NEXT: vmov.32 d16[1], r11 -; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! 
-; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEON-NEXT: add sp, sp, #176 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; -; BE-LABEL: llrint_v16f64: +; BE-LABEL: llrint_v8i64_v8f64: ; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: .pad #4 -; BE-NEXT: sub sp, sp, #4 +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #168 -; BE-NEXT: sub sp, sp, #168 -; BE-NEXT: add lr, sp, #64 -; BE-NEXT: str r0, [sp, #132] @ 4-byte Spill -; BE-NEXT: add r0, sp, #304 -; BE-NEXT: vorr q4, q3, q3 -; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-NEXT: add lr, sp, #48 -; BE-NEXT: vorr d0, d1, d1 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #320 -; BE-NEXT: vorr q6, q2, q2 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #88 -; BE-NEXT: vorr q7, q1, q1 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #272 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #112 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #288 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: .pad #40 +; BE-NEXT: sub sp, sp, #40 +; BE-NEXT: vorr q4, q0, q0 ; BE-NEXT: add lr, sp, #24 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: vorr d0, d7, d7 +; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-NEXT: vorr q7, q2, q2 +; BE-NEXT: vorr q6, q1, q1 ; BE-NEXT: bl llrint ; BE-NEXT: vorr d0, d14, d14 -; BE-NEXT: add lr, sp, #136 +; BE-NEXT: add lr, sp, #8 ; BE-NEXT: vmov.32 d17[0], r0 -; BE-NEXT: str r1, [sp, #108] @ 4-byte Spill +; 
BE-NEXT: mov r8, r1 ; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-NEXT: bl llrint ; BE-NEXT: vorr d0, d15, d15 -; BE-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-NEXT: mov r9, r1 ; BE-NEXT: vmov.32 d10[0], r0 ; BE-NEXT: bl llrint ; BE-NEXT: vorr d0, d12, d12 -; BE-NEXT: add lr, sp, #152 +; BE-NEXT: mov r10, r1 ; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; BE-NEXT: bl llrint ; BE-NEXT: vorr d0, d13, d13 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d9, d9 ; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #64 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #136 -; BE-NEXT: mov r9, r1 -; BE-NEXT: vmov.32 d13[1], r5 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: vmov.32 d14[0], r0 ; BE-NEXT: bl llrint ; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: vmov.32 d12[1], r7 -; BE-NEXT: add lr, sp, #64 -; BE-NEXT: mov r10, r1 +; BE-NEXT: mov r4, r1 ; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill ; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: mov r11, r1 -; BE-NEXT: vmov.32 d11[1], r4 -; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEXT: add lr, sp, #48 -; BE-NEXT: vorr q6, q5, q5 -; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; BE-NEXT: vorr d0, d9, d9 +; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d12[0], r0 ; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: 
vmov.32 d12[1], r6 ; BE-NEXT: add lr, sp, #24 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add lr, sp, #48 -; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload ; BE-NEXT: mov r6, r1 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEXT: add lr, sp, #152 -; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEXT: add lr, sp, #88 -; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEXT: vorr d0, d13, d13 -; BE-NEXT: vmov.32 d9[1], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; BE-NEXT: vorr d0, d12, d12 -; BE-NEXT: add lr, sp, #152 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d8[1], r0 -; BE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #136 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; BE-NEXT: mov r5, r1 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: add lr, sp, #112 -; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEXT: vorr d0, d9, d9 -; BE-NEXT: vmov.32 d11[1], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: mov r7, r1 ; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: vmov.32 d10[1], r9 +; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; BE-NEXT: bl llrint ; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #48 -; BE-NEXT: vmov.32 d17[1], r10 -; BE-NEXT: vmov.32 d16[1], r11 -; BE-NEXT: vorr q12, q8, q8 +; BE-NEXT: vmov.32 d13[1], r6 ; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #152 -; BE-NEXT: vmov.32 d17[1], r8 -; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: vmov.32 d13[1], r7 -; BE-NEXT: vmov.32 d16[1], r6 -; BE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-NEXT: add 
lr, sp, #64 -; BE-NEXT: vorr q13, q8, q8 -; BE-NEXT: vmov.32 d12[1], r1 -; BE-NEXT: ldr r1, [sp, #132] @ 4-byte Reload -; BE-NEXT: vrev64.32 q8, q5 -; BE-NEXT: mov r0, r1 -; BE-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-NEXT: vrev64.32 q9, q9 -; BE-NEXT: vrev64.32 q10, q10 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vst1.64 {d18, d19}, [r0:128]! -; BE-NEXT: vrev64.32 q11, q11 ; BE-NEXT: vmov.32 d15[1], r4 -; BE-NEXT: vst1.64 {d20, d21}, [r0:128]! -; BE-NEXT: vrev64.32 q15, q6 -; BE-NEXT: vmov.32 d14[1], r5 -; BE-NEXT: vrev64.32 q12, q12 -; BE-NEXT: vst1.64 {d22, d23}, [r0:128] -; BE-NEXT: add r0, r1, #64 -; BE-NEXT: vrev64.32 q13, q13 -; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-NEXT: vst1.64 {d24, d25}, [r0:128]! -; BE-NEXT: vrev64.32 q14, q7 -; BE-NEXT: vst1.64 {d26, d27}, [r0:128]! -; BE-NEXT: vst1.64 {d28, d29}, [r0:128] -; BE-NEXT: add sp, sp, #168 +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d11[1], r10 +; BE-NEXT: vmov.32 d17[1], r8 +; BE-NEXT: vmov.32 d12[1], r5 +; BE-NEXT: vmov.32 d14[1], r7 +; BE-NEXT: vmov.32 d10[1], r9 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 q0, q6 +; BE-NEXT: vrev64.32 q1, q7 +; BE-NEXT: vrev64.32 q2, q5 +; BE-NEXT: vrev64.32 q3, q8 +; BE-NEXT: add sp, sp, #40 ; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: add sp, sp, #4 -; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v16f64: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #168 -; BE-NEON-NEXT: sub sp, sp, #168 -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: str r0, [sp, #132] @ 4-byte Spill -; BE-NEON-NEXT: add r0, sp, #304 -; BE-NEON-NEXT: vorr q4, q3, q3 -; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 
16-byte Spill -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: vorr d0, d1, d1 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #320 -; BE-NEON-NEXT: vorr q6, q2, q2 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #88 -; BE-NEON-NEXT: vorr q7, q1, q1 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #272 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #288 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d14, d14 -; BE-NEON-NEXT: add lr, sp, #136 -; BE-NEON-NEXT: vmov.32 d17[0], r0 -; BE-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d15, d15 -; BE-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d12, d12 -; BE-NEON-NEXT: add lr, sp, #152 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d13, d13 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-NEON-NEXT: bl llrint -; 
BE-NEON-NEXT: add lr, sp, #136 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vmov.32 d13[1], r5 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: vmov.32 d12[1], r7 -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: vorr q6, q5, q5 -; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: vmov.32 d12[1], r6 -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #152 -; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #88 -; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d13, d13 -; BE-NEON-NEXT: vmov.32 d9[1], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; BE-NEON-NEXT: vorr d0, d12, d12 -; BE-NEON-NEXT: add lr, sp, #152 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d8[1], r0 -; BE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte 
Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #136 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: vmov.32 d11[1], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: vmov.32 d10[1], r9 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: vmov.32 d17[1], r10 -; BE-NEON-NEXT: vmov.32 d16[1], r11 -; BE-NEON-NEXT: vorr q12, q8, q8 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #152 -; BE-NEON-NEXT: vmov.32 d17[1], r8 -; BE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: vmov.32 d13[1], r7 -; BE-NEON-NEXT: vmov.32 d16[1], r6 -; BE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: vorr q13, q8, q8 -; BE-NEON-NEXT: vmov.32 d12[1], r1 -; BE-NEON-NEXT: ldr r1, [sp, #132] @ 4-byte Reload -; BE-NEON-NEXT: vrev64.32 q8, q5 -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-NEON-NEXT: vrev64.32 q9, q9 -; BE-NEON-NEXT: vrev64.32 q10, q10 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 q11, q11 -; BE-NEON-NEXT: vmov.32 d15[1], r4 -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! 
-; BE-NEON-NEXT: vrev64.32 q15, q6 -; BE-NEON-NEXT: vmov.32 d14[1], r5 -; BE-NEON-NEXT: vrev64.32 q12, q12 -; BE-NEON-NEXT: vst1.64 {d22, d23}, [r0:128] -; BE-NEON-NEXT: add r0, r1, #64 -; BE-NEON-NEXT: vrev64.32 q13, q13 -; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 q14, q7 -; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]! -; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128] -; BE-NEON-NEXT: add sp, sp, #168 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x) - ret <16 x i64> %a +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) + ret <8 x i64> %a } -declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) +declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) -define <32 x i64> @llrint_v32f64(<32 x double> %x) { -; LE-LABEL: llrint_v32f64: +define <16 x i64> @llrint_v16f64(<16 x double> %x) { +; LE-LABEL: llrint_v16f64: ; LE: @ %bb.0: ; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -6923,691 +1686,172 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) { ; LE-NEXT: sub sp, sp, #4 ; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #208 -; LE-NEXT: sub sp, sp, #208 -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: str r0, [sp, #156] @ 4-byte Spill -; LE-NEXT: add r0, sp, #456 -; LE-NEXT: vorr q4, q0, q0 -; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vorr d0, d7, d7 -; LE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: vorr q5, q2, q2 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #344 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; 
LE-NEXT: add lr, sp, #192 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #376 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #360 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #440 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d10, d10 -; LE-NEXT: str r1, [sp, #120] @ 4-byte Spill -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d11, d11 -; LE-NEXT: mov r10, r1 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d8, d8 -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: mov r11, r1 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #40 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vorr d0, d10, d10 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d11, d11 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d9[1], r7 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vorr d0, d17, d17 -; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d8[1], r4 -; LE-NEXT: 
add lr, sp, #72 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: bl llrint +; LE-NEXT: .pad #176 +; LE-NEXT: sub sp, sp, #176 ; LE-NEXT: add lr, sp, #40 -; LE-NEXT: vorr d0, d8, d8 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vmov.32 d11[1], r6 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEXT: vmov.32 d10[1], r9 -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: ldr r0, [sp, #120] @ 4-byte Reload -; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vmov.32 d19[1], r0 -; LE-NEXT: add r0, sp, #408 -; LE-NEXT: ldr r2, [sp, #156] @ 4-byte Reload +; LE-NEXT: str r0, [sp, #140] @ 4-byte Spill +; LE-NEXT: add r0, sp, #312 +; LE-NEXT: vorr q6, q2, q2 +; LE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: vorr q7, q1, q1 +; LE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vorr d0, d1, d1 ; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: vmov.32 d13[1], r7 -; LE-NEXT: mov r0, r2 -; LE-NEXT: vmov.32 d12[1], r1 -; LE-NEXT: add r1, sp, #488 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #40 -; LE-NEXT: vld1.64 {d16, d17}, [r1] -; LE-NEXT: add r1, sp, #472 -; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! 
-; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vmov.32 d21[1], r11 -; LE-NEXT: vmov.32 d20[1], r10 -; LE-NEXT: add r10, r2, #192 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEXT: vld1.64 {d16, d17}, [r1] -; LE-NEXT: add r1, sp, #392 -; LE-NEXT: vmov.32 d18[1], r5 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: vst1.64 {d20, d21}, [r0:128]! -; LE-NEXT: vld1.64 {d16, d17}, [r1] +; LE-NEXT: add r0, sp, #280 ; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #104 -; LE-NEXT: vst1.64 {d18, d19}, [r0:128] -; LE-NEXT: add r0, sp, #312 +; LE-NEXT: add lr, sp, #80 ; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #328 -; LE-NEXT: vmov.32 d15[1], r8 +; LE-NEXT: add r0, sp, #296 ; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-NEXT: add lr, sp, #120 ; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: add r0, sp, #424 -; LE-NEXT: vmov.32 d14[1], r4 -; LE-NEXT: vst1.64 {d12, d13}, [r10:128]! +; LE-NEXT: add r0, sp, #328 ; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #176 +; LE-NEXT: add lr, sp, #56 ; LE-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEXT: vst1.64 {d14, d15}, [r10:128]! 
; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #192 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vorr d0, d17, d17 ; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: vorr d0, d14, d14 +; LE-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-NEXT: vmov.32 d11[0], r0 ; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d11, d11 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: vorr d0, d15, d15 +; LE-NEXT: str r1, [sp, #76] @ 4-byte Spill +; LE-NEXT: vmov.32 d8[0], r0 ; LE-NEXT: bl llrint +; LE-NEXT: vorr d0, d12, d12 ; LE-NEXT: add lr, sp, #160 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: str r1, [sp, #72] @ 4-byte Spill +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill ; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d11, d11 +; LE-NEXT: vorr d0, d13, d13 ; LE-NEXT: mov r6, r1 ; LE-NEXT: vmov.32 d14[0], r0 ; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #192 +; LE-NEXT: add lr, sp, #40 ; LE-NEXT: mov r4, r1 ; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: add lr, sp, #192 -; LE-NEXT: mov r11, r1 -; LE-NEXT: vmov.32 d15[1], r4 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: add lr, sp, #176 ; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: bl llrint ; LE-NEXT: vorr d0, d8, d8 -; LE-NEXT: vmov.32 d14[1], r6 -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill ; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d13[1], r5 -; 
LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d12[0], r0 ; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d8, d8 -; LE-NEXT: vmov.32 d12[1], r8 -; LE-NEXT: add lr, sp, #88 +; LE-NEXT: add lr, sp, #96 ; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #192 -; LE-NEXT: str r1, [sp, #24] @ 4-byte Spill -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: add lr, sp, #40 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEXT: vorr d0, d11, d11 -; LE-NEXT: vmov.32 d9[1], r9 -; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d10, d10 -; LE-NEXT: vmov.32 d8[1], r11 -; LE-NEXT: add lr, sp, #192 -; LE-NEXT: mov r6, r1 ; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: str r1, [sp, #40] @ 4-byte Spill -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: mov r10, r1 +; LE-NEXT: vmov.32 d13[1], r5 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; LE-NEXT: add lr, sp, #56 ; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: vmov.32 d11[1], r4 ; LE-NEXT: bl llrint ; LE-NEXT: vorr d0, d8, d8 -; LE-NEXT: vmov.32 d10[1], r7 -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: vmov.32 d12[1], r7 +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d11[0], r0 +; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill 
; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d10[0], r0 +; LE-NEXT: add lr, sp, #24 ; LE-NEXT: mov r11, r1 -; LE-NEXT: vmov.32 d15[1], r5 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vorr d0, d9, d9 +; LE-NEXT: vmov.32 d15[1], r4 +; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vorr d0, d17, d17 ; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d14[1], r6 +; LE-NEXT: mov r8, r1 +; LE-NEXT: vmov.32 d17[0], r0 +; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-NEXT: add lr, sp, #56 -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d14[1], r0 ; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-NEXT: add lr, sp, #80 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vorr d0, d11, d11 ; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-NEXT: vmov.32 d15[0], r0 ; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: add lr, sp, #104 +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: ldr r0, [sp, #72] @ 4-byte Reload ; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: vmov.32 d13[1], r6 -; LE-NEXT: bl llrint -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; LE-NEXT: vorr d0, d8, d8 -; LE-NEXT: add lr, sp, #160 ; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d12[1], r0 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-NEXT: vmov.32 d9[1], r0 ; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-NEXT: add lr, sp, #160 ; LE-NEXT: mov 
r4, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-NEXT: vmov.32 d8[1], r0 +; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill ; LE-NEXT: add lr, sp, #120 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: vorr d0, d9, d9 -; LE-NEXT: vmov.32 d13[1], r8 +; LE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-NEXT: vorr d0, d11, d11 ; LE-NEXT: bl llrint -; LE-NEXT: vorr d0, d8, d8 +; LE-NEXT: vmov.32 d13[0], r0 +; LE-NEXT: add lr, sp, #40 +; LE-NEXT: vorr d0, d10, d10 +; LE-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: vmov.32 d12[1], r11 +; LE-NEXT: vmov.32 d9[1], r0 ; LE-NEXT: bl llrint -; LE-NEXT: add lr, sp, #72 -; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: add lr, sp, #144 +; LE-NEXT: mov r7, r1 +; LE-NEXT: vmov.32 d12[0], r0 +; LE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: vmov.32 d8[1], r10 +; LE-NEXT: bl llrint +; LE-NEXT: add lr, sp, #8 +; LE-NEXT: vmov.32 d15[1], r6 +; LE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-NEXT: add lr, sp, #24 ; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload ; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vmov.32 d17[1], r9 -; LE-NEXT: vmov.32 d16[1], r7 -; LE-NEXT: vst1.64 {d12, d13}, [r10:128]! -; LE-NEXT: vorr q9, q8, q8 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #136 -; LE-NEXT: vmov.32 d15[1], r5 -; LE-NEXT: vst1.64 {d16, d17}, [r10:128] -; LE-NEXT: vmov.32 d14[1], r1 -; LE-NEXT: ldr r1, [sp, #156] @ 4-byte Reload -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add r0, r1, #128 +; LE-NEXT: vmov.32 d20[0], r0 +; LE-NEXT: vmov.32 d21[1], r8 +; LE-NEXT: vmov.32 d20[1], r1 +; LE-NEXT: ldr r1, [sp, #140] @ 4-byte Reload +; LE-NEXT: vmov.32 d13[1], r5 +; LE-NEXT: mov r0, r1 +; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! 
+; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload ; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vmov.32 d11[1], r6 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-NEXT: vmov.32 d14[1], r4 ; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vmov.32 d10[1], r4 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #192 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: add lr, sp, #96 +; LE-NEXT: vmov.32 d12[1], r7 +; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-NEXT: vmov.32 d17[1], r9 +; LE-NEXT: vst1.64 {d18, d19}, [r0:128] ; LE-NEXT: add r0, r1, #64 -; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! ; LE-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #88 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-NEXT: vmov.32 d16[1], r11 +; LE-NEXT: vst1.64 {d20, d21}, [r0:128]! 
; LE-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEXT: add sp, sp, #208 +; LE-NEXT: add sp, sp, #176 ; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-NEXT: add sp, sp, #4 ; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-NEON-LABEL: llrint_v32f64: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #208 -; LE-NEON-NEXT: sub sp, sp, #208 -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill -; LE-NEON-NEXT: add r0, sp, #456 -; LE-NEON-NEXT: vorr q4, q0, q0 -; LE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vorr d0, d7, d7 -; LE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vorr q5, q2, q2 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, sp, #344 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #192 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, sp, #376 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, sp, #360 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, sp, #440 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d10, d10 -; LE-NEON-NEXT: str r1, [sp, #120] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr 
d0, d11, d11 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d10, d10 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d11, d11 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d9[1], r7 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d17, d17 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d8[1], r4 -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d11[1], r6 -; 
LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEON-NEXT: vmov.32 d10[1], r9 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #120] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vmov.32 d19[1], r0 -; LE-NEON-NEXT: add r0, sp, #408 -; LE-NEON-NEXT: ldr r2, [sp, #156] @ 4-byte Reload -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: vmov.32 d13[1], r7 -; LE-NEON-NEXT: mov r0, r2 -; LE-NEON-NEXT: vmov.32 d12[1], r1 -; LE-NEON-NEXT: add r1, sp, #488 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1] -; LE-NEON-NEXT: add r1, sp, #472 -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vmov.32 d21[1], r11 -; LE-NEON-NEXT: vmov.32 d20[1], r10 -; LE-NEON-NEXT: add r10, r2, #192 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1] -; LE-NEON-NEXT: add r1, sp, #392 -; LE-NEON-NEXT: vmov.32 d18[1], r5 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! 
-; LE-NEON-NEXT: vld1.64 {d16, d17}, [r1] -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; LE-NEON-NEXT: add r0, sp, #312 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, sp, #328 -; LE-NEON-NEXT: vmov.32 d15[1], r8 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: add r0, sp, #424 -; LE-NEON-NEXT: vmov.32 d14[1], r4 -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]! -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r10:128]! -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #192 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d17, d17 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d10, d10 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d11, d11 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d10, d10 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d11, d11 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #192 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: add lr, sp, #192 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vmov.32 
d15[1], r4 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: vmov.32 d14[1], r6 -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d13[1], r5 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: vmov.32 d12[1], r8 -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #192 -; LE-NEON-NEXT: str r1, [sp, #24] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d11, d11 -; LE-NEON-NEXT: vmov.32 d9[1], r9 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d10, d10 -; LE-NEON-NEXT: vmov.32 d8[1], r11 -; LE-NEON-NEXT: add lr, sp, #192 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte 
Reload -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: vmov.32 d11[1], r4 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: vmov.32 d10[1], r7 -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vmov.32 d15[1], r5 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d14[1], r0 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #104 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d9, d9 -; LE-NEON-NEXT: vmov.32 d13[1], r6 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d12[1], r0 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #120 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: vorr d0, d9, d9 -; 
LE-NEON-NEXT: vmov.32 d13[1], r8 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: vorr d0, d8, d8 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: vmov.32 d12[1], r11 -; LE-NEON-NEXT: bl llrint -; LE-NEON-NEXT: add lr, sp, #72 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vmov.32 d17[1], r9 -; LE-NEON-NEXT: vmov.32 d16[1], r7 -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]! -; LE-NEON-NEXT: vorr q9, q8, q8 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #136 -; LE-NEON-NEXT: vmov.32 d15[1], r5 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128] -; LE-NEON-NEXT: vmov.32 d14[1], r1 -; LE-NEON-NEXT: ldr r1, [sp, #156] @ 4-byte Reload -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add r0, r1, #128 -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vmov.32 d11[1], r6 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vmov.32 d10[1], r4 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #192 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEON-NEXT: add r0, r1, #64 -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #88 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEON-NEXT: add sp, sp, #208 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-LABEL: llrint_v32f64: +; BE-LABEL: llrint_v16f64: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -7615,675 +1859,183 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) { ; BE-NEXT: sub sp, sp, #4 ; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #232 -; BE-NEXT: sub sp, sp, #232 -; BE-NEXT: add lr, sp, #184 -; BE-NEXT: str r0, [sp, #148] @ 4-byte Spill -; BE-NEXT: add r0, sp, #416 -; BE-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-NEXT: add lr, sp, #168 -; BE-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-NEXT: add lr, sp, #152 -; BE-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-NEXT: add lr, sp, #128 -; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-NEXT: add lr, sp, #200 -; BE-NEXT: vld1.64 {d18, d19}, [r0] -; BE-NEXT: add r0, sp, #448 -; BE-NEXT: vorr d0, d19, d19 -; BE-NEXT: vld1.64 {d14, d15}, [r0] -; BE-NEXT: add r0, sp, #336 -; BE-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; BE-NEXT: .pad #168 +; BE-NEXT: sub sp, sp, #168 ; BE-NEXT: add lr, sp, #64 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #400 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #352 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #368 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: str r0, [sp, #132] @ 4-byte Spill +; BE-NEXT: add r0, sp, #304 +; BE-NEXT: vorr q4, q3, q3 +; BE-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill 
; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vorr d0, d1, d1 ; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #384 +; BE-NEXT: add r0, sp, #320 +; BE-NEXT: vorr q6, q2, q2 ; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #96 +; BE-NEXT: add lr, sp, #88 +; BE-NEXT: vorr q7, q1, q1 ; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #512 +; BE-NEXT: add r0, sp, #272 ; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-NEXT: add lr, sp, #112 ; BE-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEXT: add r0, sp, #432 -; BE-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEXT: add r0, sp, #288 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vld1.64 {d16, d17}, [r0] ; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: str r1, [sp, #80] @ 4-byte Spill -; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vorr d0, d14, d14 +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: vmov.32 d17[0], r0 +; BE-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d9, d9 -; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-NEXT: vorr d0, d15, d15 +; BE-NEXT: str r1, [sp, #84] @ 4-byte Spill ; BE-NEXT: vmov.32 d10[0], r0 ; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d14, d14 -; BE-NEXT: add lr, sp, #216 +; BE-NEXT: vorr d0, d12, d12 +; BE-NEXT: add lr, sp, #152 ; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: mov r9, r1 +; BE-NEXT: str r1, [sp, #44] @ 4-byte Spill ; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d15, d15 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: vorr d0, d13, d13 +; BE-NEXT: mov r6, r1 +; BE-NEXT: vmov.32 d10[0], r0 ; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #64 +; BE-NEXT: vorr d0, d8, d8 ; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vorr d0, d10, d10 -; BE-NEXT: bl llrint -; 
BE-NEXT: vorr d0, d11, d11 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: vmov.32 d11[0], r0 ; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #200 +; BE-NEXT: vorr d0, d9, d9 ; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-NEXT: bl llrint ; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: add lr, sp, #200 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d15[1], r7 -; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vorr d0, d11, d11 ; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d10, d10 -; BE-NEXT: vmov.32 d14[1], r6 ; BE-NEXT: add lr, sp, #64 -; BE-NEXT: mov r10, r1 +; BE-NEXT: mov r5, r1 ; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: mov r11, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-NEXT: vorr d0, d15, d15 -; BE-NEXT: vmov.32 d9[1], r4 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d14, d14 -; BE-NEXT: vmov.32 d8[1], r8 -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: str r1, [sp, #4] @ 4-byte Spill -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: add lr, sp, #136 +; BE-NEXT: mov r9, r1 +; BE-NEXT: vmov.32 d13[1], r5 +; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-NEXT: add lr, sp, #24 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEXT: add lr, sp, #216 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: add lr, sp, #48 ; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; BE-NEXT: vorr d0, d9, d9 -; BE-NEXT: vmov.32 d11[1], r9 
; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload ; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: add lr, sp, #216 -; BE-NEXT: mov r9, r1 -; BE-NEXT: vmov.32 d10[1], r0 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-NEXT: vmov.32 d12[1], r7 +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: mov r10, r1 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill ; BE-NEXT: bl llrint ; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: add lr, sp, #48 -; BE-NEXT: ldr r0, [sp, #80] @ 4-byte Reload -; BE-NEXT: mov r6, r1 +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: mov r11, r1 +; BE-NEXT: vmov.32 d11[1], r4 ; BE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEXT: add lr, sp, #200 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: add lr, sp, #96 +; BE-NEXT: add lr, sp, #48 +; BE-NEXT: vorr q6, q5, q5 ; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; BE-NEXT: vorr d0, d9, d9 -; BE-NEXT: vmov.32 d11[1], r0 ; BE-NEXT: bl llrint ; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: vmov.32 d10[1], r5 -; BE-NEXT: add lr, sp, #200 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEXT: bl llrint -; BE-NEXT: add lr, sp, #112 -; BE-NEXT: vorr q4, q6, q6 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEXT: vorr d0, d13, d13 -; BE-NEXT: vmov.32 d9[1], r10 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d12, d12 -; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d12[1], r6 +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: mov r8, r1 ; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: vmov.32 d8[1], r11 +; BE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill ; BE-NEXT: bl llrint ; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add lr, sp, #24 -; BE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #48 -; BE-NEXT: vmov.32 d17[1], r0 -; BE-NEXT: vmov.32 d16[1], r8 -; BE-NEXT: vorr q9, q8, q8 -; 
BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #112 -; BE-NEXT: vmov.32 d17[1], r9 -; BE-NEXT: vmov.32 d16[1], r6 -; BE-NEXT: vorr q10, q8, q8 -; BE-NEXT: vrev64.32 q8, q4 -; BE-NEXT: vmov.32 d15[1], r7 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #200 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vmov.32 d11[1], r5 -; BE-NEXT: vrev64.32 q8, q8 -; BE-NEXT: vmov.32 d14[1], r4 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #216 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: vmov.32 d10[1], r1 -; BE-NEXT: vrev64.32 q8, q8 -; BE-NEXT: vrev64.32 q6, q7 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #8 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #96 -; BE-NEXT: vrev64.32 q7, q5 -; BE-NEXT: vrev64.32 q8, q8 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #64 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #80 -; BE-NEXT: vrev64.32 q8, q8 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #64 -; BE-NEXT: vrev64.32 q8, q9 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-NEXT: add lr, sp, #48 -; BE-NEXT: vrev64.32 q8, q10 -; BE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEXT: add lr, sp, #128 -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vorr d0, d11, d11 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d10, d10 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: ldr r6, [sp, #148] @ 4-byte Reload +; BE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-NEXT: mov r6, r1 +; BE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; BE-NEXT: add lr, sp, #152 -; BE-NEXT: vmov.32 d9[1], r4 -; BE-NEXT: mov r5, r6 -; BE-NEXT: vmov.32 d8[1], r1 -; BE-NEXT: vrev64.32 q8, q4 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vorr d0, d11, d11 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d10, d10 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: add lr, sp, #168 -; BE-NEXT: vmov.32 d9[1], r4 -; BE-NEXT: vmov.32 d8[1], r1 -; BE-NEXT: vrev64.32 q8, q4 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vorr d0, d11, d11 +; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-NEXT: add lr, sp, #88 +; BE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-NEXT: vorr d0, d13, d13 +; BE-NEXT: vmov.32 d9[1], r0 ; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d10, d10 +; BE-NEXT: vmov.32 d15[0], r0 +; BE-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-NEXT: vorr d0, d12, d12 +; BE-NEXT: add lr, sp, #152 ; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: vmov.32 d8[1], r0 +; BE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill ; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: add lr, sp, #184 -; BE-NEXT: vmov.32 d9[1], r4 -; BE-NEXT: vmov.32 d8[1], r1 -; BE-NEXT: vrev64.32 q8, q4 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-NEXT: add lr, sp, #136 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-NEXT: mov r5, r1 ; BE-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEXT: vorr d0, d11, d11 -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d10, d10 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: add r0, sp, #464 -; BE-NEXT: vmov.32 d9[1], r4 -; BE-NEXT: vmov.32 d8[1], r1 -; BE-NEXT: vrev64.32 q8, q4 -; BE-NEXT: vld1.64 {d8, d9}, [r0] -; BE-NEXT: vorr d0, d9, d9 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128] -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add r0, sp, #480 -; BE-NEXT: add r5, r6, #192 -; BE-NEXT: vmov.32 d11[1], r4 -; BE-NEXT: vmov.32 d10[1], r1 -; BE-NEXT: vld1.64 {d8, d9}, [r0] -; BE-NEXT: vorr d0, d9, d9 -; BE-NEXT: vrev64.32 q8, q5 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEXT: bl llrint -; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add r0, sp, #496 -; BE-NEXT: vmov.32 d11[1], r4 -; BE-NEXT: vmov.32 d10[1], r1 -; BE-NEXT: vld1.64 {d8, d9}, [r0] +; BE-NEXT: add lr, sp, #112 +; BE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; BE-NEXT: vorr d0, d9, d9 -; BE-NEXT: vrev64.32 q8, q5 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! +; BE-NEXT: vmov.32 d11[1], r0 ; BE-NEXT: bl llrint ; BE-NEXT: vorr d0, d8, d8 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d11[0], r0 +; BE-NEXT: mov r7, r1 +; BE-NEXT: vmov.32 d13[0], r0 +; BE-NEXT: vmov.32 d10[1], r9 ; BE-NEXT: bl llrint -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: add lr, sp, #112 -; BE-NEXT: add r0, r6, #128 -; BE-NEXT: vmov.32 d11[1], r4 -; BE-NEXT: vmov.32 d10[1], r1 -; BE-NEXT: vrev64.32 q8, q5 -; BE-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-NEXT: vst1.64 {d14, d15}, [r5:128] -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #200 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #216 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #96 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #80 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-NEXT: add r0, r6, #64 -; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEXT: add lr, sp, #64 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: add lr, sp, #8 +; BE-NEXT: vmov.32 d12[0], r0 ; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload ; BE-NEXT: add lr, sp, #48 -; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-NEXT: vmov.32 d17[1], r10 +; BE-NEXT: vmov.32 d16[1], r11 +; BE-NEXT: vorr q12, q8, q8 ; BE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-NEXT: add lr, sp, #152 +; BE-NEXT: vmov.32 d17[1], r8 +; BE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-NEXT: add lr, sp, #24 +; BE-NEXT: vmov.32 d13[1], r7 +; BE-NEXT: vmov.32 d16[1], r6 +; BE-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-NEXT: add lr, sp, #64 +; BE-NEXT: vorr q13, q8, q8 +; BE-NEXT: vmov.32 d12[1], r1 +; BE-NEXT: ldr r1, [sp, #132] @ 4-byte Reload +; BE-NEXT: vrev64.32 q8, q5 +; BE-NEXT: mov r0, r1 +; BE-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-NEXT: vrev64.32 q9, q9 +; BE-NEXT: vrev64.32 q10, q10 ; BE-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEXT: vst1.64 {d12, d13}, [r0:128] -; BE-NEXT: add sp, sp, #232 +; BE-NEXT: vst1.64 {d18, d19}, [r0:128]! +; BE-NEXT: vrev64.32 q11, q11 +; BE-NEXT: vmov.32 d15[1], r4 +; BE-NEXT: vst1.64 {d20, d21}, [r0:128]! 
+; BE-NEXT: vrev64.32 q15, q6 +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: vrev64.32 q12, q12 +; BE-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-NEXT: add r0, r1, #64 +; BE-NEXT: vrev64.32 q13, q13 +; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-NEXT: vrev64.32 q14, q7 +; BE-NEXT: vst1.64 {d26, d27}, [r0:128]! +; BE-NEXT: vst1.64 {d28, d29}, [r0:128] +; BE-NEXT: add sp, sp, #168 ; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-NEXT: add sp, sp, #4 ; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v32f64: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #232 -; BE-NEON-NEXT: sub sp, sp, #232 -; BE-NEON-NEXT: add lr, sp, #184 -; BE-NEON-NEXT: str r0, [sp, #148] @ 4-byte Spill -; BE-NEON-NEXT: add r0, sp, #416 -; BE-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #168 -; BE-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #152 -; BE-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #128 -; BE-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #200 -; BE-NEON-NEXT: vld1.64 {d18, d19}, [r0] -; BE-NEON-NEXT: add r0, sp, #448 -; BE-NEON-NEXT: vorr d0, d19, d19 -; BE-NEON-NEXT: vld1.64 {d14, d15}, [r0] -; BE-NEON-NEXT: add r0, sp, #336 -; BE-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #400 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #352 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 
16-byte Spill -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #368 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #384 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #96 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #512 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-NEON-NEXT: add r0, sp, #432 -; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0] -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: str r1, [sp, #80] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d14, d14 -; BE-NEON-NEXT: add lr, sp, #216 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d15, d15 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d10, d10 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d11, d11 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #200 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: 
add lr, sp, #200 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d15[1], r7 -; BE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d11, d11 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d10, d10 -; BE-NEON-NEXT: vmov.32 d14[1], r6 -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d15, d15 -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d14, d14 -; BE-NEON-NEXT: vmov.32 d8[1], r8 -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #216 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: vmov.32 d11[1], r9 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: add lr, sp, #216 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vmov.32 d10[1], r0 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: ldr r0, [sp, #80] @ 4-byte Reload -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-NEON-NEXT: 
add lr, sp, #200 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #96 -; BE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: vmov.32 d11[1], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: vmov.32 d10[1], r5 -; BE-NEON-NEXT: add lr, sp, #200 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: vorr q4, q6, q6 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d13, d13 -; BE-NEON-NEXT: vmov.32 d9[1], r10 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d12, d12 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: vmov.32 d8[1], r11 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add lr, sp, #24 -; BE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: vmov.32 d17[1], r0 -; BE-NEON-NEXT: vmov.32 d16[1], r8 -; BE-NEON-NEXT: vorr q9, q8, q8 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: vmov.32 d17[1], r9 -; BE-NEON-NEXT: vmov.32 d16[1], r6 -; BE-NEON-NEXT: vorr q10, q8, q8 -; BE-NEON-NEXT: vrev64.32 q8, q4 -; BE-NEON-NEXT: vmov.32 d15[1], r7 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #200 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d11[1], r5 -; BE-NEON-NEXT: vrev64.32 q8, q8 -; BE-NEON-NEXT: vmov.32 d14[1], r4 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #216 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q8 -; 
BE-NEON-NEXT: vrev64.32 q6, q7 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #8 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #96 -; BE-NEON-NEXT: vrev64.32 q7, q5 -; BE-NEON-NEXT: vrev64.32 q8, q8 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #80 -; BE-NEON-NEXT: vrev64.32 q8, q8 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: vrev64.32 q8, q9 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: vrev64.32 q8, q10 -; BE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-NEON-NEXT: add lr, sp, #128 -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d11, d11 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d10, d10 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: ldr r6, [sp, #148] @ 4-byte Reload -; BE-NEON-NEXT: add lr, sp, #152 -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: mov r5, r6 -; BE-NEON-NEXT: vmov.32 d8[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q4 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d11, d11 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d10, d10 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: add lr, sp, #168 -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: vmov.32 d8[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q4 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d11, d11 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d10, d10 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: add lr, sp, #184 -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: vmov.32 d8[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q4 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-NEON-NEXT: vorr d0, d11, d11 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d10, d10 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: add r0, sp, #464 -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: vmov.32 d8[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q4 -; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0] -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add r0, sp, #480 -; BE-NEON-NEXT: add r5, r6, #192 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0] -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: vrev64.32 q8, q5 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add r0, sp, #496 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vld1.64 {d8, d9}, [r0] -; BE-NEON-NEXT: vorr d0, d9, d9 -; BE-NEON-NEXT: vrev64.32 q8, q5 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vorr d0, d8, d8 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: bl llrint -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: add lr, sp, #112 -; BE-NEON-NEXT: add r0, r6, #128 -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: vmov.32 d10[1], r1 -; BE-NEON-NEXT: vrev64.32 q8, q5 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-NEON-NEXT: vst1.64 {d14, d15}, [r5:128] -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #200 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #216 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #96 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #80 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-NEON-NEXT: add r0, r6, #64 -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #64 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: add lr, sp, #48 -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; BE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128] -; BE-NEON-NEXT: add sp, sp, #232 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <32 x i64> @llvm.llrint.v32i64.v16f64(<32 x double> %x) - ret <32 x i64> %a + %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x) + ret <16 x i64> %a } -declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>) +declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { ; LE-LABEL: llrint_v1i64_v1f128: @@ -8295,15 +2047,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { ; LE-NEXT: vmov.32 d0[1], r1 ; LE-NEXT: pop {r11, pc} ; -; LE-NEON-LABEL: llrint_v1i64_v1f128: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r11, lr} -; LE-NEON-NEXT: push {r11, lr} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d0[0], r0 -; LE-NEON-NEXT: vmov.32 d0[1], r1 -; LE-NEON-NEXT: pop {r11, pc} -; ; BE-LABEL: llrint_v1i64_v1f128: ; BE: @ %bb.0: ; BE-NEXT: .save {r11, lr} @@ -8313,16 +2056,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) { ; BE-NEXT: vmov.32 d16[1], r1 ; BE-NEXT: vrev64.32 d0, d16 ; BE-NEXT: pop {r11, pc} -; -; BE-NEON-LABEL: llrint_v1i64_v1f128: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r11, lr} -; BE-NEON-NEXT: push {r11, lr} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d0, d16 -; BE-NEON-NEXT: pop {r11, pc} %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x) ret <1 x i64> %a } @@ -8356,1312 +2089,135 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) { ; LE-NEXT: vpop {d8, d9} ; LE-NEXT: pop {r4, r5, r6, r7, r8, pc} ; -; LE-NEON-LABEL: llrint_v2i64_v2f128: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} -; LE-NEON-NEXT: .vsave {d8, d9} -; LE-NEON-NEXT: vpush {d8, d9} 
-; LE-NEON-NEXT: mov r8, r3 -; LE-NEON-NEXT: add r3, sp, #40 -; LE-NEON-NEXT: mov r5, r2 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: mov r7, r0 -; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: mov r1, r6 -; LE-NEON-NEXT: mov r2, r5 -; LE-NEON-NEXT: mov r3, r8 -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vmov.32 d9[1], r4 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vorr q0, q4, q4 -; LE-NEON-NEXT: vpop {d8, d9} -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} -; ; BE-LABEL: llrint_v2i64_v2f128: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r6, r7, r8, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, lr} -; BE-NEXT: .vsave {d8} -; BE-NEXT: vpush {d8} -; BE-NEXT: mov r8, r3 -; BE-NEXT: add r3, sp, #32 -; BE-NEXT: mov r5, r2 -; BE-NEXT: mov r6, r1 -; BE-NEXT: mov r7, r0 -; BE-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: mov r0, r7 -; BE-NEXT: mov r1, r6 -; BE-NEXT: mov r2, r5 -; BE-NEXT: mov r3, r8 -; BE-NEXT: bl llrintl -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov.32 d8[1], r4 -; BE-NEXT: vmov.32 d16[1], r1 -; BE-NEXT: vrev64.32 d1, d8 -; BE-NEXT: vrev64.32 d0, d16 -; BE-NEXT: vpop {d8} -; BE-NEXT: pop {r4, r5, r6, r7, r8, pc} -; -; BE-NEON-LABEL: llrint_v2i64_v2f128: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} -; BE-NEON-NEXT: .vsave {d8} -; BE-NEON-NEXT: vpush {d8} -; BE-NEON-NEXT: mov r8, r3 -; BE-NEON-NEXT: add r3, sp, #32 -; BE-NEON-NEXT: mov r5, r2 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: mov r7, r0 -; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: mov r0, r7 -; BE-NEON-NEXT: mov r1, r6 -; BE-NEON-NEXT: mov r2, r5 -; BE-NEON-NEXT: mov r3, r8 -; BE-NEON-NEXT: 
bl llrintl -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d8[1], r4 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d1, d8 -; BE-NEON-NEXT: vrev64.32 d0, d16 -; BE-NEON-NEXT: vpop {d8} -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} - %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) - ret <2 x i64> %a -} -declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) - -define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { -; LE-LABEL: llrint_v4i64_v4f128: -; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEXT: .vsave {d8, d9, d10, d11} -; LE-NEXT: vpush {d8, d9, d10, d11} -; LE-NEXT: mov r5, r3 -; LE-NEXT: add r3, sp, #96 -; LE-NEXT: mov r7, r2 -; LE-NEXT: mov r6, r1 -; LE-NEXT: mov r4, r0 -; LE-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: mov r0, r4 -; LE-NEXT: mov r1, r6 -; LE-NEXT: mov r2, r7 -; LE-NEXT: mov r3, r5 -; LE-NEXT: ldr r8, [sp, #80] -; LE-NEXT: ldr r10, [sp, #64] -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #68 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: mov r0, r10 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #84 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: mov r0, r8 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: vmov.32 d11[1], r4 -; LE-NEXT: vmov.32 d9[1], r9 -; LE-NEXT: vmov.32 d10[1], r5 -; LE-NEXT: vmov.32 d8[1], r1 -; LE-NEXT: vorr q0, q5, q5 -; LE-NEXT: vorr q1, q4, q4 -; LE-NEXT: vpop {d8, d9, d10, d11} -; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; LE-NEON-LABEL: llrint_v4i64_v4f128: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11} -; LE-NEON-NEXT: mov 
r5, r3 -; LE-NEON-NEXT: add r3, sp, #96 -; LE-NEON-NEXT: mov r7, r2 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: mov r0, r4 -; LE-NEON-NEXT: mov r1, r6 -; LE-NEON-NEXT: mov r2, r7 -; LE-NEON-NEXT: mov r3, r5 -; LE-NEON-NEXT: ldr r8, [sp, #80] -; LE-NEON-NEXT: ldr r10, [sp, #64] -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #68 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: mov r0, r10 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #84 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: mov r0, r8 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: vmov.32 d11[1], r4 -; LE-NEON-NEXT: vmov.32 d9[1], r9 -; LE-NEON-NEXT: vmov.32 d10[1], r5 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vorr q0, q5, q5 -; LE-NEON-NEXT: vorr q1, q4, q4 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11} -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-LABEL: llrint_v4i64_v4f128: -; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEXT: .vsave {d8, d9, d10} -; BE-NEXT: vpush {d8, d9, d10} -; BE-NEXT: mov r5, r3 -; BE-NEXT: add r3, sp, #88 -; BE-NEXT: mov r7, r2 -; BE-NEXT: mov r6, r1 -; BE-NEXT: mov r4, r0 -; BE-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: mov r9, r1 -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: mov r0, r4 -; BE-NEXT: mov r1, r6 -; BE-NEXT: mov r2, r7 -; BE-NEXT: mov r3, r5 -; BE-NEXT: ldr r8, [sp, #72] -; BE-NEXT: ldr r10, [sp, #56] -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #60 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: mov r0, r10 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add 
r3, sp, #76 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: mov r0, r8 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: vmov.32 d10[1], r4 -; BE-NEXT: vmov.32 d8[1], r9 -; BE-NEXT: vmov.32 d9[1], r5 -; BE-NEXT: vmov.32 d16[1], r1 -; BE-NEXT: vrev64.32 d1, d10 -; BE-NEXT: vrev64.32 d3, d8 -; BE-NEXT: vrev64.32 d0, d9 -; BE-NEXT: vrev64.32 d2, d16 -; BE-NEXT: vpop {d8, d9, d10} -; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-NEON-LABEL: llrint_v4i64_v4f128: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-NEON-NEXT: .vsave {d8, d9, d10} -; BE-NEON-NEXT: vpush {d8, d9, d10} -; BE-NEON-NEXT: mov r5, r3 -; BE-NEON-NEXT: add r3, sp, #88 -; BE-NEON-NEXT: mov r7, r2 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: mov r0, r4 -; BE-NEON-NEXT: mov r1, r6 -; BE-NEON-NEXT: mov r2, r7 -; BE-NEON-NEXT: mov r3, r5 -; BE-NEON-NEXT: ldr r8, [sp, #72] -; BE-NEON-NEXT: ldr r10, [sp, #56] -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #60 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: mov r0, r10 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #76 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: mov r0, r8 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: vmov.32 d10[1], r4 -; BE-NEON-NEXT: vmov.32 d8[1], r9 -; BE-NEON-NEXT: vmov.32 d9[1], r5 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d1, d10 -; BE-NEON-NEXT: vrev64.32 d3, d8 -; BE-NEON-NEXT: vrev64.32 d0, d9 -; BE-NEON-NEXT: vrev64.32 d2, d16 -; BE-NEON-NEXT: vpop {d8, d9, d10} -; BE-NEON-NEXT: pop {r4, 
r5, r6, r7, r8, r9, r10, pc} - %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) - ret <4 x i64> %a -} -declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) - -define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { -; LE-LABEL: llrint_v8i64_v8f128: -; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: .pad #4 -; LE-NEXT: sub sp, sp, #4 -; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #8 -; LE-NEXT: sub sp, sp, #8 -; LE-NEXT: mov r11, r3 -; LE-NEXT: add r3, sp, #208 -; LE-NEXT: mov r10, r2 -; LE-NEXT: mov r4, r1 -; LE-NEXT: mov r5, r0 -; LE-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r7, sp, #164 -; LE-NEXT: ldr r6, [sp, #160] -; LE-NEXT: str r1, [sp, #4] @ 4-byte Spill -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: ldm r7, {r1, r2, r3, r7} -; LE-NEXT: mov r0, r6 -; LE-NEXT: ldr r8, [sp, #128] -; LE-NEXT: ldr r9, [sp, #144] -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #180 -; LE-NEXT: str r1, [sp] @ 4-byte Spill -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: mov r0, r7 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #132 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: mov r0, r8 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #148 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: mov r0, r9 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: mov r0, r5 -; LE-NEXT: mov r1, r4 -; LE-NEXT: mov r2, r10 -; LE-NEXT: mov r3, r11 -; LE-NEXT: ldr r6, [sp, #112] -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #116 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: mov r0, r6 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #196 -; LE-NEXT: vmov.32 d15[0], r0 -; 
LE-NEXT: ldr r0, [sp, #192] -; LE-NEXT: mov r5, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: ldr r0, [sp] @ 4-byte Reload -; LE-NEXT: vmov.32 d11[1], r7 -; LE-NEXT: vmov.32 d10[1], r0 -; LE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; LE-NEXT: vmov.32 d15[1], r5 -; LE-NEXT: vorr q2, q5, q5 -; LE-NEXT: vmov.32 d13[1], r9 -; LE-NEXT: vmov.32 d9[1], r0 -; LE-NEXT: vmov.32 d14[1], r4 -; LE-NEXT: vmov.32 d12[1], r8 -; LE-NEXT: vorr q0, q7, q7 -; LE-NEXT: vmov.32 d8[1], r1 -; LE-NEXT: vorr q1, q6, q6 -; LE-NEXT: vorr q3, q4, q4 -; LE-NEXT: add sp, sp, #8 -; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: add sp, sp, #4 -; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-NEON-LABEL: llrint_v8i64_v8f128: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #8 -; LE-NEON-NEXT: sub sp, sp, #8 -; LE-NEON-NEXT: mov r11, r3 -; LE-NEON-NEXT: add r3, sp, #208 -; LE-NEON-NEXT: mov r10, r2 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: mov r5, r0 -; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r7, sp, #164 -; LE-NEON-NEXT: ldr r6, [sp, #160] -; LE-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: ldm r7, {r1, r2, r3, r7} -; LE-NEON-NEXT: mov r0, r6 -; LE-NEON-NEXT: ldr r8, [sp, #128] -; LE-NEON-NEXT: ldr r9, [sp, #144] -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #180 -; LE-NEON-NEXT: str r1, [sp] @ 4-byte Spill -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #132 -; LE-NEON-NEXT: mov r7, r1 -; 
LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: mov r0, r8 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #148 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: mov r0, r9 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: mov r0, r5 -; LE-NEON-NEXT: mov r1, r4 -; LE-NEON-NEXT: mov r2, r10 -; LE-NEON-NEXT: mov r3, r11 -; LE-NEON-NEXT: ldr r6, [sp, #112] -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #116 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: mov r0, r6 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #196 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #192] -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d11[1], r7 -; LE-NEON-NEXT: vmov.32 d10[1], r0 -; LE-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d15[1], r5 -; LE-NEON-NEXT: vorr q2, q5, q5 -; LE-NEON-NEXT: vmov.32 d13[1], r9 -; LE-NEON-NEXT: vmov.32 d9[1], r0 -; LE-NEON-NEXT: vmov.32 d14[1], r4 -; LE-NEON-NEXT: vmov.32 d12[1], r8 -; LE-NEON-NEXT: vorr q0, q7, q7 -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vorr q1, q6, q6 -; LE-NEON-NEXT: vorr q3, q4, q4 -; LE-NEON-NEXT: add sp, sp, #8 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-LABEL: llrint_v8i64_v8f128: -; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: .pad #4 -; BE-NEXT: sub sp, sp, #4 -; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, 
d14} -; BE-NEXT: .pad #16 -; BE-NEXT: sub sp, sp, #16 -; BE-NEXT: str r3, [sp, #4] @ 4-byte Spill -; BE-NEXT: add r3, sp, #208 -; BE-NEXT: mov r11, r2 -; BE-NEXT: mov r4, r1 -; BE-NEXT: mov r5, r0 -; BE-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: ldr r7, [sp, #176] -; BE-NEXT: add r3, sp, #180 -; BE-NEXT: str r1, [sp, #12] @ 4-byte Spill -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: mov r0, r7 -; BE-NEXT: ldr r6, [sp, #128] -; BE-NEXT: ldr r8, [sp, #144] -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #132 -; BE-NEXT: str r1, [sp, #8] @ 4-byte Spill -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: mov r0, r6 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #148 +; BE-NEXT: push {r4, r5, r6, r7, r8, lr} +; BE-NEXT: .vsave {d8} +; BE-NEXT: vpush {d8} +; BE-NEXT: mov r8, r3 +; BE-NEXT: add r3, sp, #32 +; BE-NEXT: mov r5, r2 ; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: mov r0, r8 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #160 -; BE-NEXT: mov r9, r0 -; BE-NEXT: mov r7, r1 +; BE-NEXT: mov r7, r0 ; BE-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; BE-NEXT: mov r8, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: mov r0, r5 -; BE-NEXT: mov r1, r4 -; BE-NEXT: mov r2, r11 -; BE-NEXT: ldr r10, [sp, #112] -; BE-NEXT: vmov.32 d12[0], r9 -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #116 ; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: mov r0, r10 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #196 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #192] -; BE-NEXT: mov r5, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: vmov.32 d8[0], r0 +; BE-NEXT: mov r0, r7 +; BE-NEXT: mov r1, r6 +; BE-NEXT: mov r2, r5 +; BE-NEXT: mov r3, r8 ; BE-NEXT: bl llrintl ; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; BE-NEXT: vmov.32 
d14[1], r5 -; BE-NEXT: vmov.32 d9[1], r0 -; BE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; BE-NEXT: vmov.32 d12[1], r7 -; BE-NEXT: vmov.32 d8[1], r0 -; BE-NEXT: vmov.32 d13[1], r4 -; BE-NEXT: vmov.32 d10[1], r6 -; BE-NEXT: vmov.32 d11[1], r8 +; BE-NEXT: vmov.32 d8[1], r4 ; BE-NEXT: vmov.32 d16[1], r1 -; BE-NEXT: vrev64.32 d1, d14 -; BE-NEXT: vrev64.32 d3, d12 -; BE-NEXT: vrev64.32 d5, d9 -; BE-NEXT: vrev64.32 d7, d8 -; BE-NEXT: vrev64.32 d0, d13 -; BE-NEXT: vrev64.32 d2, d10 -; BE-NEXT: vrev64.32 d4, d11 -; BE-NEXT: vrev64.32 d6, d16 -; BE-NEXT: add sp, sp, #16 -; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} -; BE-NEXT: add sp, sp, #4 -; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v8i64_v8f128: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} -; BE-NEON-NEXT: .pad #16 -; BE-NEON-NEXT: sub sp, sp, #16 -; BE-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill -; BE-NEON-NEXT: add r3, sp, #208 -; BE-NEON-NEXT: mov r11, r2 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: mov r5, r0 -; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: ldr r7, [sp, #176] -; BE-NEON-NEXT: add r3, sp, #180 -; BE-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: mov r0, r7 -; BE-NEON-NEXT: ldr r6, [sp, #128] -; BE-NEON-NEXT: ldr r8, [sp, #144] -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #132 -; BE-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: mov r0, r6 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #148 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; 
BE-NEON-NEXT: mov r0, r8 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #160 -; BE-NEON-NEXT: mov r9, r0 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: mov r1, r4 -; BE-NEON-NEXT: mov r2, r11 -; BE-NEON-NEXT: ldr r10, [sp, #112] -; BE-NEON-NEXT: vmov.32 d12[0], r9 -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #116 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: mov r0, r10 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #196 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #192] -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; BE-NEON-NEXT: vmov.32 d14[1], r5 -; BE-NEON-NEXT: vmov.32 d9[1], r0 -; BE-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; BE-NEON-NEXT: vmov.32 d12[1], r7 -; BE-NEON-NEXT: vmov.32 d8[1], r0 -; BE-NEON-NEXT: vmov.32 d13[1], r4 -; BE-NEON-NEXT: vmov.32 d10[1], r6 -; BE-NEON-NEXT: vmov.32 d11[1], r8 -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: vrev64.32 d1, d14 -; BE-NEON-NEXT: vrev64.32 d3, d12 -; BE-NEON-NEXT: vrev64.32 d5, d9 -; BE-NEON-NEXT: vrev64.32 d7, d8 -; BE-NEON-NEXT: vrev64.32 d0, d13 -; BE-NEON-NEXT: vrev64.32 d2, d10 -; BE-NEON-NEXT: vrev64.32 d4, d11 -; BE-NEON-NEXT: vrev64.32 d6, d16 -; BE-NEON-NEXT: add sp, sp, #16 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) - ret <8 x i64> %a +; BE-NEXT: vrev64.32 d1, d8 +; BE-NEXT: vrev64.32 d0, d16 +; BE-NEXT: vpop {d8} +; 
BE-NEXT: pop {r4, r5, r6, r7, r8, pc} + %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) + ret <2 x i64> %a } -declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>) +declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) -define <16 x i64> @llrint_v16f128(<16 x fp128> %x) { -; LE-LABEL: llrint_v16f128: +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) { +; LE-LABEL: llrint_v4i64_v4f128: ; LE: @ %bb.0: -; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEXT: .pad #4 -; LE-NEXT: sub sp, sp, #4 -; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #72 -; LE-NEXT: sub sp, sp, #72 -; LE-NEXT: mov r6, r3 -; LE-NEXT: add r3, sp, #408 +; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-NEXT: .vsave {d8, d9, d10, d11} +; LE-NEXT: vpush {d8, d9, d10, d11} +; LE-NEXT: mov r5, r3 +; LE-NEXT: add r3, sp, #96 ; LE-NEXT: mov r7, r2 +; LE-NEXT: mov r6, r1 ; LE-NEXT: mov r4, r0 ; LE-NEXT: ldm r3, {r0, r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: add r5, sp, #176 -; LE-NEXT: mov r10, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: mov r0, r7 -; LE-NEXT: ldm r5, {r2, r3, r5} -; LE-NEXT: mov r1, r6 -; LE-NEXT: ldr r8, [sp, #232] -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #188 ; LE-NEXT: mov r9, r1 -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: mov r0, r5 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #236 -; LE-NEXT: mov r11, r1 ; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: mov r0, r8 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #252 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: ldr r0, [sp, #248] -; LE-NEXT: mov r8, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #268 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: ldr r0, [sp, #264] -; LE-NEXT: mov r6, r1 -; LE-NEXT: ldm r3, 
{r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #284 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: ldr r0, [sp, #280] -; LE-NEXT: mov r7, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} +; LE-NEXT: mov r0, r4 +; LE-NEXT: mov r1, r6 +; LE-NEXT: mov r2, r7 +; LE-NEXT: mov r3, r5 +; LE-NEXT: ldr r8, [sp, #80] +; LE-NEXT: ldr r10, [sp, #64] ; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #316 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: ldr r0, [sp, #312] +; LE-NEXT: add r3, sp, #68 ; LE-NEXT: mov r5, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: vmov.32 d15[1], r5 -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: ldr r5, [sp, #300] -; LE-NEXT: vmov.32 d14[1], r7 -; LE-NEXT: ldr r2, [sp, #304] -; LE-NEXT: ldr r3, [sp, #308] -; LE-NEXT: vmov.32 d11[1], r6 -; LE-NEXT: ldr r6, [sp, #200] -; LE-NEXT: ldr r7, [sp, #204] -; LE-NEXT: vmov.32 d10[1], r8 -; LE-NEXT: ldr r8, [sp, #344] -; LE-NEXT: vmov.32 d9[1], r11 -; LE-NEXT: ldr r11, [sp, #216] -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEXT: add lr, sp, #40 -; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: ldr r0, [sp, #296] -; LE-NEXT: vmov.32 d8[1], r9 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vorr q5, q8, q8 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: vorr q4, q6, q6 -; LE-NEXT: vmov.32 d11[1], r1 -; LE-NEXT: mov r1, r5 -; LE-NEXT: vmov.32 d9[1], r10 -; LE-NEXT: bl llrintl -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: ldr r2, [sp, #208] -; LE-NEXT: ldr r3, [sp, #212] -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: mov r9, r1 -; LE-NEXT: mov r0, r6 -; LE-NEXT: mov r1, r7 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #220 -; LE-NEXT: mov r10, r1 ; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: mov r0, r11 +; LE-NEXT: mov r0, r10 ; LE-NEXT: ldm r3, {r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #348 -; LE-NEXT: mov r11, r1 +; LE-NEXT: add r3, sp, #84 +; LE-NEXT: mov r4, r1 ; LE-NEXT: vmov.32 d11[0], r0 ; 
LE-NEXT: mov r0, r8 ; LE-NEXT: ldm r3, {r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #364 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: ldr r0, [sp, #360] -; LE-NEXT: mov r8, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #380 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: ldr r0, [sp, #376] -; LE-NEXT: mov r5, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #396 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: ldr r0, [sp, #392] -; LE-NEXT: mov r6, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #332 ; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: ldr r0, [sp, #328] -; LE-NEXT: mov r7, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: add r0, r4, #64 -; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEXT: add lr, sp, #24 -; LE-NEXT: vmov.32 d13[1], r8 -; LE-NEXT: vmov.32 d18[1], r9 -; LE-NEXT: vmov.32 d15[1], r6 -; LE-NEXT: vmov.32 d12[1], r1 -; LE-NEXT: vmov.32 d14[1], r5 -; LE-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEXT: vmov.32 d8[1], r7 -; LE-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-NEXT: vst1.64 {d8, d9}, [r0:128] -; LE-NEXT: vmov.32 d11[1], r11 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #40 -; LE-NEXT: vmov.32 d10[1], r10 -; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-NEXT: vst1.64 {d10, d11}, [r4:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #56 -; LE-NEXT: vst1.64 {d16, d17}, [r4:128]! 
-; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r4:128] -; LE-NEXT: add sp, sp, #72 -; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: add sp, sp, #4 -; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-NEON-LABEL: llrint_v16f128: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #72 -; LE-NEON-NEXT: sub sp, sp, #72 -; LE-NEON-NEXT: mov r6, r3 -; LE-NEON-NEXT: add r3, sp, #408 -; LE-NEON-NEXT: mov r7, r2 -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r5, sp, #176 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: ldm r5, {r2, r3, r5} -; LE-NEON-NEXT: mov r1, r6 -; LE-NEON-NEXT: ldr r8, [sp, #232] -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #188 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: mov r0, r5 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #236 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: mov r0, r8 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #252 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #248] -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #268 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #264] -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #284 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: ldr 
r0, [sp, #280] -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #316 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #312] -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d15[1], r5 -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: ldr r5, [sp, #300] -; LE-NEON-NEXT: vmov.32 d14[1], r7 -; LE-NEON-NEXT: ldr r2, [sp, #304] -; LE-NEON-NEXT: ldr r3, [sp, #308] -; LE-NEON-NEXT: vmov.32 d11[1], r6 -; LE-NEON-NEXT: ldr r6, [sp, #200] -; LE-NEON-NEXT: ldr r7, [sp, #204] -; LE-NEON-NEXT: vmov.32 d10[1], r8 -; LE-NEON-NEXT: ldr r8, [sp, #344] -; LE-NEON-NEXT: vmov.32 d9[1], r11 -; LE-NEON-NEXT: ldr r11, [sp, #216] -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #296] -; LE-NEON-NEXT: vmov.32 d8[1], r9 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vorr q5, q8, q8 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: vorr q4, q6, q6 -; LE-NEON-NEXT: vmov.32 d11[1], r1 -; LE-NEON-NEXT: mov r1, r5 -; LE-NEON-NEXT: vmov.32 d9[1], r10 -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: ldr r2, [sp, #208] -; LE-NEON-NEXT: ldr r3, [sp, #212] -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: mov r9, r1 -; LE-NEON-NEXT: mov r0, r6 -; LE-NEON-NEXT: mov r1, r7 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #220 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: mov r0, r11 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #348 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: mov r0, r8 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl 
-; LE-NEON-NEXT: add r3, sp, #364 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #360] -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #380 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #376] -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #396 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #392] -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #332 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #328] -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: add r0, r4, #64 -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #24 -; LE-NEON-NEXT: vmov.32 d13[1], r8 -; LE-NEON-NEXT: vmov.32 d18[1], r9 -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: vmov.32 d12[1], r1 -; LE-NEON-NEXT: vmov.32 d14[1], r5 -; LE-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEON-NEXT: vmov.32 d8[1], r7 -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128] -; LE-NEON-NEXT: vmov.32 d11[1], r11 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #40 -; LE-NEON-NEXT: vmov.32 d10[1], r10 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r4:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #56 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! 
-; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] -; LE-NEON-NEXT: add sp, sp, #72 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; LE-NEXT: vmov.32 d11[1], r4 +; LE-NEXT: vmov.32 d9[1], r9 +; LE-NEXT: vmov.32 d10[1], r5 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q0, q5, q5 +; LE-NEXT: vorr q1, q4, q4 +; LE-NEXT: vpop {d8, d9, d10, d11} +; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; -; BE-LABEL: llrint_v16f128: +; BE-LABEL: llrint_v4i64_v4f128: ; BE: @ %bb.0: -; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEXT: .pad #4 -; BE-NEXT: sub sp, sp, #4 -; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #56 -; BE-NEXT: sub sp, sp, #56 +; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-NEXT: .vsave {d8, d9, d10} +; BE-NEXT: vpush {d8, d9, d10} ; BE-NEXT: mov r5, r3 -; BE-NEXT: add r3, sp, #376 -; BE-NEXT: mov r6, r2 +; BE-NEXT: add r3, sp, #88 +; BE-NEXT: mov r7, r2 +; BE-NEXT: mov r6, r1 ; BE-NEXT: mov r4, r0 ; BE-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: ldr r7, [sp, #392] -; BE-NEXT: add r3, sp, #396 ; BE-NEXT: mov r9, r1 ; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: mov r0, r7 -; BE-NEXT: ldr r11, [sp, #168] -; BE-NEXT: bl llrintl -; BE-NEXT: ldr r2, [sp, #160] -; BE-NEXT: mov r10, r1 -; BE-NEXT: ldr r3, [sp, #164] -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: mov r0, r6 -; BE-NEXT: mov r1, r5 -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #172 -; BE-NEXT: mov r8, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: mov r0, r11 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #220 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: ldr 
r0, [sp, #216] -; BE-NEXT: mov r11, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #236 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: ldr r0, [sp, #232] -; BE-NEXT: mov r6, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #252 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: ldr r0, [sp, #248] -; BE-NEXT: mov r7, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #268 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #264] -; BE-NEXT: mov r5, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: ldr r0, [sp, #280] -; BE-NEXT: ldr r2, [sp, #288] -; BE-NEXT: vmov.32 d13[1], r7 -; BE-NEXT: ldr r7, [sp, #284] -; BE-NEXT: ldr r3, [sp, #292] -; BE-NEXT: vmov.32 d14[1], r5 -; BE-NEXT: ldr r5, [sp, #328] -; BE-NEXT: vmov.32 d12[1], r6 -; BE-NEXT: ldr r6, [sp, #300] -; BE-NEXT: vmov.32 d10[1], r8 -; BE-NEXT: ldr r8, [sp, #184] -; BE-NEXT: vmov.32 d11[1], r11 -; BE-NEXT: vmov.32 d9[1], r10 -; BE-NEXT: vmov.32 d8[1], r9 -; BE-NEXT: vmov.32 d15[1], r1 -; BE-NEXT: mov r1, r7 -; BE-NEXT: vstr d14, [sp, #48] @ 8-byte Spill -; BE-NEXT: vstr d13, [sp, #40] @ 8-byte Spill -; BE-NEXT: vstr d12, [sp, #32] @ 8-byte Spill -; BE-NEXT: vstr d11, [sp, #24] @ 8-byte Spill -; BE-NEXT: vstr d10, [sp, #16] @ 8-byte Spill -; BE-NEXT: vstr d9, [sp, #8] @ 8-byte Spill -; BE-NEXT: vstr d8, [sp] @ 8-byte Spill -; BE-NEXT: bl llrintl -; BE-NEXT: mov r10, r1 -; BE-NEXT: ldr r1, [sp, #296] -; BE-NEXT: ldr r2, [sp, #304] -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: ldr r3, [sp, #308] -; BE-NEXT: mov r0, r1 +; BE-NEXT: mov r0, r4 ; BE-NEXT: mov r1, r6 +; BE-NEXT: mov r2, r7 +; BE-NEXT: mov r3, r5 +; BE-NEXT: ldr r8, [sp, #72] +; BE-NEXT: ldr r10, [sp, #56] ; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #332 -; BE-NEXT: mov r11, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: mov r0, r5 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, 
#188 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: mov r0, r8 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #204 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: ldr r0, [sp, #200] -; BE-NEXT: mov r8, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #348 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: ldr r0, [sp, #344] +; BE-NEXT: add r3, sp, #60 ; BE-NEXT: mov r5, r1 +; BE-NEXT: vmov.32 d9[0], r0 +; BE-NEXT: mov r0, r10 ; BE-NEXT: ldm r3, {r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #364 -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: ldr r0, [sp, #360] -; BE-NEXT: mov r9, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #316 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #312] -; BE-NEXT: mov r6, r1 +; BE-NEXT: add r3, sp, #76 +; BE-NEXT: mov r4, r1 +; BE-NEXT: vmov.32 d10[0], r0 +; BE-NEXT: mov r0, r8 ; BE-NEXT: ldm r3, {r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: vldr d18, [sp, #48] @ 8-byte Reload -; BE-NEXT: vrev64.32 d17, d15 -; BE-NEXT: vrev64.32 d16, d18 -; BE-NEXT: vldr d18, [sp, #40] @ 8-byte Reload -; BE-NEXT: vmov.32 d24[0], r0 -; BE-NEXT: add r0, r4, #64 -; BE-NEXT: vldr d20, [sp, #32] @ 8-byte Reload -; BE-NEXT: vrev64.32 d19, d18 -; BE-NEXT: vmov.32 d9[1], r11 -; BE-NEXT: vmov.32 d10[1], r7 -; BE-NEXT: vrev64.32 d18, d20 -; BE-NEXT: vldr d20, [sp, #24] @ 8-byte Reload -; BE-NEXT: vmov.32 d8[1], r10 -; BE-NEXT: vmov.32 d14[1], r6 -; BE-NEXT: vmov.32 d24[1], r1 -; BE-NEXT: vldr d22, [sp, #16] @ 8-byte Reload -; BE-NEXT: vrev64.32 d21, d20 -; BE-NEXT: vrev64.32 d1, d9 -; BE-NEXT: vmov.32 d13[1], r9 -; BE-NEXT: vrev64.32 d31, d10 -; BE-NEXT: vrev64.32 d20, d22 -; BE-NEXT: vldr d22, [sp, #8] @ 8-byte Reload -; BE-NEXT: vrev64.32 d0, d8 -; BE-NEXT: vrev64.32 d29, d14 -; BE-NEXT: vmov.32 d12[1], r5 -; BE-NEXT: vrev64.32 d30, d24 -; BE-NEXT: vrev64.32 d27, d22 -; BE-NEXT: vldr d22, [sp] @ 8-byte Reload -; BE-NEXT: vst1.64 {d0, 
d1}, [r0:128]! -; BE-NEXT: vmov.32 d11[1], r8 -; BE-NEXT: vrev64.32 d28, d13 -; BE-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-NEXT: vrev64.32 d26, d22 -; BE-NEXT: vrev64.32 d23, d12 -; BE-NEXT: vst1.64 {d28, d29}, [r0:128]! -; BE-NEXT: vrev64.32 d22, d11 -; BE-NEXT: vst1.64 {d26, d27}, [r0:128] -; BE-NEXT: vst1.64 {d20, d21}, [r4:128]! -; BE-NEXT: vst1.64 {d22, d23}, [r4:128]! -; BE-NEXT: vst1.64 {d18, d19}, [r4:128]! -; BE-NEXT: vst1.64 {d16, d17}, [r4:128] -; BE-NEXT: add sp, sp, #56 -; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: add sp, sp, #4 -; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v16f128: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #56 -; BE-NEON-NEXT: sub sp, sp, #56 -; BE-NEON-NEXT: mov r5, r3 -; BE-NEON-NEXT: add r3, sp, #376 -; BE-NEON-NEXT: mov r6, r2 -; BE-NEON-NEXT: mov r4, r0 -; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: ldr r7, [sp, #392] -; BE-NEON-NEXT: add r3, sp, #396 -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: mov r0, r7 -; BE-NEON-NEXT: ldr r11, [sp, #168] -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: ldr r2, [sp, #160] -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: ldr r3, [sp, #164] -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: mov r0, r6 -; BE-NEON-NEXT: mov r1, r5 -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #172 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: mov r0, r11 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #220 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; 
BE-NEON-NEXT: ldr r0, [sp, #216] -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #236 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #232] -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #252 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #248] -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #268 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #264] -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #280] -; BE-NEON-NEXT: ldr r2, [sp, #288] -; BE-NEON-NEXT: vmov.32 d13[1], r7 -; BE-NEON-NEXT: ldr r7, [sp, #284] -; BE-NEON-NEXT: ldr r3, [sp, #292] -; BE-NEON-NEXT: vmov.32 d14[1], r5 -; BE-NEON-NEXT: ldr r5, [sp, #328] -; BE-NEON-NEXT: vmov.32 d12[1], r6 -; BE-NEON-NEXT: ldr r6, [sp, #300] -; BE-NEON-NEXT: vmov.32 d10[1], r8 -; BE-NEON-NEXT: ldr r8, [sp, #184] -; BE-NEON-NEXT: vmov.32 d11[1], r11 -; BE-NEON-NEXT: vmov.32 d9[1], r10 -; BE-NEON-NEXT: vmov.32 d8[1], r9 -; BE-NEON-NEXT: vmov.32 d15[1], r1 -; BE-NEON-NEXT: mov r1, r7 -; BE-NEON-NEXT: vstr d14, [sp, #48] @ 8-byte Spill -; BE-NEON-NEXT: vstr d13, [sp, #40] @ 8-byte Spill -; BE-NEON-NEXT: vstr d12, [sp, #32] @ 8-byte Spill -; BE-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill -; BE-NEON-NEXT: vstr d10, [sp, #16] @ 8-byte Spill -; BE-NEON-NEXT: vstr d9, [sp, #8] @ 8-byte Spill -; BE-NEON-NEXT: vstr d8, [sp] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: ldr r1, [sp, #296] -; BE-NEON-NEXT: ldr r2, [sp, #304] -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: ldr r3, [sp, #308] -; BE-NEON-NEXT: mov r0, r1 -; BE-NEON-NEXT: mov r1, r6 -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, 
sp, #332 -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #188 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: mov r0, r8 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #204 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #200] -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #348 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #344] -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #364 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #360] -; BE-NEON-NEXT: mov r9, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #316 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #312] -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vldr d18, [sp, #48] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d17, d15 -; BE-NEON-NEXT: vrev64.32 d16, d18 -; BE-NEON-NEXT: vldr d18, [sp, #40] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d24[0], r0 -; BE-NEON-NEXT: add r0, r4, #64 -; BE-NEON-NEXT: vldr d20, [sp, #32] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d19, d18 -; BE-NEON-NEXT: vmov.32 d9[1], r11 -; BE-NEON-NEXT: vmov.32 d10[1], r7 -; BE-NEON-NEXT: vrev64.32 d18, d20 -; BE-NEON-NEXT: vldr d20, [sp, #24] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d8[1], r10 -; BE-NEON-NEXT: vmov.32 d14[1], r6 -; BE-NEON-NEXT: vmov.32 d24[1], r1 -; BE-NEON-NEXT: vldr d22, [sp, #16] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d21, d20 -; BE-NEON-NEXT: vrev64.32 d1, d9 -; BE-NEON-NEXT: vmov.32 d13[1], r9 -; BE-NEON-NEXT: vrev64.32 d31, d10 -; BE-NEON-NEXT: vrev64.32 d20, d22 -; 
BE-NEON-NEXT: vldr d22, [sp, #8] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d0, d8 -; BE-NEON-NEXT: vrev64.32 d29, d14 -; BE-NEON-NEXT: vmov.32 d12[1], r5 -; BE-NEON-NEXT: vrev64.32 d30, d24 -; BE-NEON-NEXT: vrev64.32 d27, d22 -; BE-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload -; BE-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]! -; BE-NEON-NEXT: vmov.32 d11[1], r8 -; BE-NEON-NEXT: vrev64.32 d28, d13 -; BE-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 d26, d22 -; BE-NEON-NEXT: vrev64.32 d23, d12 -; BE-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 d22, d11 -; BE-NEON-NEXT: vst1.64 {d26, d27}, [r0:128] -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]! -; BE-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]! -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r4:128]! -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] -; BE-NEON-NEXT: add sp, sp, #56 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128> %x) - ret <16 x i64> %a +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: vmov.32 d10[1], r4 +; BE-NEXT: vmov.32 d8[1], r9 +; BE-NEXT: vmov.32 d9[1], r5 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d1, d10 +; BE-NEXT: vrev64.32 d3, d8 +; BE-NEXT: vrev64.32 d0, d9 +; BE-NEXT: vrev64.32 d2, d16 +; BE-NEXT: vpop {d8, d9, d10} +; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) + ret <4 x i64> %a } -declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>) +declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) -define <32 x i64> @llrint_v32f128(<32 x fp128> %x) { -; LE-LABEL: llrint_v32f128: +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) { +; LE-LABEL: llrint_v8i64_v8f128: ; LE: @ %bb.0: ; LE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; LE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -9669,1458 +2225,170 @@ define <32 x 
i64> @llrint_v32f128(<32 x fp128> %x) { ; LE-NEXT: sub sp, sp, #4 ; LE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEXT: .pad #192 -; LE-NEXT: sub sp, sp, #192 -; LE-NEXT: str r3, [sp, #60] @ 4-byte Spill -; LE-NEXT: add r3, sp, #688 -; LE-NEXT: str r2, [sp, #56] @ 4-byte Spill -; LE-NEXT: mov r9, r0 -; LE-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #560 -; LE-NEXT: mov r4, r0 -; LE-NEXT: str r1, [sp, #64] @ 4-byte Spill -; LE-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: ldr r7, [sp, #544] -; LE-NEXT: ldr r6, [sp, #548] -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: ldr r2, [sp, #552] -; LE-NEXT: vmov.32 d17[1], r1 -; LE-NEXT: ldr r3, [sp, #556] -; LE-NEXT: mov r0, r7 -; LE-NEXT: mov r1, r6 -; LE-NEXT: vorr q4, q8, q8 -; LE-NEXT: ldr r5, [sp, #528] -; LE-NEXT: vmov.32 d17[0], r4 -; LE-NEXT: ldr r10, [sp, #304] -; LE-NEXT: ldr r8, [sp, #368] -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #532 -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: mov r11, r1 -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: mov r0, r5 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #308 -; LE-NEXT: mov r5, r1 -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vmov.32 d17[0], r0 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: mov r0, r10 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #372 -; LE-NEXT: mov r10, r1 -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: mov r0, r8 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #404 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: ldr r0, [sp, #400] -; LE-NEXT: mov r6, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #596 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: ldr r0, [sp, #592] -; LE-NEXT: mov r7, 
r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #676 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: ldr r0, [sp, #672] +; LE-NEXT: .pad #8 +; LE-NEXT: sub sp, sp, #8 +; LE-NEXT: mov r11, r3 +; LE-NEXT: add r3, sp, #208 +; LE-NEXT: mov r10, r2 ; LE-NEXT: mov r4, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: vmov.32 d13[1], r4 -; LE-NEXT: str r1, [sp, #52] @ 4-byte Spill -; LE-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEXT: add lr, sp, #80 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #128 -; LE-NEXT: vmov.32 d9[1], r7 -; LE-NEXT: ldr r1, [sp, #628] -; LE-NEXT: ldr r2, [sp, #632] -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: add lr, sp, #112 -; LE-NEXT: vmov.32 d15[1], r6 -; LE-NEXT: ldr r3, [sp, #636] -; LE-NEXT: ldr r7, [sp, #64] @ 4-byte Reload -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vmov.32 d11[1], r10 -; LE-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d18[0], r0 -; LE-NEXT: ldr r0, [sp, #624] -; LE-NEXT: vmov.32 d16[1], r11 -; LE-NEXT: vmov.32 d9[1], r5 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: vmov.32 d19[1], r7 -; LE-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #512 -; LE-NEXT: str r0, [sp, #48] @ 4-byte Spill -; LE-NEXT: str r1, [sp, #64] @ 4-byte Spill -; LE-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #768 -; LE-NEXT: mov r11, r0 -; LE-NEXT: str r1, [sp, #28] @ 4-byte Spill +; LE-NEXT: mov r5, r0 ; LE-NEXT: ldm r3, {r0, r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: ldr r6, [sp, #784] -; LE-NEXT: add r3, sp, #788 -; LE-NEXT: mov r8, r1 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: 
ldm r3, {r1, r2, r3} -; LE-NEXT: mov r0, r6 -; LE-NEXT: ldr r5, [sp, #736] -; LE-NEXT: ldr r7, [sp, #752] -; LE-NEXT: ldr r4, [sp, #720] -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #740 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: mov r0, r5 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #756 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: mov r0, r7 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #724 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: mov r0, r4 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: vmov.32 d13[1], r7 -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: ldr r2, [sp, #296] -; LE-NEXT: vmov.32 d12[1], r5 -; LE-NEXT: ldr r3, [sp, #300] -; LE-NEXT: ldr r4, [sp, #576] -; LE-NEXT: vmov.32 d11[0], r0 -; LE-NEXT: ldr r0, [sp, #56] @ 4-byte Reload -; LE-NEXT: ldr r10, [sp, #384] -; LE-NEXT: vmov.32 d15[1], r6 -; LE-NEXT: ldr r6, [sp, #352] -; LE-NEXT: vmov.32 d14[1], r8 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #32 -; LE-NEXT: vmov.32 d11[1], r1 -; LE-NEXT: ldr r1, [sp, #60] @ 4-byte Reload -; LE-NEXT: vmov.32 d8[0], r11 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: bl llrintl -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: add r3, sp, #356 -; LE-NEXT: mov r5, r1 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: vmov.32 d16[0], r0 +; LE-NEXT: add r7, sp, #164 +; LE-NEXT: ldr r6, [sp, #160] +; LE-NEXT: str r1, [sp, #4] @ 4-byte Spill +; LE-NEXT: vmov.32 d9[0], r0 +; LE-NEXT: ldm r7, {r1, r2, r3, r7} ; LE-NEXT: mov r0, r6 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: bl llrintl -; LE-NEXT: add lr, sp, #112 -; LE-NEXT: add r3, sp, #388 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-NEXT: ldm r3, {r1, r2, r3} -; 
LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: mov r0, r10 -; LE-NEXT: bl llrintl -; LE-NEXT: add lr, sp, #128 -; LE-NEXT: add r3, sp, #580 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: mov r0, r4 -; LE-NEXT: bl llrintl -; LE-NEXT: add lr, sp, #80 -; LE-NEXT: add r3, sp, #708 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: ldr r0, [sp, #704] +; LE-NEXT: ldr r8, [sp, #128] +; LE-NEXT: ldr r9, [sp, #144] ; LE-NEXT: bl llrintl -; LE-NEXT: vmov.32 d8[1], r4 -; LE-NEXT: add lr, sp, #80 -; LE-NEXT: ldr r2, [sp, #52] @ 4-byte Reload -; LE-NEXT: vmov.32 d12[1], r6 -; LE-NEXT: ldr r6, [sp, #644] -; LE-NEXT: ldr r3, [sp, #652] -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: add lr, sp, #128 -; LE-NEXT: vmov.32 d14[1], r7 -; LE-NEXT: ldr r4, [sp, #480] -; LE-NEXT: ldr r7, [sp, #656] -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #112 -; LE-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-NEXT: add r3, sp, #180 +; LE-NEXT: str r1, [sp] @ 4-byte Spill ; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; LE-NEXT: ldr r10, [sp, #496] -; LE-NEXT: vmov.32 d16[1], r5 -; LE-NEXT: add r5, r9, #192 -; LE-NEXT: ldr r8, [sp, #608] -; LE-NEXT: vmov.32 d10[1], r1 -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vmov.32 d16[1], r0 -; LE-NEXT: ldr r0, [sp, #640] -; LE-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #8 -; LE-NEXT: vmov.32 d16[1], r2 -; LE-NEXT: ldr r2, [sp, #648] -; LE-NEXT: vst1.64 {d16, d17}, [r5:128]! -; LE-NEXT: vst1.64 {d10, d11}, [r5:128]! 
-; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r5:128]! -; LE-NEXT: ldr r1, [sp, #48] @ 4-byte Reload -; LE-NEXT: vmov.32 d9[0], r1 -; LE-NEXT: mov r1, r6 -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #660 -; LE-NEXT: mov r11, r1 -; LE-NEXT: vmov.32 d12[0], r0 ; LE-NEXT: mov r0, r7 ; LE-NEXT: ldm r3, {r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #484 +; LE-NEXT: add r3, sp, #132 ; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: mov r0, r4 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #500 -; LE-NEXT: mov r6, r1 -; LE-NEXT: vmov.32 d10[0], r0 -; LE-NEXT: mov r0, r10 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #612 -; LE-NEXT: mov r4, r1 ; LE-NEXT: vmov.32 d11[0], r0 ; LE-NEXT: mov r0, r8 ; LE-NEXT: ldm r3, {r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: vmov.32 d8[0], r0 -; LE-NEXT: ldr r0, [sp, #64] @ 4-byte Reload -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: add r8, r9, #128 -; LE-NEXT: vmov.32 d13[1], r7 -; LE-NEXT: ldr r2, [sp, #344] -; LE-NEXT: ldr r3, [sp, #348] -; LE-NEXT: vmov.32 d12[1], r11 -; LE-NEXT: ldr r7, [sp, #452] -; LE-NEXT: ldr r10, [sp, #416] -; LE-NEXT: vmov.32 d9[1], r0 -; LE-NEXT: ldr r0, [sp, #336] -; LE-NEXT: vmov.32 d8[1], r1 -; LE-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEXT: add lr, sp, #64 -; LE-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEXT: add lr, sp, #32 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #144 -; LE-NEXT: vmov.32 d11[1], r4 -; LE-NEXT: ldr r4, [sp, #340] -; LE-NEXT: vst1.64 {d16, d17}, [r5:128] -; LE-NEXT: mov r1, r4 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #80 -; LE-NEXT: vmov.32 d10[1], r6 -; LE-NEXT: ldr r6, [sp, #448] -; LE-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r8:128]! 
-; LE-NEXT: bl llrintl -; LE-NEXT: ldr r2, [sp, #456] -; LE-NEXT: mov r11, r1 -; LE-NEXT: ldr r3, [sp, #460] -; LE-NEXT: vmov.32 d15[0], r0 -; LE-NEXT: mov r0, r6 -; LE-NEXT: mov r1, r7 -; LE-NEXT: ldr r5, [sp, #432] -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #468 +; LE-NEXT: add r3, sp, #148 +; LE-NEXT: mov r8, r1 ; LE-NEXT: vmov.32 d12[0], r0 -; LE-NEXT: ldr r0, [sp, #464] -; LE-NEXT: mov r6, r1 -; LE-NEXT: ldm r3, {r1, r2, r3} -; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #420 -; LE-NEXT: mov r7, r1 -; LE-NEXT: vmov.32 d13[0], r0 -; LE-NEXT: mov r0, r10 +; LE-NEXT: mov r0, r9 ; LE-NEXT: ldm r3, {r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #436 -; LE-NEXT: mov r4, r1 -; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: mov r9, r1 +; LE-NEXT: vmov.32 d13[0], r0 ; LE-NEXT: mov r0, r5 +; LE-NEXT: mov r1, r4 +; LE-NEXT: mov r2, r10 +; LE-NEXT: mov r3, r11 +; LE-NEXT: ldr r6, [sp, #112] +; LE-NEXT: bl llrintl +; LE-NEXT: add r3, sp, #116 +; LE-NEXT: mov r4, r1 +; LE-NEXT: vmov.32 d14[0], r0 +; LE-NEXT: mov r0, r6 ; LE-NEXT: ldm r3, {r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: add r3, sp, #324 -; LE-NEXT: vmov.32 d9[0], r0 -; LE-NEXT: ldr r0, [sp, #320] +; LE-NEXT: add r3, sp, #196 +; LE-NEXT: vmov.32 d15[0], r0 +; LE-NEXT: ldr r0, [sp, #192] ; LE-NEXT: mov r5, r1 ; LE-NEXT: ldm r3, {r1, r2, r3} ; LE-NEXT: bl llrintl -; LE-NEXT: add lr, sp, #64 -; LE-NEXT: vmov.32 d9[1], r5 -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #96 -; LE-NEXT: vmov.32 d13[1], r7 -; LE-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #176 -; LE-NEXT: vmov.32 d8[1], r4 -; LE-NEXT: vmov.32 d12[1], r6 -; LE-NEXT: vmov.32 d14[0], r0 -; LE-NEXT: add r0, r9, #64 -; LE-NEXT: vst1.64 {d16, d17}, [r8:128] -; LE-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEXT: vst1.64 {d10, d11}, [r0:128]! 
-; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #160 -; LE-NEXT: vmov.32 d15[1], r11 -; LE-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #112 -; LE-NEXT: vmov.32 d14[1], r1 -; LE-NEXT: vst1.64 {d16, d17}, [r9:128]! -; LE-NEXT: vst1.64 {d14, d15}, [r9:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: add lr, sp, #128 -; LE-NEXT: vst1.64 {d16, d17}, [r9:128]! -; LE-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEXT: vst1.64 {d16, d17}, [r9:128] -; LE-NEXT: add sp, sp, #192 +; LE-NEXT: vmov.32 d8[0], r0 +; LE-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-NEXT: vmov.32 d11[1], r7 +; LE-NEXT: vmov.32 d10[1], r0 +; LE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; LE-NEXT: vmov.32 d15[1], r5 +; LE-NEXT: vorr q2, q5, q5 +; LE-NEXT: vmov.32 d13[1], r9 +; LE-NEXT: vmov.32 d9[1], r0 +; LE-NEXT: vmov.32 d14[1], r4 +; LE-NEXT: vmov.32 d12[1], r8 +; LE-NEXT: vorr q0, q7, q7 +; LE-NEXT: vmov.32 d8[1], r1 +; LE-NEXT: vorr q1, q6, q6 +; LE-NEXT: vorr q3, q4, q4 +; LE-NEXT: add sp, sp, #8 ; LE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-NEXT: add sp, sp, #4 ; LE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-NEON-LABEL: llrint_v32f128: -; LE-NEON: @ %bb.0: -; LE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-NEON-NEXT: .pad #4 -; LE-NEON-NEXT: sub sp, sp, #4 -; LE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: .pad #192 -; LE-NEON-NEXT: sub sp, sp, #192 -; LE-NEON-NEXT: str r3, [sp, #60] @ 4-byte Spill -; LE-NEON-NEXT: add r3, sp, #688 -; LE-NEON-NEXT: str r2, [sp, #56] @ 4-byte Spill -; LE-NEON-NEXT: mov r9, r0 -; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #560 -; LE-NEON-NEXT: mov r4, r0 -; LE-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill -; 
LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: ldr r7, [sp, #544] -; LE-NEON-NEXT: ldr r6, [sp, #548] -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: ldr r2, [sp, #552] -; LE-NEON-NEXT: vmov.32 d17[1], r1 -; LE-NEON-NEXT: ldr r3, [sp, #556] -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: mov r1, r6 -; LE-NEON-NEXT: vorr q4, q8, q8 -; LE-NEON-NEXT: ldr r5, [sp, #528] -; LE-NEON-NEXT: vmov.32 d17[0], r4 -; LE-NEON-NEXT: ldr r10, [sp, #304] -; LE-NEON-NEXT: ldr r8, [sp, #368] -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #532 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: mov r0, r5 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #308 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vmov.32 d17[0], r0 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: mov r0, r10 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #372 -; LE-NEON-NEXT: mov r10, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: mov r0, r8 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #404 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #400] -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #596 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #592] -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #676 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #672] -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; 
LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: vmov.32 d13[1], r4 -; LE-NEON-NEXT: str r1, [sp, #52] @ 4-byte Spill -; LE-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #128 -; LE-NEON-NEXT: vmov.32 d9[1], r7 -; LE-NEON-NEXT: ldr r1, [sp, #628] -; LE-NEON-NEXT: ldr r2, [sp, #632] -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #112 -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: ldr r3, [sp, #636] -; LE-NEON-NEXT: ldr r7, [sp, #64] @ 4-byte Reload -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vmov.32 d11[1], r10 -; LE-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d18[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #624] -; LE-NEON-NEXT: vmov.32 d16[1], r11 -; LE-NEON-NEXT: vmov.32 d9[1], r5 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: vmov.32 d19[1], r7 -; LE-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #512 -; LE-NEON-NEXT: str r0, [sp, #48] @ 4-byte Spill -; LE-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill -; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #768 -; LE-NEON-NEXT: mov r11, r0 -; LE-NEON-NEXT: str r1, [sp, #28] @ 4-byte Spill -; LE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: ldr r6, [sp, #784] -; LE-NEON-NEXT: add r3, sp, #788 -; LE-NEON-NEXT: mov r8, r1 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: mov r0, r6 -; LE-NEON-NEXT: ldr r5, [sp, #736] -; LE-NEON-NEXT: ldr r7, [sp, #752] -; LE-NEON-NEXT: 
ldr r4, [sp, #720] -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #740 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: mov r0, r5 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #756 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #724 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: mov r0, r4 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d13[1], r7 -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: ldr r2, [sp, #296] -; LE-NEON-NEXT: vmov.32 d12[1], r5 -; LE-NEON-NEXT: ldr r3, [sp, #300] -; LE-NEON-NEXT: ldr r4, [sp, #576] -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload -; LE-NEON-NEXT: ldr r10, [sp, #384] -; LE-NEON-NEXT: vmov.32 d15[1], r6 -; LE-NEON-NEXT: ldr r6, [sp, #352] -; LE-NEON-NEXT: vmov.32 d14[1], r8 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #32 -; LE-NEON-NEXT: vmov.32 d11[1], r1 -; LE-NEON-NEXT: ldr r1, [sp, #60] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d8[0], r11 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: add r3, sp, #356 -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: vmov.32 d16[0], r0 -; LE-NEON-NEXT: mov r0, r6 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add lr, sp, #112 -; LE-NEON-NEXT: add r3, sp, #388 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: vmov.32 
d14[0], r0 -; LE-NEON-NEXT: mov r0, r10 -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add lr, sp, #128 -; LE-NEON-NEXT: add r3, sp, #580 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: mov r0, r4 -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: add r3, sp, #708 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #704] -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d8[1], r4 -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: ldr r2, [sp, #52] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d12[1], r6 -; LE-NEON-NEXT: ldr r6, [sp, #644] -; LE-NEON-NEXT: ldr r3, [sp, #652] -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #128 -; LE-NEON-NEXT: vmov.32 d14[1], r7 -; LE-NEON-NEXT: ldr r4, [sp, #480] -; LE-NEON-NEXT: ldr r7, [sp, #656] -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #112 -; LE-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; LE-NEON-NEXT: ldr r10, [sp, #496] -; LE-NEON-NEXT: vmov.32 d16[1], r5 -; LE-NEON-NEXT: add r5, r9, #192 -; LE-NEON-NEXT: ldr r8, [sp, #608] -; LE-NEON-NEXT: vmov.32 d10[1], r1 -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vmov.32 d16[1], r0 -; LE-NEON-NEXT: ldr r0, [sp, #640] -; LE-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #8 -; LE-NEON-NEXT: vmov.32 d16[1], r2 -; 
LE-NEON-NEXT: ldr r2, [sp, #648] -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r5:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; LE-NEON-NEXT: ldr r1, [sp, #48] @ 4-byte Reload -; LE-NEON-NEXT: vmov.32 d9[0], r1 -; LE-NEON-NEXT: mov r1, r6 -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #660 -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: mov r0, r7 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #484 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: mov r0, r4 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #500 -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: vmov.32 d10[0], r0 -; LE-NEON-NEXT: mov r0, r10 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #612 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d11[0], r0 -; LE-NEON-NEXT: mov r0, r8 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #64] @ 4-byte Reload -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: add r8, r9, #128 -; LE-NEON-NEXT: vmov.32 d13[1], r7 -; LE-NEON-NEXT: ldr r2, [sp, #344] -; LE-NEON-NEXT: ldr r3, [sp, #348] -; LE-NEON-NEXT: vmov.32 d12[1], r11 -; LE-NEON-NEXT: ldr r7, [sp, #452] -; LE-NEON-NEXT: ldr r10, [sp, #416] -; LE-NEON-NEXT: vmov.32 d9[1], r0 -; LE-NEON-NEXT: ldr r0, [sp, #336] -; LE-NEON-NEXT: vmov.32 d8[1], r1 -; LE-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #64 -; LE-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-NEON-NEXT: add lr, sp, #32 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #144 -; LE-NEON-NEXT: vmov.32 d11[1], r4 -; LE-NEON-NEXT: ldr r4, [sp, #340] -; LE-NEON-NEXT: vst1.64 {d16, d17}, 
[r5:128] -; LE-NEON-NEXT: mov r1, r4 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #80 -; LE-NEON-NEXT: vmov.32 d10[1], r6 -; LE-NEON-NEXT: ldr r6, [sp, #448] -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: ldr r2, [sp, #456] -; LE-NEON-NEXT: mov r11, r1 -; LE-NEON-NEXT: ldr r3, [sp, #460] -; LE-NEON-NEXT: vmov.32 d15[0], r0 -; LE-NEON-NEXT: mov r0, r6 -; LE-NEON-NEXT: mov r1, r7 -; LE-NEON-NEXT: ldr r5, [sp, #432] -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #468 -; LE-NEON-NEXT: vmov.32 d12[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #464] -; LE-NEON-NEXT: mov r6, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #420 -; LE-NEON-NEXT: mov r7, r1 -; LE-NEON-NEXT: vmov.32 d13[0], r0 -; LE-NEON-NEXT: mov r0, r10 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #436 -; LE-NEON-NEXT: mov r4, r1 -; LE-NEON-NEXT: vmov.32 d8[0], r0 -; LE-NEON-NEXT: mov r0, r5 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add r3, sp, #324 -; LE-NEON-NEXT: vmov.32 d9[0], r0 -; LE-NEON-NEXT: ldr r0, [sp, #320] -; LE-NEON-NEXT: mov r5, r1 -; LE-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-NEON-NEXT: bl llrintl -; LE-NEON-NEXT: add lr, sp, #64 -; LE-NEON-NEXT: vmov.32 d9[1], r5 -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #96 -; LE-NEON-NEXT: vmov.32 d13[1], r7 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #176 -; LE-NEON-NEXT: vmov.32 d8[1], r4 -; LE-NEON-NEXT: vmov.32 d12[1], r6 -; LE-NEON-NEXT: vmov.32 d14[0], r0 -; LE-NEON-NEXT: add r0, r9, #64 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128] -; LE-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! 
-; LE-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #160 -; LE-NEON-NEXT: vmov.32 d15[1], r11 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #112 -; LE-NEON-NEXT: vmov.32 d14[1], r1 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]! -; LE-NEON-NEXT: vst1.64 {d14, d15}, [r9:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: add lr, sp, #128 -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]! -; LE-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] -; LE-NEON-NEXT: add sp, sp, #192 -; LE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-NEON-NEXT: add sp, sp, #4 -; LE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-LABEL: llrint_v32f128: +; BE-LABEL: llrint_v8i64_v8f128: ; BE: @ %bb.0: ; BE-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-NEXT: .pad #4 ; BE-NEXT: sub sp, sp, #4 -; BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEXT: .pad #152 -; BE-NEXT: sub sp, sp, #152 -; BE-NEXT: str r3, [sp, #120] @ 4-byte Spill -; BE-NEXT: add r3, sp, #712 -; BE-NEXT: str r2, [sp, #112] @ 4-byte Spill -; BE-NEXT: mov r9, r0 -; BE-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: ldr r7, [sp, #648] -; BE-NEXT: add r3, sp, #652 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: mov r0, r7 -; BE-NEXT: ldr r6, [sp, #520] -; BE-NEXT: ldr r8, [sp, #632] -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #524 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: mov r0, r6 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #636 +; BE-NEXT: .vsave {d8, d9, d10, 
d11, d12, d13, d14} +; BE-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-NEXT: .pad #16 +; BE-NEXT: sub sp, sp, #16 +; BE-NEXT: str r3, [sp, #4] @ 4-byte Spill +; BE-NEXT: add r3, sp, #208 +; BE-NEXT: mov r11, r2 ; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: mov r0, r8 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: ldr r0, [sp, #488] -; BE-NEXT: vmov.32 d8[1], r4 -; BE-NEXT: ldr r1, [sp, #492] -; BE-NEXT: ldr r2, [sp, #496] -; BE-NEXT: vmov.32 d10[1], r7 -; BE-NEXT: ldr r3, [sp, #500] -; BE-NEXT: vmov.32 d9[1], r5 -; BE-NEXT: vstr d8, [sp, #144] @ 8-byte Spill -; BE-NEXT: vstr d10, [sp, #136] @ 8-byte Spill -; BE-NEXT: vstr d9, [sp, #128] @ 8-byte Spill -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #680 -; BE-NEXT: str r0, [sp, #104] @ 4-byte Spill -; BE-NEXT: str r1, [sp, #88] @ 4-byte Spill +; BE-NEXT: mov r5, r0 ; BE-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: ldr r0, [sp, #728] -; BE-NEXT: ldr r2, [sp, #736] -; BE-NEXT: vmov.32 d11[1], r6 -; BE-NEXT: ldr r6, [sp, #732] -; BE-NEXT: ldr r3, [sp, #740] -; BE-NEXT: vmov.32 d16[1], r1 -; BE-NEXT: ldr r5, [sp, #504] -; BE-NEXT: mov r1, r6 -; BE-NEXT: ldr r7, [sp, #744] -; BE-NEXT: ldr r4, [sp, #748] -; BE-NEXT: vstr d11, [sp, #24] @ 8-byte Spill -; BE-NEXT: vstr d16, [sp, #8] @ 8-byte Spill -; BE-NEXT: bl llrintl -; BE-NEXT: ldr r2, [sp, #752] -; BE-NEXT: mov r11, r1 -; BE-NEXT: ldr r3, [sp, #756] -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: mov r0, r7 -; BE-NEXT: mov r1, r4 -; BE-NEXT: ldr r10, [sp, #552] -; BE-NEXT: ldr r6, [sp, #664] -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #508 -; BE-NEXT: mov r8, r1 +; BE-NEXT: ldr r7, [sp, #176] +; BE-NEXT: add r3, sp, #180 +; BE-NEXT: str r1, [sp, #12] @ 4-byte Spill ; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: mov r0, r5 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #540 -; BE-NEXT: vmov.32 
d10[0], r0 -; BE-NEXT: ldr r0, [sp, #536] -; BE-NEXT: mov r7, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #556 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: mov r0, r10 ; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: mov r0, r7 +; BE-NEXT: ldr r6, [sp, #128] +; BE-NEXT: ldr r8, [sp, #144] ; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #668 -; BE-NEXT: mov r4, r1 +; BE-NEXT: add r3, sp, #132 +; BE-NEXT: str r1, [sp, #8] @ 4-byte Spill ; BE-NEXT: vmov.32 d9[0], r0 ; BE-NEXT: mov r0, r6 ; BE-NEXT: ldm r3, {r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #700 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #696] +; BE-NEXT: add r3, sp, #148 ; BE-NEXT: mov r6, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: vmov.32 d11[0], r0 -; BE-NEXT: ldr r0, [sp, #104] @ 4-byte Reload -; BE-NEXT: ldr r2, [sp, #256] -; BE-NEXT: vmov.32 d13[1], r11 -; BE-NEXT: ldr r3, [sp, #260] -; BE-NEXT: vmov.32 d14[1], r6 -; BE-NEXT: ldr r6, [sp, #264] -; BE-NEXT: vmov.32 d9[1], r4 -; BE-NEXT: ldr r4, [sp, #344] -; BE-NEXT: vmov.32 d12[1], r5 -; BE-NEXT: ldr r5, [sp, #312] -; BE-NEXT: vmov.32 d8[1], r8 -; BE-NEXT: ldr r8, [sp, #328] -; BE-NEXT: vmov.32 d10[1], r7 -; BE-NEXT: vstr d13, [sp, #32] @ 8-byte Spill -; BE-NEXT: vmov.32 d11[1], r1 -; BE-NEXT: ldr r1, [sp, #120] @ 4-byte Reload -; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: ldr r0, [sp, #112] @ 4-byte Reload -; BE-NEXT: vstr d14, [sp] @ 8-byte Spill -; BE-NEXT: vstr d9, [sp, #16] @ 8-byte Spill -; BE-NEXT: vstr d12, [sp, #56] @ 8-byte Spill -; BE-NEXT: vstr d10, [sp, #64] @ 8-byte Spill -; BE-NEXT: vstr d8, [sp, #40] @ 8-byte Spill -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #268 -; BE-NEXT: mov r11, r1 -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: mov r0, r6 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #316 -; BE-NEXT: mov r10, r1 -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: mov r0, r5 -; BE-NEXT: ldm r3, {r1, r2, r3} 
-; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #332 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: mov r0, r8 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #348 -; BE-NEXT: mov r5, r1 -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: mov r0, r4 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #364 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: ldr r0, [sp, #360] -; BE-NEXT: mov r4, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #476 ; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: ldr r0, [sp, #472] -; BE-NEXT: mov r6, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: vmov.32 d16[0], r0 -; BE-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-NEXT: ldr r2, [sp, #592] -; BE-NEXT: vldr d20, [sp, #136] @ 8-byte Reload -; BE-NEXT: vmov.32 d16[1], r1 -; BE-NEXT: ldr r1, [sp, #588] -; BE-NEXT: ldr r3, [sp, #596] -; BE-NEXT: vldr d22, [sp, #24] @ 8-byte Reload -; BE-NEXT: vldr d18, [sp, #8] @ 8-byte Reload -; BE-NEXT: vrev64.32 d21, d20 -; BE-NEXT: vmov.32 d10[1], r6 -; BE-NEXT: ldr r6, [sp, #600] -; BE-NEXT: vmov.32 d9[1], r4 -; BE-NEXT: ldr r4, [sp, #616] -; BE-NEXT: vmov.32 d12[1], r7 -; BE-NEXT: ldr r7, [sp, #604] -; BE-NEXT: vmov.32 d8[1], r10 -; BE-NEXT: add r10, r9, #192 -; BE-NEXT: vmov.32 d14[1], r11 -; BE-NEXT: ldr r11, [sp, #440] -; BE-NEXT: vmov.32 d13[1], r0 -; BE-NEXT: ldr r0, [sp, #584] -; BE-NEXT: vmov.32 d15[1], r5 -; BE-NEXT: vstr d16, [sp, #48] @ 8-byte Spill -; BE-NEXT: vldr d16, [sp, #128] @ 8-byte Reload -; BE-NEXT: vrev64.32 d20, d22 -; BE-NEXT: vldr d22, [sp] @ 8-byte Reload -; BE-NEXT: vrev64.32 d19, d18 -; BE-NEXT: vrev64.32 d17, d16 -; BE-NEXT: vrev64.32 d18, d22 -; BE-NEXT: vstr d10, [sp, #120] @ 8-byte Spill -; BE-NEXT: vstr d9, [sp, #112] @ 8-byte Spill -; BE-NEXT: vstr d15, [sp, #104] @ 8-byte Spill -; BE-NEXT: vstr d12, [sp, #96] @ 8-byte Spill -; BE-NEXT: vstr d8, [sp, #80] @ 8-byte Spill -; BE-NEXT: vstr d14, [sp, #72] @ 
8-byte Spill -; BE-NEXT: vstr d13, [sp, #88] @ 8-byte Spill -; BE-NEXT: vst1.64 {d20, d21}, [r10:128]! -; BE-NEXT: vrev64.32 d16, d11 -; BE-NEXT: vst1.64 {d18, d19}, [r10:128]! -; BE-NEXT: vst1.64 {d16, d17}, [r10:128]! -; BE-NEXT: bl llrintl -; BE-NEXT: ldr r2, [sp, #608] -; BE-NEXT: mov r8, r1 -; BE-NEXT: ldr r3, [sp, #612] -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: mov r0, r6 -; BE-NEXT: mov r1, r7 -; BE-NEXT: ldr r5, [sp, #456] -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #620 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d9[0], r0 -; BE-NEXT: mov r0, r4 +; BE-NEXT: mov r0, r8 ; BE-NEXT: ldm r3, {r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #444 +; BE-NEXT: add r3, sp, #160 +; BE-NEXT: mov r9, r0 ; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: mov r0, r11 -; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #460 -; BE-NEXT: mov r4, r1 +; BE-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-NEXT: mov r8, r1 ; BE-NEXT: vmov.32 d11[0], r0 ; BE-NEXT: mov r0, r5 -; BE-NEXT: ldm r3, {r1, r2, r3} +; BE-NEXT: mov r1, r4 +; BE-NEXT: mov r2, r11 +; BE-NEXT: ldr r10, [sp, #112] +; BE-NEXT: vmov.32 d12[0], r9 ; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #572 +; BE-NEXT: add r3, sp, #116 +; BE-NEXT: mov r4, r1 ; BE-NEXT: vmov.32 d13[0], r0 -; BE-NEXT: ldr r0, [sp, #568] -; BE-NEXT: mov r5, r1 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: vldr d16, [sp, #16] @ 8-byte Reload -; BE-NEXT: vldr d18, [sp, #56] @ 8-byte Reload -; BE-NEXT: vrev64.32 d17, d16 -; BE-NEXT: ldr r2, [sp, #304] -; BE-NEXT: vrev64.32 d16, d18 -; BE-NEXT: ldr r3, [sp, #308] -; BE-NEXT: vldr d18, [sp, #144] @ 8-byte Reload -; BE-NEXT: vldr d20, [sp, #64] @ 8-byte Reload -; BE-NEXT: vrev64.32 d19, d18 -; BE-NEXT: vrev64.32 d18, d20 -; BE-NEXT: vldr d20, [sp, #40] @ 8-byte Reload -; BE-NEXT: vldr d22, [sp, #32] @ 8-byte Reload -; BE-NEXT: vmov.32 d14[0], r0 -; BE-NEXT: ldr r0, [sp, #296] -; BE-NEXT: 
vmov.32 d10[1], r7 -; BE-NEXT: ldr r7, [sp, #412] -; BE-NEXT: vmov.32 d9[1], r6 -; BE-NEXT: ldr r6, [sp, #408] -; BE-NEXT: vmov.32 d8[1], r8 -; BE-NEXT: add r8, r9, #128 -; BE-NEXT: vrev64.32 d21, d20 -; BE-NEXT: vmov.32 d13[1], r5 -; BE-NEXT: ldr r5, [sp, #300] -; BE-NEXT: vrev64.32 d20, d22 -; BE-NEXT: vmov.32 d14[1], r1 -; BE-NEXT: mov r1, r5 -; BE-NEXT: vstr d10, [sp, #136] @ 8-byte Spill -; BE-NEXT: vstr d9, [sp, #128] @ 8-byte Spill -; BE-NEXT: vstr d8, [sp, #24] @ 8-byte Spill -; BE-NEXT: vst1.64 {d20, d21}, [r10:128] -; BE-NEXT: vst1.64 {d18, d19}, [r8:128]! -; BE-NEXT: vmov.32 d11[1], r4 -; BE-NEXT: ldr r4, [sp, #424] -; BE-NEXT: ldr r10, [sp, #376] -; BE-NEXT: vst1.64 {d16, d17}, [r8:128]! -; BE-NEXT: bl llrintl -; BE-NEXT: ldr r2, [sp, #416] -; BE-NEXT: mov r11, r1 -; BE-NEXT: ldr r3, [sp, #420] -; BE-NEXT: vmov.32 d15[0], r0 -; BE-NEXT: mov r0, r6 -; BE-NEXT: mov r1, r7 -; BE-NEXT: ldr r5, [sp, #392] -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #428 -; BE-NEXT: mov r6, r1 -; BE-NEXT: vmov.32 d8[0], r0 -; BE-NEXT: mov r0, r4 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #380 -; BE-NEXT: mov r7, r1 -; BE-NEXT: vmov.32 d9[0], r0 ; BE-NEXT: mov r0, r10 ; BE-NEXT: ldm r3, {r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #396 -; BE-NEXT: mov r4, r1 -; BE-NEXT: vmov.32 d12[0], r0 -; BE-NEXT: mov r0, r5 -; BE-NEXT: ldm r3, {r1, r2, r3} -; BE-NEXT: bl llrintl -; BE-NEXT: add r3, sp, #284 -; BE-NEXT: vmov.32 d10[0], r0 -; BE-NEXT: ldr r0, [sp, #280] +; BE-NEXT: add r3, sp, #196 +; BE-NEXT: vmov.32 d14[0], r0 +; BE-NEXT: ldr r0, [sp, #192] ; BE-NEXT: mov r5, r1 ; BE-NEXT: ldm r3, {r1, r2, r3} ; BE-NEXT: bl llrintl -; BE-NEXT: vldr d16, [sp, #120] @ 8-byte Reload -; BE-NEXT: vldr d18, [sp, #112] @ 8-byte Reload -; BE-NEXT: vrev64.32 d17, d16 -; BE-NEXT: vldr d26, [sp, #136] @ 8-byte Reload -; BE-NEXT: vrev64.32 d16, d18 -; BE-NEXT: vldr d18, [sp, #104] @ 8-byte Reload -; BE-NEXT: vrev64.32 d31, d26 -; BE-NEXT: vldr d26, 
[sp, #128] @ 8-byte Reload -; BE-NEXT: vldr d20, [sp, #96] @ 8-byte Reload -; BE-NEXT: vrev64.32 d19, d18 -; BE-NEXT: vrev64.32 d18, d20 -; BE-NEXT: vldr d20, [sp, #80] @ 8-byte Reload -; BE-NEXT: vrev64.32 d30, d26 -; BE-NEXT: vldr d26, [sp, #24] @ 8-byte Reload -; BE-NEXT: vmov.32 d10[1], r5 -; BE-NEXT: vldr d22, [sp, #72] @ 8-byte Reload -; BE-NEXT: vrev64.32 d21, d20 -; BE-NEXT: vrev64.32 d1, d26 -; BE-NEXT: vmov.32 d9[1], r7 -; BE-NEXT: vmov.32 d12[1], r4 -; BE-NEXT: vrev64.32 d20, d22 -; BE-NEXT: vldr d22, [sp, #88] @ 8-byte Reload -; BE-NEXT: vmov.32 d8[1], r6 -; BE-NEXT: vrev64.32 d0, d14 -; BE-NEXT: vmov.32 d28[0], r0 -; BE-NEXT: add r0, r9, #64 -; BE-NEXT: vrev64.32 d3, d10 -; BE-NEXT: vldr d24, [sp, #48] @ 8-byte Reload -; BE-NEXT: vrev64.32 d23, d22 +; BE-NEXT: vmov.32 d16[0], r0 +; BE-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; BE-NEXT: vmov.32 d14[1], r5 +; BE-NEXT: vmov.32 d9[1], r0 +; BE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; BE-NEXT: vmov.32 d12[1], r7 +; BE-NEXT: vmov.32 d8[1], r0 +; BE-NEXT: vmov.32 d13[1], r4 +; BE-NEXT: vmov.32 d10[1], r6 +; BE-NEXT: vmov.32 d11[1], r8 +; BE-NEXT: vmov.32 d16[1], r1 +; BE-NEXT: vrev64.32 d1, d14 +; BE-NEXT: vrev64.32 d3, d12 ; BE-NEXT: vrev64.32 d5, d9 -; BE-NEXT: vst1.64 {d0, d1}, [r8:128]! -; BE-NEXT: vrev64.32 d2, d12 -; BE-NEXT: vmov.32 d15[1], r11 -; BE-NEXT: vrev64.32 d22, d24 -; BE-NEXT: vrev64.32 d25, d13 -; BE-NEXT: vrev64.32 d4, d8 -; BE-NEXT: vst1.64 {d30, d31}, [r8:128] -; BE-NEXT: vst1.64 {d2, d3}, [r0:128]! -; BE-NEXT: vmov.32 d28[1], r1 -; BE-NEXT: vrev64.32 d24, d11 -; BE-NEXT: vst1.64 {d4, d5}, [r0:128]! -; BE-NEXT: vrev64.32 d27, d15 -; BE-NEXT: vst1.64 {d24, d25}, [r0:128]! -; BE-NEXT: vrev64.32 d26, d28 -; BE-NEXT: vst1.64 {d22, d23}, [r0:128] -; BE-NEXT: vst1.64 {d20, d21}, [r9:128]! -; BE-NEXT: vst1.64 {d26, d27}, [r9:128]! -; BE-NEXT: vst1.64 {d18, d19}, [r9:128]! 
-; BE-NEXT: vst1.64 {d16, d17}, [r9:128] -; BE-NEXT: add sp, sp, #152 -; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-NEXT: vrev64.32 d7, d8 +; BE-NEXT: vrev64.32 d0, d13 +; BE-NEXT: vrev64.32 d2, d10 +; BE-NEXT: vrev64.32 d4, d11 +; BE-NEXT: vrev64.32 d6, d16 +; BE-NEXT: add sp, sp, #16 +; BE-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; BE-NEXT: add sp, sp, #4 ; BE-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-NEON-LABEL: llrint_v32f128: -; BE-NEON: @ %bb.0: -; BE-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-NEON-NEXT: .pad #4 -; BE-NEON-NEXT: sub sp, sp, #4 -; BE-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: .pad #152 -; BE-NEON-NEXT: sub sp, sp, #152 -; BE-NEON-NEXT: str r3, [sp, #120] @ 4-byte Spill -; BE-NEON-NEXT: add r3, sp, #712 -; BE-NEON-NEXT: str r2, [sp, #112] @ 4-byte Spill -; BE-NEON-NEXT: mov r9, r0 -; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: ldr r7, [sp, #648] -; BE-NEON-NEXT: add r3, sp, #652 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: mov r0, r7 -; BE-NEON-NEXT: ldr r6, [sp, #520] -; BE-NEON-NEXT: ldr r8, [sp, #632] -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #524 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: mov r0, r6 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #636 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: mov r0, r8 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #488] -; BE-NEON-NEXT: vmov.32 d8[1], r4 -; BE-NEON-NEXT: ldr r1, [sp, #492] -; BE-NEON-NEXT: ldr r2, [sp, #496] -; BE-NEON-NEXT: 
vmov.32 d10[1], r7 -; BE-NEON-NEXT: ldr r3, [sp, #500] -; BE-NEON-NEXT: vmov.32 d9[1], r5 -; BE-NEON-NEXT: vstr d8, [sp, #144] @ 8-byte Spill -; BE-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill -; BE-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #680 -; BE-NEON-NEXT: str r0, [sp, #104] @ 4-byte Spill -; BE-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #728] -; BE-NEON-NEXT: ldr r2, [sp, #736] -; BE-NEON-NEXT: vmov.32 d11[1], r6 -; BE-NEON-NEXT: ldr r6, [sp, #732] -; BE-NEON-NEXT: ldr r3, [sp, #740] -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: ldr r5, [sp, #504] -; BE-NEON-NEXT: mov r1, r6 -; BE-NEON-NEXT: ldr r7, [sp, #744] -; BE-NEON-NEXT: ldr r4, [sp, #748] -; BE-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill -; BE-NEON-NEXT: vstr d16, [sp, #8] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: ldr r2, [sp, #752] -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: ldr r3, [sp, #756] -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: mov r0, r7 -; BE-NEON-NEXT: mov r1, r4 -; BE-NEON-NEXT: ldr r10, [sp, #552] -; BE-NEON-NEXT: ldr r6, [sp, #664] -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #508 -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #540 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #536] -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #556 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: mov r0, r10 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #668 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: mov r0, 
r6 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #700 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #696] -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload -; BE-NEON-NEXT: ldr r2, [sp, #256] -; BE-NEON-NEXT: vmov.32 d13[1], r11 -; BE-NEON-NEXT: ldr r3, [sp, #260] -; BE-NEON-NEXT: vmov.32 d14[1], r6 -; BE-NEON-NEXT: ldr r6, [sp, #264] -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: ldr r4, [sp, #344] -; BE-NEON-NEXT: vmov.32 d12[1], r5 -; BE-NEON-NEXT: ldr r5, [sp, #312] -; BE-NEON-NEXT: vmov.32 d8[1], r8 -; BE-NEON-NEXT: ldr r8, [sp, #328] -; BE-NEON-NEXT: vmov.32 d10[1], r7 -; BE-NEON-NEXT: vstr d13, [sp, #32] @ 8-byte Spill -; BE-NEON-NEXT: vmov.32 d11[1], r1 -; BE-NEON-NEXT: ldr r1, [sp, #120] @ 4-byte Reload -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload -; BE-NEON-NEXT: vstr d14, [sp] @ 8-byte Spill -; BE-NEON-NEXT: vstr d9, [sp, #16] @ 8-byte Spill -; BE-NEON-NEXT: vstr d12, [sp, #56] @ 8-byte Spill -; BE-NEON-NEXT: vstr d10, [sp, #64] @ 8-byte Spill -; BE-NEON-NEXT: vstr d8, [sp, #40] @ 8-byte Spill -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #268 -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: mov r0, r6 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #316 -; BE-NEON-NEXT: mov r10, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #332 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: mov r0, r8 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #348 -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: mov r0, r4 
-; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #364 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #360] -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #476 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #472] -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vmov.32 d16[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-NEON-NEXT: ldr r2, [sp, #592] -; BE-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d16[1], r1 -; BE-NEON-NEXT: ldr r1, [sp, #588] -; BE-NEON-NEXT: ldr r3, [sp, #596] -; BE-NEON-NEXT: vldr d22, [sp, #24] @ 8-byte Reload -; BE-NEON-NEXT: vldr d18, [sp, #8] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d21, d20 -; BE-NEON-NEXT: vmov.32 d10[1], r6 -; BE-NEON-NEXT: ldr r6, [sp, #600] -; BE-NEON-NEXT: vmov.32 d9[1], r4 -; BE-NEON-NEXT: ldr r4, [sp, #616] -; BE-NEON-NEXT: vmov.32 d12[1], r7 -; BE-NEON-NEXT: ldr r7, [sp, #604] -; BE-NEON-NEXT: vmov.32 d8[1], r10 -; BE-NEON-NEXT: add r10, r9, #192 -; BE-NEON-NEXT: vmov.32 d14[1], r11 -; BE-NEON-NEXT: ldr r11, [sp, #440] -; BE-NEON-NEXT: vmov.32 d13[1], r0 -; BE-NEON-NEXT: ldr r0, [sp, #584] -; BE-NEON-NEXT: vmov.32 d15[1], r5 -; BE-NEON-NEXT: vstr d16, [sp, #48] @ 8-byte Spill -; BE-NEON-NEXT: vldr d16, [sp, #128] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d20, d22 -; BE-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d19, d18 -; BE-NEON-NEXT: vrev64.32 d17, d16 -; BE-NEON-NEXT: vrev64.32 d18, d22 -; BE-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill -; BE-NEON-NEXT: vstr d9, [sp, #112] @ 8-byte Spill -; BE-NEON-NEXT: vstr d15, [sp, #104] @ 8-byte Spill -; BE-NEON-NEXT: vstr d12, [sp, #96] @ 8-byte Spill -; BE-NEON-NEXT: vstr d8, [sp, #80] @ 8-byte Spill -; BE-NEON-NEXT: vstr d14, [sp, #72] @ 8-byte Spill -; BE-NEON-NEXT: vstr d13, 
[sp, #88] @ 8-byte Spill -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]! -; BE-NEON-NEXT: vrev64.32 d16, d11 -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]! -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: ldr r2, [sp, #608] -; BE-NEON-NEXT: mov r8, r1 -; BE-NEON-NEXT: ldr r3, [sp, #612] -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: mov r0, r6 -; BE-NEON-NEXT: mov r1, r7 -; BE-NEON-NEXT: ldr r5, [sp, #456] -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #620 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: mov r0, r4 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #444 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: mov r0, r11 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #460 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d11[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #572 -; BE-NEON-NEXT: vmov.32 d13[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #568] -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vldr d16, [sp, #16] @ 8-byte Reload -; BE-NEON-NEXT: vldr d18, [sp, #56] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d17, d16 -; BE-NEON-NEXT: ldr r2, [sp, #304] -; BE-NEON-NEXT: vrev64.32 d16, d18 -; BE-NEON-NEXT: ldr r3, [sp, #308] -; BE-NEON-NEXT: vldr d18, [sp, #144] @ 8-byte Reload -; BE-NEON-NEXT: vldr d20, [sp, #64] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d19, d18 -; BE-NEON-NEXT: vrev64.32 d18, d20 -; BE-NEON-NEXT: vldr d20, [sp, #40] @ 8-byte Reload -; BE-NEON-NEXT: vldr d22, [sp, #32] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d14[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #296] -; BE-NEON-NEXT: vmov.32 d10[1], r7 -; BE-NEON-NEXT: ldr r7, [sp, #412] -; BE-NEON-NEXT: vmov.32 d9[1], r6 -; BE-NEON-NEXT: ldr 
r6, [sp, #408] -; BE-NEON-NEXT: vmov.32 d8[1], r8 -; BE-NEON-NEXT: add r8, r9, #128 -; BE-NEON-NEXT: vrev64.32 d21, d20 -; BE-NEON-NEXT: vmov.32 d13[1], r5 -; BE-NEON-NEXT: ldr r5, [sp, #300] -; BE-NEON-NEXT: vrev64.32 d20, d22 -; BE-NEON-NEXT: vmov.32 d14[1], r1 -; BE-NEON-NEXT: mov r1, r5 -; BE-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill -; BE-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill -; BE-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r10:128] -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r8:128]! -; BE-NEON-NEXT: vmov.32 d11[1], r4 -; BE-NEON-NEXT: ldr r4, [sp, #424] -; BE-NEON-NEXT: ldr r10, [sp, #376] -; BE-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: ldr r2, [sp, #416] -; BE-NEON-NEXT: mov r11, r1 -; BE-NEON-NEXT: ldr r3, [sp, #420] -; BE-NEON-NEXT: vmov.32 d15[0], r0 -; BE-NEON-NEXT: mov r0, r6 -; BE-NEON-NEXT: mov r1, r7 -; BE-NEON-NEXT: ldr r5, [sp, #392] -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #428 -; BE-NEON-NEXT: mov r6, r1 -; BE-NEON-NEXT: vmov.32 d8[0], r0 -; BE-NEON-NEXT: mov r0, r4 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #380 -; BE-NEON-NEXT: mov r7, r1 -; BE-NEON-NEXT: vmov.32 d9[0], r0 -; BE-NEON-NEXT: mov r0, r10 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #396 -; BE-NEON-NEXT: mov r4, r1 -; BE-NEON-NEXT: vmov.32 d12[0], r0 -; BE-NEON-NEXT: mov r0, r5 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: add r3, sp, #284 -; BE-NEON-NEXT: vmov.32 d10[0], r0 -; BE-NEON-NEXT: ldr r0, [sp, #280] -; BE-NEON-NEXT: mov r5, r1 -; BE-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-NEON-NEXT: bl llrintl -; BE-NEON-NEXT: vldr d16, [sp, #120] @ 8-byte Reload -; BE-NEON-NEXT: vldr d18, [sp, #112] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d17, d16 -; BE-NEON-NEXT: vldr d26, [sp, #136] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d16, d18 -; 
BE-NEON-NEXT: vldr d18, [sp, #104] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d31, d26 -; BE-NEON-NEXT: vldr d26, [sp, #128] @ 8-byte Reload -; BE-NEON-NEXT: vldr d20, [sp, #96] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d19, d18 -; BE-NEON-NEXT: vrev64.32 d18, d20 -; BE-NEON-NEXT: vldr d20, [sp, #80] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d30, d26 -; BE-NEON-NEXT: vldr d26, [sp, #24] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d10[1], r5 -; BE-NEON-NEXT: vldr d22, [sp, #72] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d21, d20 -; BE-NEON-NEXT: vrev64.32 d1, d26 -; BE-NEON-NEXT: vmov.32 d9[1], r7 -; BE-NEON-NEXT: vmov.32 d12[1], r4 -; BE-NEON-NEXT: vrev64.32 d20, d22 -; BE-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload -; BE-NEON-NEXT: vmov.32 d8[1], r6 -; BE-NEON-NEXT: vrev64.32 d0, d14 -; BE-NEON-NEXT: vmov.32 d28[0], r0 -; BE-NEON-NEXT: add r0, r9, #64 -; BE-NEON-NEXT: vrev64.32 d3, d10 -; BE-NEON-NEXT: vldr d24, [sp, #48] @ 8-byte Reload -; BE-NEON-NEXT: vrev64.32 d23, d22 -; BE-NEON-NEXT: vrev64.32 d5, d9 -; BE-NEON-NEXT: vst1.64 {d0, d1}, [r8:128]! -; BE-NEON-NEXT: vrev64.32 d2, d12 -; BE-NEON-NEXT: vmov.32 d15[1], r11 -; BE-NEON-NEXT: vrev64.32 d22, d24 -; BE-NEON-NEXT: vrev64.32 d25, d13 -; BE-NEON-NEXT: vrev64.32 d4, d8 -; BE-NEON-NEXT: vst1.64 {d30, d31}, [r8:128] -; BE-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]! -; BE-NEON-NEXT: vmov.32 d28[1], r1 -; BE-NEON-NEXT: vrev64.32 d24, d11 -; BE-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 d27, d15 -; BE-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]! -; BE-NEON-NEXT: vrev64.32 d26, d28 -; BE-NEON-NEXT: vst1.64 {d22, d23}, [r0:128] -; BE-NEON-NEXT: vst1.64 {d20, d21}, [r9:128]! -; BE-NEON-NEXT: vst1.64 {d26, d27}, [r9:128]! -; BE-NEON-NEXT: vst1.64 {d18, d19}, [r9:128]! 
-; BE-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] -; BE-NEON-NEXT: add sp, sp, #152 -; BE-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-NEON-NEXT: add sp, sp, #4 -; BE-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <32 x i64> @llvm.llrint.v32i64.v16f128(<32 x fp128> %x) - ret <32 x i64> %a + %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) + ret <8 x i64> %a } -declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>) +declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>) diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll index 50c8b9ff6d913..fe5e3cbcdf771 100644 --- a/llvm/test/CodeGen/ARM/vector-lrint.ll +++ b/llvm/test/CodeGen/ARM/vector-lrint.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE-I32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefix=LE-I64 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-I32-NEON -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=LE-I64-NEON -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE-I32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefix=BE-I64 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I32-NEON -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I64-NEON +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s --check-prefixes=LE-I32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf | FileCheck %s 
--check-prefixes=LE-I64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=LE-I32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=LE-I64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefixes=BE-I32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf | FileCheck %s --check-prefixes=BE-I64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64 ; FIXME: crash "Do not know how to soft promote this operator's operand!" ; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { @@ -40,12 +40,6 @@ ; } ; declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) -; define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { -; %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x) -; ret <32 x iXLen> %a -; } -; declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>) - define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; LE-I32-LABEL: lrint_v1f32: ; LE-I32: @ %bb.0: @@ -63,22 +57,6 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; LE-I64-NEXT: vmov.32 d0[1], r1 ; LE-I64-NEXT: pop {r11, pc} ; -; LE-I32-NEON-LABEL: lrint_v1f32: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v1f32: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r11, lr} -; LE-I64-NEON-NEXT: push {r11, lr} -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d0[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d0[1], r1 -; LE-I64-NEON-NEXT: pop {r11, pc} -; ; BE-I32-LABEL: lrint_v1f32: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r11, lr} @@ -95,23 +73,6 @@ define 
<1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; BE-I64-NEXT: vmov.32 d16[1], r1 ; BE-I64-NEXT: vrev64.32 d0, d16 ; BE-I64-NEXT: pop {r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v1f32: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v1f32: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r11, lr} -; BE-I64-NEON-NEXT: push {r11, lr} -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 d0, d16 -; BE-I64-NEON-NEXT: pop {r11, pc} %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) ret <1 x iXLen> %a } @@ -157,45 +118,6 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { ; LE-I64-NEXT: vpop {d10, d11} ; LE-I64-NEXT: pop {r4, pc} ; -; LE-I32-NEON-LABEL: lrint_v2f32: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9} -; LE-I32-NEON-NEXT: vpush {d8, d9} -; LE-I32-NEON-NEXT: vmov.f64 d8, d0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s17 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: vorr d0, d9, d9 -; LE-I32-NEON-NEXT: vpop {d8, d9} -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v2f32: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, lr} -; LE-I64-NEON-NEXT: push {r4, lr} -; LE-I64-NEON-NEXT: .vsave {d10, d11} -; LE-I64-NEON-NEXT: vpush {d10, d11} -; LE-I64-NEON-NEXT: .vsave {d8} -; LE-I64-NEON-NEXT: vpush {d8} -; LE-I64-NEON-NEXT: vmov.f64 d8, d0 -; LE-I64-NEON-NEXT: vmov.f32 s0, s17 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s16 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: vmov.32 
d11[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; LE-I64-NEON-NEXT: vorr q0, q5, q5 -; LE-I64-NEON-NEXT: vpop {d8} -; LE-I64-NEON-NEXT: vpop {d10, d11} -; LE-I64-NEON-NEXT: pop {r4, pc} -; ; BE-I32-LABEL: lrint_v2f32: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r11, lr} @@ -235,46 +157,6 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { ; BE-I64-NEXT: vpop {d8} ; BE-I64-NEXT: vpop {d10, d11} ; BE-I64-NEXT: pop {r4, pc} -; -; BE-I32-NEON-LABEL: lrint_v2f32: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9} -; BE-I32-NEON-NEXT: vpush {d8, d9} -; BE-I32-NEON-NEXT: vrev64.32 d8, d0 -; BE-I32-NEON-NEXT: vmov.f32 s0, s16 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s17 -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 d0, d9 -; BE-I32-NEON-NEXT: vpop {d8, d9} -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v2f32: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, lr} -; BE-I64-NEON-NEXT: push {r4, lr} -; BE-I64-NEON-NEXT: .vsave {d10, d11} -; BE-I64-NEON-NEXT: vpush {d10, d11} -; BE-I64-NEON-NEXT: .vsave {d8} -; BE-I64-NEON-NEXT: vpush {d8} -; BE-I64-NEON-NEXT: vrev64.32 d8, d0 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q0, q5 -; BE-I64-NEON-NEXT: vpop {d8} -; BE-I64-NEON-NEXT: vpop {d10, d11} -; BE-I64-NEON-NEXT: pop {r4, pc} %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) ret <2 x iXLen> %a } @@ -335,60 +217,6 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { ; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; 
LE-I64-NEXT: pop {r4, r5, r6, pc} ; -; LE-I32-NEON-LABEL: lrint_v4f32: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11} -; LE-I32-NEON-NEXT: vorr q4, q0, q0 -; LE-I32-NEON-NEXT: vmov.f32 s0, s18 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s16 -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s19 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s17 -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q5, q5 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11} -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v4f32: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, lr} -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; LE-I64-NEON-NEXT: vorr q5, q0, q0 -; LE-I64-NEON-NEXT: vmov.f32 s0, s23 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s20 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s21 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s22 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r6 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEON-NEXT: vorr q0, q6, q6 -; LE-I64-NEON-NEXT: vorr q1, q4, q4 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; LE-I64-NEON-NEXT: pop {r4, 
r5, r6, pc} -; ; BE-I32-LABEL: lrint_v4f32: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r11, lr} @@ -443,61 +271,6 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { ; BE-I64-NEXT: vrev64.32 q1, q5 ; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; BE-I64-NEXT: pop {r4, r5, r6, pc} -; -; BE-I32-NEON-LABEL: lrint_v4f32: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11} -; BE-I32-NEON-NEXT: vrev64.32 q4, q0 -; BE-I32-NEON-NEXT: vmov.f32 s0, s18 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s16 -; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s19 -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s17 -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q0, q5 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11} -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v4f32: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, lr} -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; BE-I64-NEON-NEXT: vrev64.32 d8, d1 -; BE-I64-NEON-NEXT: vrev64.32 d9, d0 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s18 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s19 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: 
vmov.32 d13[1], r6 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q0, q6 -; BE-I64-NEON-NEXT: vrev64.32 q1, q5 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; BE-I64-NEON-NEXT: pop {r4, r5, r6, pc} %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) ret <4 x iXLen> %a } @@ -607,109 +380,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { ; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; -; LE-I32-NEON-LABEL: lrint_v8f32: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vorr q5, q1, q1 -; LE-I32-NEON-NEXT: vorr q7, q0, q0 -; LE-I32-NEON-NEXT: vmov.f32 s0, s20 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s22 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s30 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s28 -; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s31 -; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s29 -; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s23 -; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s21 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q6, q6 -; LE-I32-NEON-NEXT: vorr q1, q4, q4 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: 
lrint_v8f32: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #40 -; LE-I64-NEON-NEXT: sub sp, sp, #40 -; LE-I64-NEON-NEXT: vorr q6, q1, q1 -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vorr q7, q0, q0 -; LE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-I64-NEON-NEXT: vmov.f32 s0, s27 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s24 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s25 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vorr q6, q7, q7 -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: vmov.f32 s0, s26 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s27 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s24 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s1 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s2 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r6 -; 
LE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r10 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r8 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEON-NEXT: vorr q0, q6, q6 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r9 -; LE-I64-NEON-NEXT: vorr q1, q7, q7 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEON-NEXT: vorr q2, q5, q5 -; LE-I64-NEON-NEXT: vorr q3, q4, q4 -; LE-I64-NEON-NEXT: add sp, sp, #40 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; ; BE-I32-LABEL: lrint_v8f32: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r11, lr} @@ -815,112 +485,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { ; BE-I64-NEXT: add sp, sp, #32 ; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-I32-NEON-LABEL: lrint_v8f32: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vrev64.32 q4, q1 -; BE-I32-NEON-NEXT: vrev64.32 q5, q0 -; BE-I32-NEON-NEXT: vmov.f32 s0, s16 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s20 -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s18 -; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s22 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s19 -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s23 -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s21 -; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s17 -; 
BE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q0, q7 -; BE-I32-NEON-NEXT: vrev64.32 q1, q6 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v8f32: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: .pad #32 -; BE-I64-NEON-NEXT: sub sp, sp, #32 -; BE-I64-NEON-NEXT: vorr q4, q1, q1 -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vorr q5, q0, q0 -; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I64-NEON-NEXT: vrev64.32 d12, d8 -; BE-I64-NEON-NEXT: vmov.f32 s0, s25 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s24 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vrev64.32 d0, d11 -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vrev64.32 d8, d9 -; BE-I64-NEON-NEXT: vorr d9, d0, d0 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vmov.f32 s0, s19 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d16 -; BE-I64-NEON-NEXT: vstr d8, [sp, #8] @ 8-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vldr d0, 
[sp, #8] @ 8-byte Reload -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: vmov.f32 s0, s1 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vldr d0, [sp, #24] @ 8-byte Reload -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r6 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r8 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r5 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r10 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r9 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q0, q4 -; BE-I64-NEON-NEXT: vrev64.32 q1, q5 -; BE-I64-NEON-NEXT: vrev64.32 q2, q7 -; BE-I64-NEON-NEXT: vrev64.32 q3, q6 -; BE-I64-NEON-NEXT: add sp, sp, #32 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) ret <8 x iXLen> %a } @@ -1172,251 +736,6 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; LE-I64-NEXT: add sp, sp, #4 ; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-I32-NEON-LABEL: lrint_v16f32: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: .pad #80 -; LE-I32-NEON-NEXT: sub sp, sp, #80 -; LE-I32-NEON-NEXT: vorr q5, q3, q3 -; LE-I32-NEON-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vorr q6, q2, q2 -; LE-I32-NEON-NEXT: vorr q7, q1, q1 -; LE-I32-NEON-NEXT: vmov.f32 s0, s20 -; LE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 
s0, s22 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s24 -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s26 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEON-NEXT: vorr q4, q7, q7 -; LE-I32-NEON-NEXT: vmov.f32 s0, s16 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s18 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s26 -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s24 -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s27 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s25 -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s19 -; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s17 -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s27 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s25 -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; 
LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s19 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s17 -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q7, q7 -; LE-I32-NEON-NEXT: vldmia lr, {d4, d5} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr q1, q5, q5 -; LE-I32-NEON-NEXT: vorr q3, q6, q6 -; LE-I32-NEON-NEXT: add sp, sp, #80 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v16f32: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: .pad #4 -; LE-I64-NEON-NEXT: sub sp, sp, #4 -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #160 -; LE-I64-NEON-NEXT: sub sp, sp, #160 -; LE-I64-NEON-NEXT: add lr, sp, #112 -; LE-I64-NEON-NEXT: vorr q5, q3, q3 -; LE-I64-NEON-NEXT: vorr q6, q0, q0 -; LE-I64-NEON-NEXT: mov r4, r0 -; LE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #48 -; LE-I64-NEON-NEXT: vorr q7, q1, q1 -; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEON-NEXT: vmov.f32 s0, s23 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s24 -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 
s0, s25 -; LE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s28 -; LE-I64-NEON-NEXT: add lr, sp, #128 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s29 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s30 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s31 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #112 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s29 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s22 -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: vmov.f32 s0, s21 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; LE-I64-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s20 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r6 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, 
s31 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r9 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #64 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #128 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #48 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s27 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s26 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #128 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d17[1], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #112 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s20 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vmov.f32 s0, s22 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d17[1], r11 -; LE-I64-NEON-NEXT: vorr q6, q8, q8 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; 
LE-I64-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #128 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r9 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEON-NEXT: vmov.32 d19[1], r10 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEON-NEXT: vmov.32 d16[1], r0 -; LE-I64-NEON-NEXT: add r0, r4, #64 -; LE-I64-NEON-NEXT: vmov.32 d18[1], r8 -; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEON-NEXT: vmov.32 d15[1], r7 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #64 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r4:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! 
-; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] -; LE-I64-NEON-NEXT: add sp, sp, #160 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: add sp, sp, #4 -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; ; BE-I32-LABEL: lrint_v16f32: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r11, lr} @@ -1683,2433 +1002,76 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I64-NEXT: add sp, sp, #4 ; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v16f32: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: .pad #96 -; BE-I32-NEON-NEXT: sub sp, sp, #96 -; BE-I32-NEON-NEXT: vrev64.32 q3, q3 -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vrev64.32 q4, q0 -; BE-I32-NEON-NEXT: vmov.f32 s0, s12 -; BE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vrev64.32 q5, q1 -; BE-I32-NEON-NEXT: vrev64.32 q7, q2 -; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s16 -; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s18 -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s20 -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; 
BE-I32-NEON-NEXT: vmov.f32 s0, s22 -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s28 -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: vstmia sp, {d8, d9} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s22 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s30 -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s23 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s31 -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s29 -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s19 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s17 -; BE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s19 -; BE-I32-NEON-NEXT: vorr q7, q5, q5 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s17 -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; 
BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s1 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vrev64.32 q0, q5 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vrev64.32 q1, q7 -; BE-I32-NEON-NEXT: vmov.32 d16[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q2, q6 -; BE-I32-NEON-NEXT: vrev64.32 q3, q8 -; BE-I32-NEON-NEXT: add sp, sp, #96 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v16f32: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: .pad #4 -; BE-I64-NEON-NEXT: sub sp, sp, #4 -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: .pad #144 -; BE-I64-NEON-NEXT: sub sp, sp, #144 -; BE-I64-NEON-NEXT: vorr q6, q3, q3 -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: vorr q7, q0, q0 -; BE-I64-NEON-NEXT: mov r4, r0 -; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: vrev64.32 d8, d13 -; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vrev64.32 d8, d14 -; BE-I64-NEON-NEXT: add lr, sp, #128 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: str r1, [sp, #92] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vrev64.32 d9, d12 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: vstr d9, [sp, #64] @ 8-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 
s0, s19 -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: vrev64.32 d9, d15 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s18 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s19 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vldr d0, [sp, #64] @ 8-byte Reload -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: @ kill: def $s0 killed $s0 killed $d0 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #40 -; BE-I64-NEON-NEXT: str r1, [sp, #60] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d15[1], r7 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d16 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r6 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d17 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: 
vmov.32 d12[1], r9 -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #128 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d16 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #92] @ 4-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #128 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #40 -; BE-I64-NEON-NEXT: vrev64.32 d8, d17 -; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #60] @ 4-byte Reload -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add r0, r4, #64 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; 
BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r10 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r11 -; BE-I64-NEON-NEXT: vorr q12, q8, q8 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #128 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r7 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r6 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r8 -; BE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r9 -; BE-I64-NEON-NEXT: vrev64.32 q14, q7 -; BE-I64-NEON-NEXT: vorr q13, q8, q8 -; BE-I64-NEON-NEXT: vrev64.32 q15, q5 -; BE-I64-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 q8, q6 -; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! -; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 q9, q9 -; BE-I64-NEON-NEXT: vrev64.32 q10, q10 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 q11, q11 -; BE-I64-NEON-NEXT: vrev64.32 q12, q12 -; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]! -; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]! -; BE-I64-NEON-NEXT: vrev64.32 q13, q13 -; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r4:128]! 
-; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r4:128] -; BE-I64-NEON-NEXT: add sp, sp, #144 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: add sp, sp, #4 -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) ret <16 x iXLen> %a } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { -; LE-I32-LABEL: lrint_v32f32: +define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { +; LE-I32-LABEL: lrint_v1f64: ; LE-I32: @ %bb.0: -; LE-I32-NEXT: .save {r4, r5, r6, lr} -; LE-I32-NEXT: push {r4, r5, r6, lr} -; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: .pad #144 -; LE-I32-NEXT: sub sp, sp, #144 -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: mov r4, r0 -; LE-I32-NEXT: add r0, sp, #224 +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v1f64: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r11, lr} +; LE-I64-NEXT: push {r11, lr} +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEXT: vmov.32 d0[1], r1 +; LE-I64-NEXT: pop {r11, pc} +; +; BE-I32-LABEL: lrint_v1f64: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v1f64: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r11, lr} +; BE-I64-NEXT: push {r11, lr} +; BE-I64-NEXT: bl lrint +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: pop {r11, pc} + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) + ret <1 x iXLen> %a +} +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) + +define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { +; LE-I32-LABEL: lrint_v2f64: +; LE-I32: @ 
%bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10} +; LE-I32-NEXT: vpush {d8, d9, d10} ; LE-I32-NEXT: vorr q4, q0, q0 -; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vorr q6, q3, q3 -; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I32-NEXT: vmov.f32 s0, s4 -; LE-I32-NEXT: add lr, sp, #80 -; LE-I32-NEXT: vorr q5, q1, q1 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #272 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #240 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s18 -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s22 -; LE-I32-NEXT: add lr, sp, #112 -; LE-I32-NEXT: vmov.32 d17[0], r0 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: add lr, sp, #128 -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEXT: vmov.f32 s0, s20 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s22 -; LE-I32-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEXT: vorr q7, q5, q5 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s26 -; LE-I32-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s24 -; LE-I32-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s27 -; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s25 -; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 
s0, s31 -; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: add lr, sp, #96 -; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s29 -; LE-I32-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vldmia sp, {d14, d15} @ 16-byte Reload -; LE-I32-NEXT: vmov.f32 s0, s31 -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: add lr, sp, #128 -; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEXT: vmov.f32 s0, s23 -; LE-I32-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s29 -; LE-I32-NEXT: add lr, sp, #112 -; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s20 -; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: add lr, sp, #128 -; LE-I32-NEXT: add r0, sp, #256 -; LE-I32-NEXT: vld1.64 {d14, d15}, [r0] -; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s21 -; LE-I32-NEXT: vorr q4, q6, q6 -; LE-I32-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vorr q6, q7, q7 -; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: add lr, sp, #112 -; LE-I32-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill -; LE-I32-NEXT: vmov.f32 s0, s24 -; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEXT: vmov.f32 s0, s18 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s16 -; LE-I32-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s19 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vorr d0, d9, d9 ; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: bl lrintf -; 
LE-I32-NEXT: vmov.f32 s0, s26 -; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s17 -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEXT: vmov.f32 s0, s20 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: add lr, sp, #80 -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEXT: vmov.f32 s0, s26 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s24 -; LE-I32-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s27 -; LE-I32-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s22 -; LE-I32-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s25 -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s23 -; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s21 -; LE-I32-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload -; LE-I32-NEXT: vmov.f32 s0, s27 -; LE-I32-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: vmov.f32 s0, s25 -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: bl lrintf -; LE-I32-NEXT: add lr, sp, #112 +; LE-I32-NEXT: bl lrint ; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: mov r0, r4 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #128 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! 
-; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #96 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I32-NEXT: add r0, r4, #64 -; LE-I32-NEXT: vst1.32 {d8, d9}, [r0:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]! -; LE-I32-NEXT: vst1.64 {d14, d15}, [r0:128] -; LE-I32-NEXT: add sp, sp, #144 -; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: pop {r4, r5, r6, pc} +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vpop {d8, d9, d10} +; LE-I32-NEXT: pop {r11, pc} ; -; LE-I64-LABEL: lrint_v32f32: +; LE-I64-LABEL: lrint_v2f64: ; LE-I64: @ %bb.0: -; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEXT: .pad #4 -; LE-I64-NEXT: sub sp, sp, #4 -; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: .pad #184 -; LE-I64-NEXT: sub sp, sp, #184 -; LE-I64-NEXT: add lr, sp, #152 -; LE-I64-NEXT: vorr q7, q3, q3 -; LE-I64-NEXT: vorr q4, q2, q2 -; LE-I64-NEXT: mov r5, r0 -; LE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEXT: vmov.f32 s0, s3 -; LE-I64-NEXT: str r0, [sp, #68] @ 4-byte Spill -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s18 -; LE-I64-NEXT: add lr, sp, #168 -; LE-I64-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEXT: str r1, [sp, #16] @ 4-byte Spill -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s16 -; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s17 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: 
vmov.32 d12[0], r0 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s19 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s31 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s30 -; LE-I64-NEXT: str r1, [sp, #8] @ 4-byte Spill -; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: vmov.32 d11[1], r7 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s29 -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: str r1, [sp, #12] @ 4-byte Spill -; LE-I64-NEXT: vmov.32 d13[1], r4 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: add r0, sp, #320 -; LE-I64-NEXT: add lr, sp, #120 -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: vld1.64 {d0, d1}, [r0] -; LE-I64-NEXT: add r0, sp, #304 -; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: vld1.64 {d0, d1}, [r0] -; LE-I64-NEXT: add r0, sp, #336 -; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #32 -; LE-I64-NEXT: vld1.64 {d0, d1}, [r0] -; LE-I64-NEXT: add r0, sp, #288 -; LE-I64-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #48 -; LE-I64-NEXT: vld1.64 {d0, d1}, [r0] -; LE-I64-NEXT: vmov.32 d10[1], r8 -; LE-I64-NEXT: add r8, r5, #64 -; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #152 -; LE-I64-NEXT: vst1.64 {d12, d13}, [r8:128]! -; LE-I64-NEXT: vst1.64 {d10, d11}, [r8:128]! 
-; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: vmov.f32 s0, s27 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s28 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s26 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: vmov.32 d11[1], r4 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: add lr, sp, #136 -; LE-I64-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; LE-I64-NEXT: mov r10, r1 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #168 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: vmov.f32 s0, s26 -; LE-I64-NEXT: vmov.32 d11[1], r0 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s25 -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: add lr, sp, #168 -; LE-I64-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: vorr q5, q6, q6 -; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d15[1], r0 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s20 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d14[1], r0 -; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: add lr, sp, #152 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vorr q7, q6, q6 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d9[1], r11 -; LE-I64-NEXT: vmov.f32 s0, s25 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s24 -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: vmov.32 d8[1], r9 -; LE-I64-NEXT: bl lrintf -; 
LE-I64-NEXT: add lr, sp, #136 -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d16[1], r10 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #120 -; LE-I64-NEXT: vst1.64 {d8, d9}, [r8:128]! -; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEXT: vmov.f32 s0, s1 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: add lr, sp, #152 -; LE-I64-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEXT: mov r10, r1 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128] -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vmov.f32 s0, s19 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: add lr, sp, #168 -; LE-I64-NEXT: vmov.f32 s0, s18 -; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d16[1], r7 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s17 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s16 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: vmov.32 d14[1], r6 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vmov.32 d11[1], r5 -; LE-I64-NEXT: vmov.32 d10[1], r11 -; LE-I64-NEXT: ldr r11, [sp, #68] @ 4-byte Reload -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #16 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #32 -; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]! 
-; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vmov.f32 s0, s23 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: add lr, sp, #152 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #120 -; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEXT: @ kill: def $s0 killed $s0 killed $q0 -; LE-I64-NEXT: vmov.32 d13[1], r10 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s22 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: add lr, sp, #152 -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d15[1], r8 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s21 -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s20 -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d13[1], r9 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: add lr, sp, #32 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #120 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vmov.f32 s0, s19 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s18 -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: mov r10, r1 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d13[1], r4 -; 
LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: add lr, sp, #152 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d16[1], r5 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #168 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #48 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]! -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vmov.f32 s0, s21 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s20 -; LE-I64-NEXT: vmov.32 d12[1], r8 -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: vmov.f32 s0, s23 -; LE-I64-NEXT: add lr, sp, #32 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: add lr, sp, #48 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEXT: vmov.f32 s0, s2 -; LE-I64-NEXT: vmov.32 d12[1], r9 -; LE-I64-NEXT: bl lrintf -; LE-I64-NEXT: add lr, sp, #16 -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #136 -; LE-I64-NEXT: vmov.32 d11[1], r7 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #152 -; LE-I64-NEXT: vmov.32 d15[1], r10 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128] -; LE-I64-NEXT: vmov.32 d10[1], r1 -; LE-I64-NEXT: ldr r1, [sp, #68] @ 4-byte Reload -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add r0, r1, #192 -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: vmov.32 d14[1], r4 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-I64-NEXT: vmov.32 d9[1], r5 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: vmov.32 d8[1], r6 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEXT: add r0, r1, #128 -; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEXT: add sp, sp, #184 -; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: add sp, sp, #4 -; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-I32-NEON-LABEL: lrint_v32f32: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr} -; LE-I32-NEON-NEXT: push {r4, r5, r6, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: .pad #144 -; LE-I32-NEON-NEXT: sub sp, sp, #144 -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: add r0, sp, #224 -; LE-I32-NEON-NEXT: vorr q4, q0, q0 -; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vorr q6, q3, q3 -; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I32-NEON-NEXT: vmov.f32 s0, s4 -; LE-I32-NEON-NEXT: add lr, sp, #80 -; LE-I32-NEON-NEXT: vorr q5, q1, q1 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #272 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #240 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #32 -; 
LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s18 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s22 -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: add lr, sp, #128 -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s20 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s22 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: vorr q7, q5, q5 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s26 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s24 -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s27 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s25 -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s31 -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s29 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vldmia sp, {d14, d15} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s31 -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: add lr, sp, 
#128 -; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s23 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s29 -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s20 -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: add lr, sp, #128 -; LE-I32-NEON-NEXT: add r0, sp, #256 -; LE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0] -; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s21 -; LE-I32-NEON-NEXT: vorr q4, q6, q6 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vorr q6, q7, q7 -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: vmov.f32 s0, s24 -; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s18 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s16 -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s19 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s26 -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s17 -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; 
LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s20 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: add lr, sp, #80 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s26 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s24 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s27 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s22 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s25 -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s23 -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s21 -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.f32 s0, s27 -; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: vmov.f32 s0, s25 -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: bl lrintf -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #128 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! 
-; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I32-NEON-NEXT: add r0, r4, #64 -; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r0:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]! -; LE-I32-NEON-NEXT: vst1.64 {d14, d15}, [r0:128] -; LE-I32-NEON-NEXT: add sp, sp, #144 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc} -; -; LE-I64-NEON-LABEL: lrint_v32f32: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: .pad #4 -; LE-I64-NEON-NEXT: sub sp, sp, #4 -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #184 -; LE-I64-NEON-NEXT: sub sp, sp, #184 -; LE-I64-NEON-NEXT: add lr, sp, #152 -; LE-I64-NEON-NEXT: vorr q7, q3, q3 -; LE-I64-NEON-NEXT: vorr q4, q2, q2 -; LE-I64-NEON-NEXT: mov r5, r0 -; LE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEON-NEXT: vmov.f32 s0, s3 -; LE-I64-NEON-NEXT: str r0, [sp, #68] @ 4-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s18 -; LE-I64-NEON-NEXT: add lr, sp, #168 -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: str r1, [sp, #16] @ 4-byte Spill -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s16 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; 
LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s17 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s19 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s31 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s30 -; LE-I64-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r7 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s29 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d13[1], r4 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: add r0, sp, #320 -; LE-I64-NEON-NEXT: add lr, sp, #120 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #304 -; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #336 -; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #32 -; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #288 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #48 -; LE-I64-NEON-NEXT: vld1.64 {d0, d1}, [r0] -; LE-I64-NEON-NEXT: vmov.32 d10[1], r8 -; LE-I64-NEON-NEXT: add r8, r5, #64 -; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #152 -; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r8:128]! -; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r8:128]! 
-; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s27 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s28 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s26 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #136 -; LE-I64-NEON-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #168 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s26 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s25 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #168 -; LE-I64-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vorr q5, q6, q6 -; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s20 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #152 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vorr q7, q6, q6 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; 
LE-I64-NEON-NEXT: vmov.32 d9[1], r11 -; LE-I64-NEON-NEXT: vmov.f32 s0, s25 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s24 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r9 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #136 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d16[1], r10 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #120 -; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r8:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s1 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #152 -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128] -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s19 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #168 -; LE-I64-NEON-NEXT: vmov.f32 s0, s18 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d16[1], r7 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s17 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s16 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: 
vmov.32 d14[1], r6 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r5 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r11 -; LE-I64-NEON-NEXT: ldr r11, [sp, #68] @ 4-byte Reload -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #16 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #32 -; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r11:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s23 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #152 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #120 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: @ kill: def $s0 killed $s0 killed $q0 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r10 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s22 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #152 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d15[1], r8 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s21 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s20 -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d12, 
d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d13[1], r9 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #32 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #120 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s19 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s18 -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d13[1], r4 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #152 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d16[1], r5 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #168 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #48 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! 
-; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s21 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s20 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r8 -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: vmov.f32 s0, s23 -; LE-I64-NEON-NEXT: add lr, sp, #32 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #48 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.f32 s0, s2 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r9 -; LE-I64-NEON-NEXT: bl lrintf -; LE-I64-NEON-NEXT: add lr, sp, #16 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #136 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r7 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #152 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r10 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] -; LE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; LE-I64-NEON-NEXT: ldr r1, [sp, #68] @ 4-byte Reload -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add r0, r1, #192 -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r4 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! 
-; LE-I64-NEON-NEXT: vmov.32 d9[1], r5 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r6 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEON-NEXT: add r0, r1, #128 -; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEON-NEXT: add sp, sp, #184 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: add sp, sp, #4 -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-LABEL: lrint_v32f32: -; BE-I32: @ %bb.0: -; BE-I32-NEXT: .save {r4, r5, r6, lr} -; BE-I32-NEXT: push {r4, r5, r6, lr} -; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: .pad #144 -; BE-I32-NEXT: sub sp, sp, #144 -; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: add r0, sp, #256 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: add r0, sp, #272 -; BE-I32-NEXT: vrev64.32 q4, q3 -; BE-I32-NEXT: vrev64.32 q7, q1 -; BE-I32-NEXT: vrev64.32 q8, q8 -; BE-I32-NEXT: vld1.64 {d18, d19}, [r0] -; BE-I32-NEXT: add r0, sp, #224 -; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #80 -; BE-I32-NEXT: vrev64.32 q5, q0 -; BE-I32-NEXT: vmov.f32 s0, s28 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: vrev64.32 q8, q9 -; BE-I32-NEXT: vld1.64 {d20, d21}, [r0] -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: vrev64.32 q8, q10 -; BE-I32-NEXT: vrev64.32 q6, q2 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; 
BE-I32-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s18 -; BE-I32-NEXT: vmov.32 d16[0], r0 -; BE-I32-NEXT: add lr, sp, #128 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s20 -; BE-I32-NEXT: add lr, sp, #112 -; BE-I32-NEXT: vmov.32 d17[0], r0 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s22 -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s30 -; BE-I32-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s26 -; BE-I32-NEXT: add lr, sp, #128 -; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I32-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s24 -; BE-I32-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s27 -; BE-I32-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s25 -; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload -; BE-I32-NEXT: vmov.f32 s0, s27 -; BE-I32-NEXT: add lr, sp, #96 -; BE-I32-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEXT: vmov.f32 s0, s23 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s21 -; BE-I32-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEXT: vmov.f32 s0, s23 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s25 -; BE-I32-NEXT: 
add lr, sp, #112 -; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s20 -; BE-I32-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEXT: add r0, sp, #240 -; BE-I32-NEXT: add lr, sp, #128 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEXT: vrev64.32 q6, q8 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s21 -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s24 -; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: add lr, sp, #112 -; BE-I32-NEXT: vorr q7, q6, q6 -; BE-I32-NEXT: vstmia sp, {d12, d13} @ 16-byte Spill -; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEXT: vmov.f32 s0, s18 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s16 -; BE-I32-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s19 -; BE-I32-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s30 -; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s17 -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEXT: vmov.f32 s0, s20 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: add lr, sp, #80 -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEXT: vmov.f32 s0, s26 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s24 -; BE-I32-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s27 -; 
BE-I32-NEXT: vmov.32 d14[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s22 -; BE-I32-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s25 -; BE-I32-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s23 -; BE-I32-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vmov.f32 s0, s21 -; BE-I32-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload -; BE-I32-NEXT: vmov.f32 s0, s27 -; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vmov.f32 s0, s25 -; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #112 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: vrev64.32 q8, q8 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: bl lrintf -; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEXT: mov r0, r4 -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #128 -; BE-I32-NEXT: vrev64.32 q8, q4 -; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #96 -; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #112 -; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-I32-NEXT: add r0, r4, #64 -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]! -; BE-I32-NEXT: vst1.32 {d14, d15}, [r0:128]! 
-; BE-I32-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I32-NEXT: add sp, sp, #144 -; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: pop {r4, r5, r6, pc} -; -; BE-I64-LABEL: lrint_v32f32: -; BE-I64: @ %bb.0: -; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEXT: .pad #4 -; BE-I64-NEXT: sub sp, sp, #4 -; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: .pad #256 -; BE-I64-NEXT: sub sp, sp, #256 -; BE-I64-NEXT: add lr, sp, #208 -; BE-I64-NEXT: str r0, [sp, #156] @ 4-byte Spill -; BE-I64-NEXT: add r0, sp, #408 -; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #120 -; BE-I64-NEXT: vld1.64 {d10, d11}, [r0] -; BE-I64-NEXT: add r0, sp, #392 -; BE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #160 -; BE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #176 -; BE-I64-NEXT: vrev64.32 d8, d10 -; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #136 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: vld1.64 {d12, d13}, [r0] -; BE-I64-NEXT: add r0, sp, #360 -; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #192 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #376 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #40 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vrev64.32 d9, d11 -; BE-I64-NEXT: add lr, sp, #240 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: str r1, [sp, #104] @ 4-byte Spill -; BE-I64-NEXT: vmov.f32 s0, s18 -; BE-I64-NEXT: vrev64.32 d8, d13 -; 
BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s19 -; BE-I64-NEXT: add lr, sp, #192 -; BE-I64-NEXT: str r1, [sp, #72] @ 4-byte Spill -; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vrev64.32 d10, d16 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s20 -; BE-I64-NEXT: add lr, sp, #224 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s21 -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: mov r9, r1 -; BE-I64-NEXT: vmov.32 d15[1], r6 -; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #192 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vrev64.32 d8, d17 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: vmov.32 d14[1], r7 -; BE-I64-NEXT: add lr, sp, #56 -; BE-I64-NEXT: mov r10, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: add lr, sp, #192 -; BE-I64-NEXT: mov r11, r1 -; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #40 -; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #224 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vrev64.32 d8, d12 -; BE-I64-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 
s0, s16 -; BE-I64-NEXT: vmov.32 d10[1], r5 -; BE-I64-NEXT: add lr, sp, #224 -; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vrev64.32 d8, d13 -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #240 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d11[1], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #104] @ 4-byte Reload -; BE-I64-NEXT: add lr, sp, #240 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d10[1], r0 -; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: add lr, sp, #136 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #72] @ 4-byte Reload -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEXT: vrev64.32 d8, d16 -; BE-I64-NEXT: vmov.32 d13[1], r0 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: vmov.32 d12[1], r9 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: add lr, sp, #192 -; BE-I64-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: vmov.32 d17[1], r10 -; BE-I64-NEXT: vmov.32 d16[1], r11 -; BE-I64-NEXT: vorr q9, q8, q8 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #192 -; BE-I64-NEXT: vmov.32 d17[1], r8 -; BE-I64-NEXT: vmov.32 d16[1], r5 -; BE-I64-NEXT: vorr q10, q8, q8 -; BE-I64-NEXT: vrev64.32 q8, q6 -; BE-I64-NEXT: 
vmov.32 d14[1], r6 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #240 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: vrev64.32 q8, q8 -; BE-I64-NEXT: vmov.32 d11[1], r7 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #224 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q8 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #56 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #136 -; BE-I64-NEXT: vrev64.32 q8, q8 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #104 -; BE-I64-NEXT: vrev64.32 q8, q9 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #88 -; BE-I64-NEXT: vrev64.32 q8, q10 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #72 -; BE-I64-NEXT: vrev64.32 q8, q7 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #208 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #56 -; BE-I64-NEXT: vrev64.32 d8, d17 -; BE-I64-NEXT: vrev64.32 q8, q5 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: add lr, sp, #120 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d13[1], r4 -; BE-I64-NEXT: vrev64.32 d8, d10 -; BE-I64-NEXT: vmov.32 d12[1], r1 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: vrev64.32 q6, q6 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.32 d14[0], r0 
-; BE-I64-NEXT: ldr r6, [sp, #156] @ 4-byte Reload -; BE-I64-NEXT: vrev64.32 d8, d11 -; BE-I64-NEXT: add r5, r6, #64 -; BE-I64-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: vrev64.32 q8, q7 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: add lr, sp, #208 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEXT: vrev64.32 d8, d18 -; BE-I64-NEXT: vrev64.32 q8, q7 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: add lr, sp, #160 -; BE-I64-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q7 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vrev64.32 d8, d11 -; BE-I64-NEXT: vst1.64 {d12, d13}, [r5:128] -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: add lr, sp, #208 -; BE-I64-NEXT: vmov.32 d13[1], r4 -; BE-I64-NEXT: vmov.32 d12[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q6 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #176 -; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEXT: vrev64.32 d8, d12 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: mov r5, r6 -; BE-I64-NEXT: vrev64.32 d8, d13 -; BE-I64-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: vrev64.32 q8, q7 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: vrev64.32 d8, d10 -; BE-I64-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEXT: vmov.f32 s0, s17 -; BE-I64-NEXT: vrev64.32 q8, q7 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.f32 s0, s16 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: bl lrintf -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: add lr, sp, #208 -; BE-I64-NEXT: add r0, r6, #192 -; BE-I64-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q7 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #56 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128] -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #192 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #240 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #224 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #136 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I64-NEXT: add r0, r6, #128 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #104 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #88 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #72 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I64-NEXT: add sp, sp, #256 -; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: add sp, sp, #4 -; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v32f32: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr} -; BE-I32-NEON-NEXT: push {r4, r5, r6, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: .pad #144 -; BE-I32-NEON-NEXT: sub sp, sp, #144 -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: add r0, sp, #256 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: add r0, sp, #272 -; BE-I32-NEON-NEXT: vrev64.32 q4, q3 -; BE-I32-NEON-NEXT: vrev64.32 q7, q1 -; BE-I32-NEON-NEXT: vrev64.32 q8, q8 -; BE-I32-NEON-NEXT: vld1.64 {d18, d19}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #224 -; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vrev64.32 q5, q0 -; BE-I32-NEON-NEXT: vmov.f32 s0, s28 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vrev64.32 q8, q9 -; BE-I32-NEON-NEXT: vld1.64 {d20, d21}, [r0] -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vrev64.32 q8, q10 -; BE-I32-NEON-NEXT: vrev64.32 q6, q2 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: vstmia sp, {d14, d15} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s18 -; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I32-NEON-NEXT: add lr, sp, #128 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s20 -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: 
vmov.32 d17[0], r0 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s22 -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s30 -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s26 -; BE-I32-NEON-NEXT: add lr, sp, #128 -; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s24 -; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s27 -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s25 -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s27 -; BE-I32-NEON-NEXT: add lr, sp, #96 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s23 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s21 -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s23 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s25 -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.32 
d9[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s20 -; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEON-NEXT: add r0, sp, #240 -; BE-I32-NEON-NEXT: add lr, sp, #128 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEON-NEXT: vrev64.32 q6, q8 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s21 -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s24 -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vorr q7, q6, q6 -; BE-I32-NEON-NEXT: vstmia sp, {d12, d13} @ 16-byte Spill -; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s18 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s16 -; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s19 -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s30 -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s17 -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s20 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s26 -; 
BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s24 -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s27 -; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s22 -; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s25 -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s23 -; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vmov.f32 s0, s21 -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: vldmia sp, {d12, d13} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.f32 s0, s27 -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vmov.f32 s0, s25 -; BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q8, q8 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintf -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: mov r0, r4 -; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #128 -; BE-I32-NEON-NEXT: vrev64.32 q8, q4 -; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #96 -; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! 
-; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-I32-NEON-NEXT: add r0, r4, #64 -; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]! -; BE-I32-NEON-NEXT: vst1.32 {d14, d15}, [r0:128]! -; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I32-NEON-NEXT: add sp, sp, #144 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc} -; -; BE-I64-NEON-LABEL: lrint_v32f32: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: .pad #4 -; BE-I64-NEON-NEXT: sub sp, sp, #4 -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: .pad #256 -; BE-I64-NEON-NEXT: sub sp, sp, #256 -; BE-I64-NEON-NEXT: add lr, sp, #208 -; BE-I64-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill -; BE-I64-NEON-NEXT: add r0, sp, #408 -; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #120 -; BE-I64-NEON-NEXT: vld1.64 {d10, d11}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #392 -; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #160 -; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #176 -; BE-I64-NEON-NEXT: vrev64.32 d8, d10 -; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #136 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: vld1.64 {d12, d13}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #360 -; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #192 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #376 -; 
BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #40 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vrev64.32 d9, d11 -; BE-I64-NEON-NEXT: add lr, sp, #240 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: str r1, [sp, #104] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.f32 s0, s18 -; BE-I64-NEON-NEXT: vrev64.32 d8, d13 -; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s19 -; BE-I64-NEON-NEXT: add lr, sp, #192 -; BE-I64-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d10, d16 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s20 -; BE-I64-NEON-NEXT: add lr, sp, #224 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s21 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r6 -; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #192 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload 
-; BE-I64-NEON-NEXT: vrev64.32 d8, d17 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r7 -; BE-I64-NEON-NEXT: add lr, sp, #56 -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #192 -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #40 -; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #224 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d12 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r5 -; BE-I64-NEON-NEXT: add lr, sp, #224 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vrev64.32 d8, d13 -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #240 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #240 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: 
add lr, sp, #136 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d16 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r9 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: add lr, sp, #192 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r10 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r11 -; BE-I64-NEON-NEXT: vorr q9, q8, q8 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #192 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r8 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r5 -; BE-I64-NEON-NEXT: vorr q10, q8, q8 -; BE-I64-NEON-NEXT: vrev64.32 q8, q6 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r6 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #240 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: vrev64.32 q8, q8 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r7 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #224 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q8 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #56 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #136 -; BE-I64-NEON-NEXT: vrev64.32 q8, q8 -; BE-I64-NEON-NEXT: vstmia lr, {d16, 
d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #104 -; BE-I64-NEON-NEXT: vrev64.32 q8, q9 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #88 -; BE-I64-NEON-NEXT: vrev64.32 q8, q10 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #72 -; BE-I64-NEON-NEXT: vrev64.32 q8, q7 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #208 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #56 -; BE-I64-NEON-NEXT: vrev64.32 d8, d17 -; BE-I64-NEON-NEXT: vrev64.32 q8, q5 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: add lr, sp, #120 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d13[1], r4 -; BE-I64-NEON-NEXT: vrev64.32 d8, d10 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: vrev64.32 q6, q6 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r6, [sp, #156] @ 4-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d11 -; BE-I64-NEON-NEXT: add r5, r6, #64 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: vrev64.32 q8, q7 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: add lr, sp, #208 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 d8, d18 -; BE-I64-NEON-NEXT: vrev64.32 q8, q7 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #160 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q7 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d11 -; BE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r5:128] -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #208 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q6 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #176 -; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d8, d12 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: mov r5, r6 -; 
BE-I64-NEON-NEXT: vrev64.32 d8, d13 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: vrev64.32 q8, q7 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: vrev64.32 d8, d10 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEON-NEXT: vmov.f32 s0, s17 -; BE-I64-NEON-NEXT: vrev64.32 q8, q7 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.f32 s0, s16 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: bl lrintf -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #208 -; BE-I64-NEON-NEXT: add r0, r6, #192 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q7 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #56 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #192 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #240 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #224 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #136 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I64-NEON-NEXT: add r0, r6, #128 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #104 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #88 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #72 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I64-NEON-NEXT: add sp, sp, #256 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: add sp, sp, #4 -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x) - ret <32 x iXLen> %a -} -declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>) - -define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { -; LE-I32-LABEL: lrint_v1f64: -; LE-I32: @ %bb.0: -; LE-I32-NEXT: .save {r11, lr} -; LE-I32-NEXT: push {r11, lr} -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: pop {r11, pc} -; -; LE-I64-LABEL: lrint_v1f64: -; LE-I64: @ %bb.0: -; LE-I64-NEXT: .save {r11, lr} -; LE-I64-NEXT: push {r11, lr} -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d0[0], r0 -; LE-I64-NEXT: vmov.32 d0[1], r1 -; LE-I64-NEXT: pop {r11, pc} -; -; LE-I32-NEON-LABEL: lrint_v1f64: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v1f64: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r11, lr} -; LE-I64-NEON-NEXT: push {r11, lr} -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d0[0], r0 -; LE-I64-NEON-NEXT: vmov.32 
d0[1], r1 -; LE-I64-NEON-NEXT: pop {r11, pc} -; -; BE-I32-LABEL: lrint_v1f64: -; BE-I32: @ %bb.0: -; BE-I32-NEXT: .save {r11, lr} -; BE-I32-NEXT: push {r11, lr} -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: pop {r11, pc} -; -; BE-I64-LABEL: lrint_v1f64: -; BE-I64: @ %bb.0: -; BE-I64-NEXT: .save {r11, lr} -; BE-I64-NEXT: push {r11, lr} -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEXT: vrev64.32 d0, d16 -; BE-I64-NEXT: pop {r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v1f64: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v1f64: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r11, lr} -; BE-I64-NEON-NEXT: push {r11, lr} -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 d0, d16 -; BE-I64-NEON-NEXT: pop {r11, pc} - %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) - ret <1 x iXLen> %a -} -declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) - -define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { -; LE-I32-LABEL: lrint_v2f64: -; LE-I32: @ %bb.0: -; LE-I32-NEXT: .save {r11, lr} -; LE-I32-NEXT: push {r11, lr} -; LE-I32-NEXT: .vsave {d8, d9, d10} -; LE-I32-NEXT: vpush {d8, d9, d10} -; LE-I32-NEXT: vorr q4, q0, q0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d9, d9 -; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: vorr d0, d10, d10 -; LE-I32-NEXT: vpop {d8, d9, d10} -; LE-I32-NEXT: pop {r11, pc} -; -; LE-I64-LABEL: lrint_v2f64: -; LE-I64: @ %bb.0: -; LE-I64-NEXT: .save {r4, lr} -; LE-I64-NEXT: push {r4, lr} -; LE-I64-NEXT: .vsave {d8, d9, d10, d11} -; LE-I64-NEXT: vpush {d8, d9, d10, d11} -; LE-I64-NEXT: vorr q4, q0, q0 -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, 
d8, d8 +; LE-I64-NEXT: .save {r4, lr} +; LE-I64-NEXT: push {r4, lr} +; LE-I64-NEXT: .vsave {d8, d9, d10, d11} +; LE-I64-NEXT: vpush {d8, d9, d10, d11} +; LE-I64-NEXT: vorr q4, q0, q0 +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: vorr d0, d8, d8 ; LE-I64-NEXT: mov r4, r1 ; LE-I64-NEXT: vmov.32 d11[0], r0 ; LE-I64-NEXT: bl lrint @@ -4120,42 +1082,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { ; LE-I64-NEXT: vpop {d8, d9, d10, d11} ; LE-I64-NEXT: pop {r4, pc} ; -; LE-I32-NEON-LABEL: lrint_v2f64: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10} -; LE-I32-NEON-NEXT: vorr q4, q0, q0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d9, d9 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10} -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v2f64: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, lr} -; LE-I64-NEON-NEXT: push {r4, lr} -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11} -; LE-I64-NEON-NEXT: vorr q4, q0, q0 -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; LE-I64-NEON-NEXT: vorr q0, q5, q5 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11} -; LE-I64-NEON-NEXT: pop {r4, pc} -; ; BE-I32-LABEL: lrint_v2f64: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r11, lr} @@ -4191,42 +1117,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { ; BE-I64-NEXT: vrev64.32 q0, q5 ; BE-I64-NEXT: vpop {d8, d9, d10, d11} ; BE-I64-NEXT: pop {r4, 
pc} -; -; BE-I32-NEON-LABEL: lrint_v2f64: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10} -; BE-I32-NEON-NEXT: vorr q4, q0, q0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d9, d9 -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 d0, d10 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10} -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v2f64: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, lr} -; BE-I64-NEON-NEXT: push {r4, lr} -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11} -; BE-I64-NEON-NEXT: vorr q4, q0, q0 -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q0, q5 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11} -; BE-I64-NEON-NEXT: pop {r4, pc} %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x) ret <2 x iXLen> %a } @@ -4289,62 +1179,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { ; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: pop {r4, r5, r6, pc} ; -; LE-I32-NEON-LABEL: lrint_v4f64: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; LE-I32-NEON-NEXT: vorr q4, q1, q1 -; LE-I32-NEON-NEXT: vorr q5, q0, q0 -; LE-I32-NEON-NEXT: vorr d0, d8, d8 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; 
LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d9, d9 -; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d11, d11 -; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q6, q6 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v4f64: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, lr} -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vorr q5, q1, q1 -; LE-I64-NEON-NEXT: vorr q6, q0, q0 -; LE-I64-NEON-NEXT: vorr d0, d11, d11 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d12, d12 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d13, d13 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d10, d10 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEON-NEXT: vorr q0, q7, q7 -; LE-I64-NEON-NEXT: vorr q1, q4, q4 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: pop {r4, r5, r6, pc} -; ; BE-I32-LABEL: lrint_v4f64: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r11, lr} @@ -4400,62 +1234,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { ; BE-I64-NEXT: vrev64.32 q1, q6 ; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I64-NEXT: pop {r4, r5, r6, pc} -; -; BE-I32-NEON-LABEL: lrint_v4f64: -; BE-I32-NEON: @ %bb.0: -; 
BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; BE-I32-NEON-NEXT: vorr q4, q1, q1 -; BE-I32-NEON-NEXT: vorr q5, q0, q0 -; BE-I32-NEON-NEXT: vorr d0, d8, d8 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d10, d10 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d9, d9 -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d11, d11 -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q0, q6 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v4f64: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, lr} -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vorr q4, q1, q1 -; BE-I64-NEON-NEXT: vorr q5, q0, q0 -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d10, d10 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d11, d11 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r6 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q0, q7 -; BE-I64-NEON-NEXT: vrev64.32 q1, q6 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, 
d15} -; BE-I64-NEON-NEXT: pop {r4, r5, r6, pc} %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x) ret <4 x iXLen> %a } @@ -4470,1842 +1248,342 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I32-NEXT: .pad #32 ; LE-I32-NEXT: sub sp, sp, #32 -; LE-I32-NEXT: vorr q5, q0, q0 -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vorr d0, d4, d4 -; LE-I32-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill -; LE-I32-NEXT: vorr q7, q3, q3 -; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-I32-NEXT: vorr q6, q1, q1 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d14, d14 -; LE-I32-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d12, d12 -; LE-I32-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d10, d10 -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d13, d13 -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d11, d11 -; LE-I32-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: vorr q0, q7, q7 -; LE-I32-NEXT: vorr q1, q4, q4 -; LE-I32-NEXT: add sp, sp, #32 -; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: pop {r11, pc} -; -; LE-I64-LABEL: lrint_v8f64: -; LE-I64: @ %bb.0: -; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: .pad #40 -; LE-I64-NEXT: sub sp, sp, #40 -; LE-I64-NEXT: vorr 
q4, q0, q0 -; LE-I64-NEXT: add lr, sp, #24 -; LE-I64-NEXT: vorr d0, d7, d7 -; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I64-NEXT: vorr q7, q2, q2 -; LE-I64-NEXT: vorr q6, q1, q1 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d14, d14 -; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d15, d15 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d12, d12 -; LE-I64-NEXT: mov r10, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d13, d13 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d8, d8 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #24 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: vmov.32 d13[1], r6 -; LE-I64-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEXT: vmov.32 d11[1], r10 -; LE-I64-NEXT: vmov.32 d6[0], r0 -; LE-I64-NEXT: vmov.32 d12[1], r5 -; LE-I64-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEXT: vorr q0, q6, q6 -; LE-I64-NEXT: vmov.32 d10[1], r9 -; LE-I64-NEXT: vorr q1, q7, q7 -; LE-I64-NEXT: vmov.32 d7[1], r8 -; LE-I64-NEXT: vorr q2, q5, q5 -; LE-I64-NEXT: vmov.32 d6[1], r1 -; LE-I64-NEXT: add sp, sp, #40 -; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; LE-I32-NEON-LABEL: lrint_v8f64: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; 
LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: .pad #32 -; LE-I32-NEON-NEXT: sub sp, sp, #32 -; LE-I32-NEON-NEXT: vorr q5, q0, q0 -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vorr d0, d4, d4 -; LE-I32-NEON-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill -; LE-I32-NEON-NEXT: vorr q7, q3, q3 -; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-I32-NEON-NEXT: vorr q6, q1, q1 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d14, d14 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d12, d12 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d13, d13 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d11, d11 -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q7, q7 -; LE-I32-NEON-NEXT: vorr q1, q4, q4 -; LE-I32-NEON-NEXT: add sp, sp, #32 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v8f64: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; 
LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #40 -; LE-I64-NEON-NEXT: sub sp, sp, #40 -; LE-I64-NEON-NEXT: vorr q4, q0, q0 -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vorr d0, d7, d7 -; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I64-NEON-NEXT: vorr q7, q2, q2 -; LE-I64-NEON-NEXT: vorr q6, q1, q1 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d14, d14 -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d15, d15 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d12, d12 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d13, d13 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r6 -; LE-I64-NEON-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r10 -; LE-I64-NEON-NEXT: vmov.32 d6[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEON-NEXT: vorr q0, q6, q6 -; LE-I64-NEON-NEXT: vmov.32 
d10[1], r9 -; LE-I64-NEON-NEXT: vorr q1, q7, q7 -; LE-I64-NEON-NEXT: vmov.32 d7[1], r8 -; LE-I64-NEON-NEXT: vorr q2, q5, q5 -; LE-I64-NEON-NEXT: vmov.32 d6[1], r1 -; LE-I64-NEON-NEXT: add sp, sp, #40 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-I32-LABEL: lrint_v8f64: -; BE-I32: @ %bb.0: -; BE-I32-NEXT: .save {r11, lr} -; BE-I32-NEXT: push {r11, lr} -; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: .pad #32 -; BE-I32-NEXT: sub sp, sp, #32 -; BE-I32-NEXT: vorr q5, q0, q0 -; BE-I32-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill -; BE-I32-NEXT: vorr d0, d4, d4 -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vorr q7, q3, q3 -; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I32-NEXT: vorr q6, q1, q1 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d10, d10 -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d14, d14 -; BE-I32-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d12, d12 -; BE-I32-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d15, d15 -; BE-I32-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d13, d13 -; BE-I32-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: vrev64.32 q0, q5 -; BE-I32-NEXT: vrev64.32 q1, q4 -; BE-I32-NEXT: add sp, sp, #32 -; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: pop {r11, pc} -; -; BE-I64-LABEL: lrint_v8f64: -; BE-I64: @ %bb.0: -; BE-I64-NEXT: 
.save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: .pad #40 -; BE-I64-NEXT: sub sp, sp, #40 -; BE-I64-NEXT: vorr q4, q0, q0 -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: vorr d0, d7, d7 -; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-I64-NEXT: vorr q7, q2, q2 -; BE-I64-NEXT: vorr q6, q1, q1 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d14, d14 -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: vmov.32 d17[0], r0 -; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d15, d15 -; BE-I64-NEXT: mov r9, r1 -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d12, d12 -; BE-I64-NEXT: mov r10, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d13, d13 -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: vmov.32 d13[1], r6 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEXT: vmov.32 d11[1], r10 -; BE-I64-NEXT: vmov.32 d17[1], r8 -; BE-I64-NEXT: vmov.32 d12[1], r5 -; BE-I64-NEXT: vmov.32 d14[1], r7 -; BE-I64-NEXT: vmov.32 d10[1], r9 -; BE-I64-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEXT: vrev64.32 q0, q6 -; BE-I64-NEXT: 
vrev64.32 q1, q7 -; BE-I64-NEXT: vrev64.32 q2, q5 -; BE-I64-NEXT: vrev64.32 q3, q8 -; BE-I64-NEXT: add sp, sp, #40 -; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-I32-NEON-LABEL: lrint_v8f64: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: .pad #32 -; BE-I32-NEON-NEXT: sub sp, sp, #32 -; BE-I32-NEON-NEXT: vorr q5, q0, q0 -; BE-I32-NEON-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill -; BE-I32-NEON-NEXT: vorr d0, d4, d4 -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vorr q7, q3, q3 -; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I32-NEON-NEXT: vorr q6, q1, q1 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d10, d10 -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d14, d14 -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d12, d12 -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d15, d15 -; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d13, d13 -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q0, q5 -; BE-I32-NEON-NEXT: vrev64.32 q1, q4 -; BE-I32-NEON-NEXT: add sp, sp, #32 -; BE-I32-NEON-NEXT: vpop {d8, d9, 
d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v8f64: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: .pad #40 -; BE-I64-NEON-NEXT: sub sp, sp, #40 -; BE-I64-NEON-NEXT: vorr q4, q0, q0 -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: vorr d0, d7, d7 -; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-I64-NEON-NEXT: vorr q7, q2, q2 -; BE-I64-NEON-NEXT: vorr q6, q1, q1 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d14, d14 -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d15, d15 -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d12, d12 -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d13, d13 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r6 -; 
BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r10 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r8 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r7 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r9 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q0, q6 -; BE-I64-NEON-NEXT: vrev64.32 q1, q7 -; BE-I64-NEON-NEXT: vrev64.32 q2, q5 -; BE-I64-NEON-NEXT: vrev64.32 q3, q8 -; BE-I64-NEON-NEXT: add sp, sp, #40 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} - %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x) - ret <8 x iXLen> %a -} -declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) - -define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { -; LE-I32-LABEL: lrint_v16f64: -; LE-I32: @ %bb.0: -; LE-I32-NEXT: .save {r4, r5, r6, lr} -; LE-I32-NEXT: push {r4, r5, r6, lr} -; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: .pad #128 -; LE-I32-NEXT: sub sp, sp, #128 -; LE-I32-NEXT: add lr, sp, #80 -; LE-I32-NEXT: add r0, sp, #240 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #208 -; LE-I32-NEXT: vorr q6, q0, q0 -; LE-I32-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vorr q5, q1, q1 -; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEXT: vorr q5, q0, q0 ; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #64 ; LE-I32-NEXT: vorr d0, d4, d4 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #112 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #224 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #96 -; LE-I32-NEXT: vld1.64 
{d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #256 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vld1.64 {d14, d15}, [r0] -; LE-I32-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill -; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I32-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill +; LE-I32-NEXT: vorr q7, q3, q3 +; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEXT: vorr q6, q1, q1 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: vorr d0, d14, d14 ; LE-I32-NEXT: vmov.32 d8[0], r0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d10, d10 -; LE-I32-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d14, d14 -; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: vmov.32 d9[0], r0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: mov r4, r0 -; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEXT: vorr d0, d10, d10 +; LE-I32-NEXT: vmov.32 d15[0], r0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: vorr d0, d13, d13 ; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #112 -; LE-I32-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I32-NEXT: vmov.32 d15[0], r4 +; LE-I32-NEXT: vorr d0, d11, d11 +; LE-I32-NEXT: vmov.32 d15[1], r0 ; LE-I32-NEXT: bl lrint ; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload ; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #96 -; LE-I32-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEXT: vmov.32 d14[1], 
r0 ; LE-I32-NEXT: bl lrint ; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #80 -; LE-I32-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #32 ; LE-I32-NEXT: vmov.32 d9[1], r0 ; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload ; LE-I32-NEXT: vorr d0, d17, d17 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #96 ; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #112 -; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEXT: vorr q0, q6, q6 +; LE-I32-NEXT: vorr q0, q7, q7 ; LE-I32-NEXT: vorr q1, q4, q4 -; LE-I32-NEXT: vorr q2, q5, q5 -; LE-I32-NEXT: vorr q3, q7, q7 -; LE-I32-NEXT: add sp, sp, #128 +; LE-I32-NEXT: add sp, sp, #32 ; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: pop {r4, r5, r6, pc} +; LE-I32-NEXT: pop {r11, pc} ; -; LE-I64-LABEL: lrint_v16f64: +; LE-I64-LABEL: lrint_v8f64: ; LE-I64: @ %bb.0: -; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEXT: .pad #4 -; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; 
LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: .pad #176 -; LE-I64-NEXT: sub sp, sp, #176 -; LE-I64-NEXT: add lr, sp, #40 -; LE-I64-NEXT: str r0, [sp, #140] @ 4-byte Spill -; LE-I64-NEXT: add r0, sp, #312 -; LE-I64-NEXT: vorr q6, q2, q2 +; LE-I64-NEXT: .pad #40 +; LE-I64-NEXT: sub sp, sp, #40 +; LE-I64-NEXT: vorr q4, q0, q0 +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vorr d0, d7, d7 ; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: vorr q7, q1, q1 -; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #144 -; LE-I64-NEXT: vorr d0, d1, d1 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #280 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #80 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #296 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #120 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #328 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: vorr q7, q2, q2 +; LE-I64-NEXT: vorr q6, q1, q1 ; LE-I64-NEXT: bl lrint ; LE-I64-NEXT: vorr d0, d14, d14 -; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill -; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-I64-NEXT: bl lrint ; LE-I64-NEXT: vorr d0, d15, d15 -; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill -; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d10[0], r0 ; LE-I64-NEXT: bl lrint ; LE-I64-NEXT: vorr d0, d12, d12 -; LE-I64-NEXT: add lr, sp, #160 -; 
LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: str r1, [sp, #72] @ 4-byte Spill -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 ; LE-I64-NEXT: bl lrint ; LE-I64-NEXT: vorr d0, d13, d13 -; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: mov r7, r1 ; LE-I64-NEXT: vmov.32 d14[0], r0 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vorr d0, d8, d8 ; LE-I64-NEXT: mov r4, r1 ; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d8, d8 ; LE-I64-NEXT: bl lrint ; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #96 ; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: add lr, sp, #40 -; LE-I64-NEXT: mov r10, r1 -; LE-I64-NEXT: vmov.32 d13[1], r5 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d8, d8 -; LE-I64-NEXT: vmov.32 d12[1], r7 -; LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: vmov.32 d12[0], r0 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d10[0], r0 ; LE-I64-NEXT: add lr, sp, #24 -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #144 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d17, d17 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: vmov.32 d14[1], r6 -; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vmov.32 d17[0], r0 
-; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #80 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d11, d11 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: vorr d0, d10, d10 -; LE-I64-NEXT: ldr r0, [sp, #72] @ 4-byte Reload -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vmov.32 d9[1], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload -; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d8[1], r0 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #120 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d11, d11 -; LE-I64-NEXT: bl lrint ; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: add lr, sp, #40 -; LE-I64-NEXT: vorr d0, d10, d10 -; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d9[1], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #144 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d12[0], r0 ; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload ; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEXT: vmov.32 d8[1], r10 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #24 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: vmov.32 d20[0], r0 -; LE-I64-NEXT: vmov.32 d21[1], r8 -; LE-I64-NEXT: vmov.32 d20[1], r1 -; LE-I64-NEXT: ldr r1, [sp, #140] @ 4-byte Reload -; LE-I64-NEXT: vmov.32 d13[1], r5 -; LE-I64-NEXT: mov r0, r1 -; LE-I64-NEXT: vst1.64 {d8, d9}, 
[r0:128]! -; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vmov.32 d14[1], r4 -; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: vmov.32 d12[1], r7 -; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d17[1], r9 -; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128] -; LE-I64-NEXT: add r0, r1, #64 -; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-I64-NEXT: vmov.32 d16[1], r11 -; LE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]! -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEXT: add sp, sp, #176 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: vldmia lr, {d6, d7} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEXT: vmov.32 d11[1], r10 +; LE-I64-NEXT: vmov.32 d6[0], r0 +; LE-I64-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEXT: vorr q0, q6, q6 +; LE-I64-NEXT: vmov.32 d10[1], r9 +; LE-I64-NEXT: vorr q1, q7, q7 +; LE-I64-NEXT: vmov.32 d7[1], r8 +; LE-I64-NEXT: vorr q2, q5, q5 +; LE-I64-NEXT: vmov.32 d6[1], r1 +; LE-I64-NEXT: add sp, sp, #40 ; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: add sp, sp, #4 -; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-I32-NEON-LABEL: lrint_v16f64: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr} -; LE-I32-NEON-NEXT: push {r4, r5, r6, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: .pad #128 -; LE-I32-NEON-NEXT: sub sp, sp, #128 -; LE-I32-NEON-NEXT: add lr, sp, #80 -; LE-I32-NEON-NEXT: add r0, sp, #240 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #208 -; LE-I32-NEON-NEXT: 
vorr q6, q0, q0 -; LE-I32-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vorr q5, q1, q1 -; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vorr d0, d4, d4 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #224 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #256 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0] -; LE-I32-NEON-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill -; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d12, d12 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d14, d14 -; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #80 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; 
LE-I32-NEON-NEXT: vmov.32 d15[0], r4 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #80 -; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q6, q6 -; LE-I32-NEON-NEXT: 
vorr q1, q4, q4 -; LE-I32-NEON-NEXT: vorr q2, q5, q5 -; LE-I32-NEON-NEXT: vorr q3, q7, q7 -; LE-I32-NEON-NEXT: add sp, sp, #128 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc} -; -; LE-I64-NEON-LABEL: lrint_v16f64: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: .pad #4 -; LE-I64-NEON-NEXT: sub sp, sp, #4 -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #176 -; LE-I64-NEON-NEXT: sub sp, sp, #176 -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: str r0, [sp, #140] @ 4-byte Spill -; LE-I64-NEON-NEXT: add r0, sp, #312 -; LE-I64-NEON-NEXT: vorr q6, q2, q2 -; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: vorr q7, q1, q1 -; LE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: vorr d0, d1, d1 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #280 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #80 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #296 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #120 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #328 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d14, d14 -; LE-I64-NEON-NEXT: str r1, [sp, #116] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, 
d15, d15 -; LE-I64-NEON-NEXT: str r1, [sp, #76] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d12, d12 -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: str r1, [sp, #72] @ 4-byte Spill -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d13, d13 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r5 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r7 -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 
16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d17, d17 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r6 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #80 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d11, d11 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vorr d0, d10, d10 -; LE-I64-NEON-NEXT: ldr r0, [sp, #72] @ 4-byte Reload -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #76] @ 4-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #120 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d11, d11 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: vorr d0, d10, d10 -; LE-I64-NEON-NEXT: ldr r0, [sp, #116] @ 4-byte Reload -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEON-NEXT: vmov.32 
d8[1], r10 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vmov.32 d20[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d21[1], r8 -; LE-I64-NEON-NEXT: vmov.32 d20[1], r1 -; LE-I64-NEON-NEXT: ldr r1, [sp, #140] @ 4-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d13[1], r5 -; LE-I64-NEON-NEXT: mov r0, r1 -; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r4 -; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r7 -; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d17[1], r9 -; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; LE-I64-NEON-NEXT: add r0, r1, #64 -; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-I64-NEON-NEXT: vmov.32 d16[1], r11 -; LE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! 
-; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEON-NEXT: add sp, sp, #176 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: add sp, sp, #4 -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; -; BE-I32-LABEL: lrint_v16f64: +; BE-I32-LABEL: lrint_v8f64: ; BE-I32: @ %bb.0: -; BE-I32-NEXT: .save {r4, r5, r6, lr} -; BE-I32-NEXT: push {r4, r5, r6, lr} +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} ; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: .pad #128 -; BE-I32-NEXT: sub sp, sp, #128 -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: add r0, sp, #240 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, sp, #224 -; BE-I32-NEXT: vorr q6, q3, q3 -; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; BE-I32-NEXT: .pad #32 +; BE-I32-NEXT: sub sp, sp, #32 +; BE-I32-NEXT: vorr q5, q0, q0 +; BE-I32-NEXT: vstmia sp, {d0, d1} @ 16-byte Spill +; BE-I32-NEXT: vorr d0, d4, d4 ; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vorr q5, q1, q1 -; BE-I32-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #80 -; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #112 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, sp, #256 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #96 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, sp, #208 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vld1.64 {d14, d15}, [r0] -; BE-I32-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill -; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: vorr q7, q3, q3 +; BE-I32-NEXT: vstmia 
lr, {d4, d5} @ 16-byte Spill +; BE-I32-NEXT: vorr q6, q1, q1 ; BE-I32-NEXT: bl lrint ; BE-I32-NEXT: vorr d0, d10, d10 ; BE-I32-NEXT: vmov.32 d8[0], r0 ; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrint ; BE-I32-NEXT: vorr d0, d12, d12 ; BE-I32-NEXT: vmov.32 d9[0], r0 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: vorr d0, d15, d15 ; BE-I32-NEXT: vmov.32 d11[0], r0 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #80 -; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #112 -; BE-I32-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEXT: vmov.32 d14[0], r4 +; BE-I32-NEXT: vorr d0, d13, d13 +; BE-I32-NEXT: vmov.32 d9[1], r0 ; BE-I32-NEXT: bl lrint ; BE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload ; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #96 ; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d17, d17 ; BE-I32-NEXT: bl lrint ; BE-I32-NEXT: add lr, sp, #16 ; BE-I32-NEXT: vmov.32 d10[1], r0 ; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload ; BE-I32-NEXT: vorr d0, d17, d17 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload 
-; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #96 ; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #112 -; BE-I32-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #80 -; BE-I32-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEXT: vrev64.32 q0, q4 -; BE-I32-NEXT: vrev64.32 q1, q5 -; BE-I32-NEXT: vrev64.32 q2, q7 -; BE-I32-NEXT: vrev64.32 q3, q6 -; BE-I32-NEXT: add sp, sp, #128 +; BE-I32-NEXT: vrev64.32 q0, q5 +; BE-I32-NEXT: vrev64.32 q1, q4 +; BE-I32-NEXT: add sp, sp, #32 ; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: pop {r4, r5, r6, pc} +; BE-I32-NEXT: pop {r11, pc} ; -; BE-I64-LABEL: lrint_v16f64: +; BE-I64-LABEL: lrint_v8f64: ; BE-I64: @ %bb.0: -; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEXT: .pad #4 -; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: .pad #168 -; BE-I64-NEXT: sub sp, sp, #168 -; BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: str r0, [sp, #132] @ 4-byte Spill -; BE-I64-NEXT: add r0, sp, #304 -; BE-I64-NEXT: vorr q4, q3, q3 -; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #48 -; 
BE-I64-NEXT: vorr d0, d1, d1 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #320 -; BE-I64-NEXT: vorr q6, q2, q2 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #88 -; BE-I64-NEXT: vorr q7, q1, q1 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #272 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #112 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #288 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: .pad #40 +; BE-I64-NEXT: sub sp, sp, #40 +; BE-I64-NEXT: vorr q4, q0, q0 ; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: vorr d0, d7, d7 +; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; BE-I64-NEXT: vorr q7, q2, q2 +; BE-I64-NEXT: vorr q6, q1, q1 ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vorr d0, d14, d14 -; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: add lr, sp, #8 ; BE-I64-NEXT: vmov.32 d17[0], r0 -; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-I64-NEXT: mov r8, r1 ; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vorr d0, d15, d15 -; BE-I64-NEXT: str r1, [sp, #84] @ 4-byte Spill +; BE-I64-NEXT: mov r9, r1 ; BE-I64-NEXT: vmov.32 d10[0], r0 ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vorr d0, d12, d12 -; BE-I64-NEXT: add lr, sp, #152 +; BE-I64-NEXT: mov r10, r1 ; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vorr d0, d13, d13 -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d9, d9 ; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: bl lrint -; 
BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: add lr, sp, #136 -; BE-I64-NEXT: mov r9, r1 -; BE-I64-NEXT: vmov.32 d13[1], r5 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: vmov.32 d12[1], r7 -; BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: mov r10, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: mov r11, r1 -; BE-I64-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #48 -; BE-I64-NEXT: vorr q6, q5, q5 -; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d9, d9 ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: vmov.32 d12[1], r6 -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: add lr, sp, #48 -; BE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #152 -; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #88 -; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d13, d13 -; BE-I64-NEXT: vmov.32 d9[1], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; 
BE-I64-NEXT: vorr d0, d12, d12 -; BE-I64-NEXT: add lr, sp, #152 ; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d8[1], r0 -; BE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I64-NEXT: vmov.32 d15[0], r0 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: add lr, sp, #136 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #112 -; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: vmov.32 d11[1], r0 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: mov r6, r1 ; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: vmov.32 d10[1], r9 +; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #48 -; BE-I64-NEXT: vmov.32 d17[1], r10 -; BE-I64-NEXT: vmov.32 d16[1], r11 -; BE-I64-NEXT: vorr q12, q8, q8 +; BE-I64-NEXT: vmov.32 d13[1], r6 ; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #152 -; BE-I64-NEXT: vmov.32 d17[1], r8 -; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: vmov.32 d13[1], r7 -; BE-I64-NEXT: vmov.32 d16[1], r6 -; BE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: vorr q13, q8, q8 -; BE-I64-NEXT: vmov.32 d12[1], r1 -; BE-I64-NEXT: ldr r1, [sp, #132] @ 4-byte Reload -; BE-I64-NEXT: vrev64.32 q8, q5 -; BE-I64-NEXT: mov r0, r1 -; BE-I64-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-I64-NEXT: vrev64.32 q9, q9 -; BE-I64-NEXT: vrev64.32 q10, q10 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; BE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! -; BE-I64-NEXT: vrev64.32 q11, q11 ; BE-I64-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]! -; BE-I64-NEXT: vrev64.32 q15, q6 -; BE-I64-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEXT: vrev64.32 q12, q12 -; BE-I64-NEXT: vst1.64 {d22, d23}, [r0:128] -; BE-I64-NEXT: add r0, r1, #64 -; BE-I64-NEXT: vrev64.32 q13, q13 -; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-I64-NEXT: vst1.64 {d24, d25}, [r0:128]! -; BE-I64-NEXT: vrev64.32 q14, q7 -; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]! -; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128] -; BE-I64-NEXT: add sp, sp, #168 +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d11[1], r10 +; BE-I64-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEXT: vmov.32 d14[1], r7 +; BE-I64-NEXT: vmov.32 d10[1], r9 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 q0, q6 +; BE-I64-NEXT: vrev64.32 q1, q7 +; BE-I64-NEXT: vrev64.32 q2, q5 +; BE-I64-NEXT: vrev64.32 q3, q8 +; BE-I64-NEXT: add sp, sp, #40 ; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: add sp, sp, #4 -; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v16f64: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr} -; BE-I32-NEON-NEXT: push {r4, r5, r6, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: .pad #128 -; BE-I32-NEON-NEXT: sub sp, sp, #128 -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: add r0, sp, #240 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #224 -; BE-I32-NEON-NEXT: vorr q6, q3, q3 -; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vorr q5, q1, q1 -; BE-I32-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vstmia lr, {d0, 
d1} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #256 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #96 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #208 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0] -; BE-I32-NEON-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill -; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d10, d10 -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d12, d12 -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d14, d14 -; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEON-NEXT: vmov.32 d14[0], r4 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, 
#96 -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #96 -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q0, q4 -; BE-I32-NEON-NEXT: vrev64.32 q1, q5 -; BE-I32-NEON-NEXT: vrev64.32 q2, q7 -; BE-I32-NEON-NEXT: vrev64.32 q3, q6 -; BE-I32-NEON-NEXT: add sp, sp, #128 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc} -; -; BE-I64-NEON-LABEL: 
lrint_v16f64: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: .pad #4 -; BE-I64-NEON-NEXT: sub sp, sp, #4 -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: .pad #168 -; BE-I64-NEON-NEXT: sub sp, sp, #168 -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: str r0, [sp, #132] @ 4-byte Spill -; BE-I64-NEON-NEXT: add r0, sp, #304 -; BE-I64-NEON-NEXT: vorr q4, q3, q3 -; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: vorr d0, d1, d1 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #320 -; BE-I64-NEON-NEXT: vorr q6, q2, q2 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #88 -; BE-I64-NEON-NEXT: vorr q7, q1, q1 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #272 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #288 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d14, d14 -; BE-I64-NEON-NEXT: add lr, sp, #136 -; BE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; BE-I64-NEON-NEXT: str r1, [sp, #108] @ 4-byte Spill -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d15, d15 -; BE-I64-NEON-NEXT: str r1, [sp, #84] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d12, d12 -; BE-I64-NEON-NEXT: add lr, sp, #152 -; 
BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d13, d13 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #136 -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r5 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r7 -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: vorr q6, q5, q5 -; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, 
d8, d8 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r6 -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #152 -; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #88 -; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d13, d13 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #84] @ 4-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d12, d12 -; BE-I64-NEON-NEXT: add lr, sp, #152 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #136 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #108] @ 4-byte Reload -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r9 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r10 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r11 -; BE-I64-NEON-NEXT: vorr q12, q8, q8 
-; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #152 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r8 -; BE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r6 -; BE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: vorr q13, q8, q8 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r1 -; BE-I64-NEON-NEXT: ldr r1, [sp, #132] @ 4-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 q8, q5 -; BE-I64-NEON-NEXT: mov r0, r1 -; BE-I64-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 q9, q9 -; BE-I64-NEON-NEXT: vrev64.32 q10, q10 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 q11, q11 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 q15, q6 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEON-NEXT: vrev64.32 q12, q12 -; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r0:128] -; BE-I64-NEON-NEXT: add r0, r1, #64 -; BE-I64-NEON-NEXT: vrev64.32 q13, q13 -; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 q14, q7 -; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r0:128]! 
-; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128] -; BE-I64-NEON-NEXT: add sp, sp, #168 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: add sp, sp, #4 -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x) - ret <16 x iXLen> %a +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x) + ret <8 x iXLen> %a } -declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) -define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { -; LE-I32-LABEL: lrint_v32f64: +define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { +; LE-I32-LABEL: lrint_v16f64: ; LE-I32: @ %bb.0: ; LE-I32-NEXT: .save {r4, r5, r6, lr} ; LE-I32-NEXT: push {r4, r5, r6, lr} ; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: .pad #160 -; LE-I32-NEXT: sub sp, sp, #160 -; LE-I32-NEXT: add lr, sp, #96 -; LE-I32-NEXT: mov r4, r0 -; LE-I32-NEXT: add r0, sp, #304 -; LE-I32-NEXT: vorr q6, q3, q3 -; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #64 +; LE-I32-NEXT: .pad #128 +; LE-I32-NEXT: sub sp, sp, #128 +; LE-I32-NEXT: add lr, sp, #80 +; LE-I32-NEXT: add r0, sp, #240 +; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; LE-I32-NEXT: add r0, sp, #208 +; LE-I32-NEXT: vorr q6, q0, q0 +; LE-I32-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #32 ; LE-I32-NEXT: vorr q5, q1, q1 +; LE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #16 ; LE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: add lr, sp, #64 ; LE-I32-NEXT: vorr d0, d4, d4 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #352 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, 
sp, #16 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #272 ; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-I32-NEXT: add lr, sp, #112 ; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #288 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #80 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #336 +; LE-I32-NEXT: add r0, sp, #224 ; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #144 +; LE-I32-NEXT: add lr, sp, #96 ; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] ; LE-I32-NEXT: add r0, sp, #256 ; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #128 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: add r0, sp, #320 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #48 +; LE-I32-NEXT: vld1.64 {d14, d15}, [r0] +; LE-I32-NEXT: vstmia sp, {d2, d3} @ 16-byte Spill +; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill ; LE-I32-NEXT: bl lrint ; LE-I32-NEXT: vorr d0, d12, d12 -; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: vmov.32 d8[0], r0 ; LE-I32-NEXT: bl lrint ; LE-I32-NEXT: vorr d0, d10, d10 -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d13, d13 -; LE-I32-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d11, d11 -; LE-I32-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #96 -; LE-I32-NEXT: vorr q5, q4, q4 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEXT: add lr, sp, #96 -; LE-I32-NEXT: add r0, sp, #416 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEXT: 
add lr, sp, #64 -; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d8, d8 -; LE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: vmov.32 d12[0], r0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I32-NEXT: vorr q6, q5, q5 ; LE-I32-NEXT: vorr d0, d14, d14 +; LE-I32-NEXT: vmov.32 d13[0], r0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d9, d9 -; LE-I32-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d15, d15 -; LE-I32-NEXT: vmov.32 d12[1], r0 ; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: add r0, sp, #400 -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vorr q6, q5, q5 -; LE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d10, d10 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d8, d8 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d11, d11 -; LE-I32-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d9, d9 -; LE-I32-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: add r0, sp, #384 -; LE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d10, d10 -; LE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #16 +; 
LE-I32-NEXT: add lr, sp, #80 ; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d8, d8 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d11, d11 -; LE-I32-NEXT: vmov.32 d13[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d9, d9 -; LE-I32-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr q7, q6, q6 -; LE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d10, d10 -; LE-I32-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEXT: add r0, sp, #368 -; LE-I32-NEXT: vld1.64 {d12, d13}, [r0] +; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #144 +; LE-I32-NEXT: add lr, sp, #112 ; LE-I32-NEXT: vmov.32 d9[0], r0 ; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload ; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I32-NEXT: vmov.32 d15[0], r4 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d11, d11 -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d12, d12 -; LE-I32-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #144 -; LE-I32-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload ; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: vmov.32 d10[0], r0 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEXT: add r0, sp, #240 -; LE-I32-NEXT: vorr d0, d13, d13 -; LE-I32-NEXT: add lr, sp, #144 -; LE-I32-NEXT: vld1.64 {d10, d11}, [r0] -; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEXT: vstmia sp, {d10, d11} @ 16-byte Spill +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; LE-I32-NEXT: 
bl lrint -; LE-I32-NEXT: vorr d0, d10, d10 -; LE-I32-NEXT: vmov.32 d8[1], r0 ; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 ; LE-I32-NEXT: bl lrint ; LE-I32-NEXT: add lr, sp, #80 -; LE-I32-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d12, d12 +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #112 -; LE-I32-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d14, d14 +; LE-I32-NEXT: add lr, sp, #32 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d13, d13 -; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: add lr, sp, #96 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: add lr, sp, #128 +; LE-I32-NEXT: add lr, sp, #112 ; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEXT: vorr d0, d12, d12 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d15, d15 -; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vorr d0, d17, d17 ; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vorr d0, d13, d13 +; LE-I32-NEXT: add lr, sp, #48 ; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: bl lrint -; LE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload ; LE-I32-NEXT: vorr d0, d17, d17 -; LE-I32-NEXT: vmov.32 d9[1], r0 ; LE-I32-NEXT: bl lrint ; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: mov 
r0, r4 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #96 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEXT: vst1.32 {d8, d9}, [r0:128]! -; LE-I32-NEXT: vst1.64 {d10, d11}, [r0:128] -; LE-I32-NEXT: add r0, r4, #64 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #144 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! +; LE-I32-NEXT: vmov.32 d15[1], r0 ; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I32-NEXT: add sp, sp, #160 +; LE-I32-NEXT: vorr d0, d17, d17 +; LE-I32-NEXT: bl lrint +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: vorr q0, q6, q6 +; LE-I32-NEXT: vorr q1, q4, q4 +; LE-I32-NEXT: vorr q2, q5, q5 +; LE-I32-NEXT: vorr q3, q7, q7 +; LE-I32-NEXT: add sp, sp, #128 ; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I32-NEXT: pop {r4, r5, r6, pc} ; -; LE-I64-LABEL: lrint_v32f64: +; LE-I64-LABEL: lrint_v16f64: ; LE-I64: @ %bb.0: ; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -6313,1139 +1591,285 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { ; LE-I64-NEXT: sub sp, sp, #4 ; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: .pad #208 -; LE-I64-NEXT: sub sp, sp, #208 -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: str r0, [sp, #156] @ 4-byte Spill -; LE-I64-NEXT: add r0, sp, #456 -; LE-I64-NEXT: vorr q4, q0, q0 +; LE-I64-NEXT: .pad #176 +; LE-I64-NEXT: sub 
sp, sp, #176 +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: str r0, [sp, #140] @ 4-byte Spill +; LE-I64-NEXT: add r0, sp, #312 +; LE-I64-NEXT: vorr q6, q2, q2 ; LE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vorr d0, d7, d7 -; LE-I64-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: vorr q5, q2, q2 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #344 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #192 +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: vorr q7, q1, q1 +; LE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vorr d0, d1, d1 ; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #376 +; LE-I64-NEXT: add r0, sp, #280 ; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: add lr, sp, #80 ; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #360 +; LE-I64-NEXT: add r0, sp, #296 ; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #136 +; LE-I64-NEXT: add lr, sp, #120 ; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #440 +; LE-I64-NEXT: add r0, sp, #328 ; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #176 +; LE-I64-NEXT: add lr, sp, #56 ; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] ; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d10, d10 -; LE-I64-NEXT: str r1, [sp, #120] @ 4-byte Spill -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d11, d11 -; LE-I64-NEXT: mov r10, r1 -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d8, d8 -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: bl lrint -; 
LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vorr d0, d14, d14 +; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill ; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d10, d10 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d11, d11 -; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vorr d0, d15, d15 +; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill ; LE-I64-NEXT: vmov.32 d8[0], r0 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vorr d0, d12, d12 +; LE-I64-NEXT: add lr, sp, #160 ; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d9[1], r7 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d17, d17 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d8[1], r4 -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: str r1, [sp, #72] @ 4-byte Spill ; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #40 -; LE-I64-NEXT: vorr d0, d8, d8 -; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d11[1], r6 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: 
vorr d0, d13, d13 +; LE-I64-NEXT: mov r6, r1 ; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEXT: vmov.32 d10[1], r9 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #120] @ 4-byte Reload -; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #24 -; LE-I64-NEXT: vmov.32 d19[1], r0 -; LE-I64-NEXT: add r0, sp, #408 -; LE-I64-NEXT: ldr r2, [sp, #156] @ 4-byte Reload -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEXT: mov r0, r2 -; LE-I64-NEXT: vmov.32 d12[1], r1 -; LE-I64-NEXT: add r1, sp, #488 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-I64-NEXT: add lr, sp, #40 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r1] -; LE-I64-NEXT: add r1, sp, #472 -; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vmov.32 d21[1], r11 -; LE-I64-NEXT: vmov.32 d20[1], r10 -; LE-I64-NEXT: add r10, r2, #192 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-I64-NEXT: vld1.64 {d16, d17}, [r1] -; LE-I64-NEXT: add r1, sp, #392 -; LE-I64-NEXT: vmov.32 d18[1], r5 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]! 
-; LE-I64-NEXT: vld1.64 {d16, d17}, [r1] -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #104 -; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128] -; LE-I64-NEXT: add r0, sp, #312 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #328 -; LE-I64-NEXT: vmov.32 d15[1], r8 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #120 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: add r0, sp, #424 -; LE-I64-NEXT: vmov.32 d14[1], r4 -; LE-I64-NEXT: vst1.64 {d12, d13}, [r10:128]! -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEXT: vst1.64 {d14, d15}, [r10:128]! -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #192 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d17, d17 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #136 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d8, d8 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d11, d11 -; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vorr d0, d9, d9 +; LE-I64-NEXT: mov r7, r1 ; LE-I64-NEXT: vmov.32 d12[0], r0 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #160 +; LE-I64-NEXT: add lr, sp, #96 ; LE-I64-NEXT: mov r5, r1 ; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d10, d10 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d11, d11 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #192 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d15[0], r0 ; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload ; LE-I64-NEXT: @ kill: def 
$d0 killed $d0 killed $q0 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: add lr, sp, #192 -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d8, d8 -; LE-I64-NEXT: vmov.32 d14[1], r6 -; LE-I64-NEXT: add lr, sp, #136 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: bl lrint ; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: mov r10, r1 ; LE-I64-NEXT: vmov.32 d13[1], r5 ; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: add lr, sp, #56 ; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; LE-I64-NEXT: vorr d0, d9, d9 ; LE-I64-NEXT: bl lrint ; LE-I64-NEXT: vorr d0, d8, d8 -; LE-I64-NEXT: vmov.32 d12[1], r8 -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #192 -; LE-I64-NEXT: str r1, [sp, #24] @ 4-byte Spill -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #40 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d11, d11 -; LE-I64-NEXT: vmov.32 d9[1], r9 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d10, d10 -; LE-I64-NEXT: vmov.32 d8[1], r11 -; LE-I64-NEXT: add lr, sp, #192 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: str r1, [sp, #40] @ 4-byte Spill +; LE-I64-NEXT: vmov.32 
d12[1], r7 +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 ; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: vmov.32 d11[1], r4 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d8, d8 -; LE-I64-NEXT: vmov.32 d10[1], r7 -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d15[1], r4 ; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d17, d17 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d12[0], r0 ; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: vmov.32 d15[1], r5 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: vmov.32 d14[1], r6 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d14[1], r0 ; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d11, d11 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: vmov.32 d15[0], r0 ; LE-I64-NEXT: add 
lr, sp, #160 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #104 +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: ldr r0, [sp, #72] @ 4-byte Reload ; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: vmov.32 d13[1], r6 -; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; LE-I64-NEXT: vorr d0, d8, d8 -; LE-I64-NEXT: add lr, sp, #160 ; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vmov.32 d12[1], r0 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I64-NEXT: vmov.32 d9[1], r0 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-I64-NEXT: add lr, sp, #160 ; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d8[1], r0 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill ; LE-I64-NEXT: add lr, sp, #120 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: vorr d0, d9, d9 -; LE-I64-NEXT: vmov.32 d13[1], r8 +; LE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload +; LE-I64-NEXT: vorr d0, d11, d11 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: vorr d0, d8, d8 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vorr d0, d10, d10 +; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: vmov.32 d12[1], r11 +; LE-I64-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEXT: bl lrint +; LE-I64-NEXT: add lr, sp, #144 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; LE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; LE-I64-NEXT: vmov.32 d8[1], r10 ; LE-I64-NEXT: bl lrint -; LE-I64-NEXT: add lr, sp, #72 -; LE-I64-NEXT: vmov.32 d14[0], r0 +; 
LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #24 ; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload ; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: vmov.32 d17[1], r9 -; LE-I64-NEXT: vmov.32 d16[1], r7 -; LE-I64-NEXT: vst1.64 {d12, d13}, [r10:128]! -; LE-I64-NEXT: vorr q9, q8, q8 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #136 -; LE-I64-NEXT: vmov.32 d15[1], r5 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r10:128] -; LE-I64-NEXT: vmov.32 d14[1], r1 -; LE-I64-NEXT: ldr r1, [sp, #156] @ 4-byte Reload -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add r0, r1, #128 +; LE-I64-NEXT: vmov.32 d20[0], r0 +; LE-I64-NEXT: vmov.32 d21[1], r8 +; LE-I64-NEXT: vmov.32 d20[1], r1 +; LE-I64-NEXT: ldr r1, [sp, #140] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d13[1], r5 +; LE-I64-NEXT: mov r0, r1 +; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload ; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vmov.32 d11[1], r6 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEXT: vmov.32 d14[1], r4 ; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vmov.32 d10[1], r4 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #192 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #96 +; LE-I64-NEXT: vmov.32 d12[1], r7 +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: vmov.32 d17[1], r9 +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128] ; LE-I64-NEXT: add r0, r1, #64 -; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! ; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! 
-; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #88 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! +; LE-I64-NEXT: vmov.32 d16[1], r11 +; LE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]! ; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEXT: add sp, sp, #208 +; LE-I64-NEXT: add sp, sp, #176 ; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: add sp, sp, #4 ; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-I32-NEON-LABEL: lrint_v32f64: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r4, r5, r6, lr} -; LE-I32-NEON-NEXT: push {r4, r5, r6, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: .pad #160 -; LE-I32-NEON-NEXT: sub sp, sp, #160 -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: add r0, sp, #304 -; LE-I32-NEON-NEXT: vorr q6, q3, q3 -; LE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vorr q5, q1, q1 -; LE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vorr d0, d4, d4 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #352 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #272 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #288 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #80 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #336 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} 
@ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #144 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #256 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #128 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: add r0, sp, #320 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d12, d12 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d13, d13 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d11, d11 -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: vorr q5, q4, q4 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: add r0, sp, #416 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d8, d8 -; LE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr q6, q5, q5 -; LE-I32-NEON-NEXT: vorr d0, d14, d14 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d9, d9 -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; 
LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d15, d15 -; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: add r0, sp, #400 -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vorr q6, q5, q5 -; LE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d8, d8 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d11, d11 -; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d9, d9 -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: add r0, sp, #384 -; LE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d8, d8 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d11, d11 -; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d9, d9 -; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; 
LE-I32-NEON-NEXT: vorr q7, q6, q6 -; LE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: add r0, sp, #368 -; LE-I32-NEON-NEXT: vld1.64 {d12, d13}, [r0] -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #144 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I32-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d11, d11 -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d12, d12 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #144 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I32-NEON-NEXT: add r0, sp, #240 -; LE-I32-NEON-NEXT: vorr d0, d13, d13 -; LE-I32-NEON-NEXT: add lr, sp, #144 -; LE-I32-NEON-NEXT: vld1.64 {d10, d11}, [r0] -; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: vstmia sp, {d10, d11} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d10, d10 -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #80 -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d12, d12 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #112 -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d14, d14 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d13, d13 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: bl 
lrint -; LE-I32-NEON-NEXT: add lr, sp, #128 -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d12, d12 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d15, d15 -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vorr d0, d13, d13 -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vorr d0, d17, d17 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: bl lrint -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #96 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r0:128]! -; LE-I32-NEON-NEXT: vst1.64 {d10, d11}, [r0:128] -; LE-I32-NEON-NEXT: add r0, r4, #64 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #144 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! 
-; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I32-NEON-NEXT: add sp, sp, #160 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: pop {r4, r5, r6, pc} -; -; LE-I64-NEON-LABEL: lrint_v32f64: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: .pad #4 -; LE-I64-NEON-NEXT: sub sp, sp, #4 -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #208 -; LE-I64-NEON-NEXT: sub sp, sp, #208 -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: str r0, [sp, #156] @ 4-byte Spill -; LE-I64-NEON-NEXT: add r0, sp, #456 -; LE-I64-NEON-NEXT: vorr q4, q0, q0 -; LE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vorr d0, d7, d7 -; LE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vorr q5, q2, q2 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #344 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #192 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #376 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #360 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #136 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #440 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl 
lrint -; LE-I64-NEON-NEXT: vorr d0, d10, d10 -; LE-I64-NEON-NEXT: str r1, [sp, #120] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d11, d11 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d10, d10 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d11, d11 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r7 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d17, d17 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d8[1], r4 -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; 
LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d11[1], r6 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r9 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #120] @ 4-byte Reload -; LE-I64-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vmov.32 d19[1], r0 -; LE-I64-NEON-NEXT: add r0, sp, #408 -; LE-I64-NEON-NEXT: ldr r2, [sp, #156] @ 4-byte Reload -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEON-NEXT: mov r0, r2 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r1 -; LE-I64-NEON-NEXT: add r1, sp, #488 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1] -; LE-I64-NEON-NEXT: add r1, sp, #472 -; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vmov.32 d21[1], r11 -; LE-I64-NEON-NEXT: vmov.32 d20[1], r10 -; LE-I64-NEON-NEXT: add r10, r2, #192 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1] -; LE-I64-NEON-NEXT: add r1, sp, #392 -; LE-I64-NEON-NEXT: vmov.32 d18[1], r5 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r0:128]! -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r1] -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; LE-I64-NEON-NEXT: add r0, sp, #312 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #328 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r8 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #120 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: add r0, sp, #424 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r4 -; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]! -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r10:128]! 
-; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #192 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d17, d17 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #136 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d10, d10 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d11, d11 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d10, d10 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d11, d11 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #192 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; LE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #192 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r4 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r6 -; LE-I64-NEON-NEXT: add lr, sp, #136 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: mov r7, r1 
-; LE-I64-NEON-NEXT: vmov.32 d13[1], r5 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r8 -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #192 -; LE-I64-NEON-NEXT: str r1, [sp, #24] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d11, d11 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r9 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d10, d10 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r11 -; LE-I64-NEON-NEXT: add lr, sp, #192 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: str r1, [sp, #40] @ 4-byte Spill -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r7 -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: 
vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r5 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #104 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r6 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #120 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: vorr d0, d9, d9 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r8 -; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: vorr d0, d8, d8 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r11 
-; LE-I64-NEON-NEXT: bl lrint -; LE-I64-NEON-NEXT: add lr, sp, #72 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vmov.32 d17[1], r9 -; LE-I64-NEON-NEXT: vmov.32 d16[1], r7 -; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r10:128]! -; LE-I64-NEON-NEXT: vorr q9, q8, q8 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #136 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r5 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128] -; LE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; LE-I64-NEON-NEXT: ldr r1, [sp, #156] @ 4-byte Reload -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add r0, r1, #128 -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r6 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r4 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #192 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEON-NEXT: add r0, r1, #64 -; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #88 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEON-NEXT: add sp, sp, #208 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: add sp, sp, #4 -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-LABEL: lrint_v32f64: +; BE-I32-LABEL: lrint_v16f64: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r4, r5, r6, lr} ; BE-I32-NEXT: push {r4, r5, r6, lr} ; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: .pad #176 -; BE-I32-NEXT: sub sp, sp, #176 -; BE-I32-NEXT: add lr, sp, #128 -; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: add r0, sp, #336 +; BE-I32-NEXT: .pad #128 +; BE-I32-NEXT: sub sp, sp, #128 +; BE-I32-NEXT: add lr, sp, #64 +; BE-I32-NEXT: add r0, sp, #240 +; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] +; BE-I32-NEXT: add r0, sp, #224 ; BE-I32-NEXT: vorr q6, q3, q3 ; BE-I32-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: add lr, sp, #16 ; BE-I32-NEXT: vorr q5, q1, q1 -; BE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I32-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill ; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: vorr d0, d4, d4 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, sp, #320 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #160 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, sp, #432 +; BE-I32-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #80 +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I32-NEXT: add lr, sp, #112 ; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, sp, #288 +; BE-I32-NEXT: add r0, sp, #256 ; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I32-NEXT: add lr, sp, #96 ; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, 
sp, #368 +; BE-I32-NEXT: add r0, sp, #208 ; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, sp, #416 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #144 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: add r0, sp, #400 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d12, d12 -; BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: vld1.64 {d14, d15}, [r0] +; BE-I32-NEXT: vstmia sp, {d6, d7} @ 16-byte Spill +; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill ; BE-I32-NEXT: bl lrint ; BE-I32-NEXT: vorr d0, d10, d10 -; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: vmov.32 d8[0], r0 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d13, d13 +; BE-I32-NEXT: vorr d0, d12, d12 ; BE-I32-NEXT: vmov.32 d9[0], r0 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d11, d11 -; BE-I32-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #128 -; BE-I32-NEXT: vorr q5, q4, q4 -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d17, d17 -; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEXT: add lr, sp, #128 -; BE-I32-NEXT: add r0, sp, #384 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #80 -; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d8, d8 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload ; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: vmov.32 d11[0], r0 ; BE-I32-NEXT: bl 
lrint -; BE-I32-NEXT: vorr d0, d9, d9 -; BE-I32-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d15, d15 -; BE-I32-NEXT: vmov.32 d10[1], r0 ; BE-I32-NEXT: add lr, sp, #80 -; BE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEXT: add r0, sp, #272 -; BE-I32-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d10, d10 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #32 +; BE-I32-NEXT: add lr, sp, #64 ; BE-I32-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d8, d8 +; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d11, d11 -; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; BE-I32-NEXT: vmov.32 d14[0], r4 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d9, d9 -; BE-I32-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: vmov.32 d15[0], r0 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEXT: add r0, sp, #256 -; BE-I32-NEXT: vorr d0, d10, d10 -; BE-I32-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: add lr, sp, #96 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; 
BE-I32-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I32-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #160 +; BE-I32-NEXT: add lr, sp, #64 ; BE-I32-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d8, d8 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d11, d11 -; BE-I32-NEXT: vmov.32 d14[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d9, d9 -; BE-I32-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: add r0, sp, #304 -; BE-I32-NEXT: vld1.64 {d10, d11}, [r0] -; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d14, d14 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vorr q4, q6, q6 -; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d12, d12 -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d15, d15 -; BE-I32-NEXT: add lr, sp, #160 -; BE-I32-NEXT: vmov.32 d17[0], r0 -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d10, d10 -; BE-I32-NEXT: vmov.32 d8[1], r0 ; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d13, d13 -; BE-I32-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #160 -; BE-I32-NEXT: vorr d0, d11, d11 -; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEXT: add r0, sp, #352 -; BE-I32-NEXT: vld1.64 {d14, d15}, [r0] -; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, 
d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d14, d14 +; BE-I32-NEXT: add lr, sp, #32 ; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEXT: add lr, sp, #160 -; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill ; BE-I32-NEXT: add lr, sp, #96 -; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d12, d12 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #112 -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d14, d14 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d13, d13 -; BE-I32-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #144 ; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEXT: vorr d0, d12, d12 -; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d15, d15 -; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: vorr d0, d13, d13 -; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: add lr, sp, #112 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 ; BE-I32-NEXT: bl lrint ; BE-I32-NEXT: add lr, sp, #48 -; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vmov.32 d15[1], r0 ; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload ; BE-I32-NEXT: vorr d0, d17, d17 ; BE-I32-NEXT: bl lrint -; BE-I32-NEXT: add lr, sp, #160 -; BE-I32-NEXT: vrev64.32 q9, q4 -; BE-I32-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload ; BE-I32-NEXT: add lr, sp, #80 -; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #128 -; BE-I32-NEXT: vmov.32 d22[1], r0 -; BE-I32-NEXT: mov r0, r4 -; BE-I32-NEXT: vst1.32 {d20, d21}, 
[r0:128]! -; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #16 -; BE-I32-NEXT: vrev64.32 q8, q5 -; BE-I32-NEXT: vst1.32 {d20, d21}, [r0:128]! -; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #32 -; BE-I32-NEXT: vst1.32 {d20, d21}, [r0:128]! -; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-I32-NEXT: add r0, r4, #64 -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #64 -; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEXT: vst1.32 {d22, d23}, [r0:128]! -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I32-NEXT: add sp, sp, #176 +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I32-NEXT: vorr d0, d17, d17 +; BE-I32-NEXT: bl lrint +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q4 +; BE-I32-NEXT: vrev64.32 q1, q5 +; BE-I32-NEXT: vrev64.32 q2, q7 +; BE-I32-NEXT: vrev64.32 q3, q6 +; BE-I32-NEXT: add sp, sp, #128 ; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I32-NEXT: pop {r4, r5, r6, pc} ; -; BE-I64-LABEL: lrint_v32f64: +; BE-I64-LABEL: lrint_v16f64: ; BE-I64: @ %bb.0: ; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -7453,902 +1877,183 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { ; BE-I64-NEXT: sub sp, sp, #4 ; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: .pad #232 -; BE-I64-NEXT: sub sp, sp, #232 -; BE-I64-NEXT: add lr, sp, #184 -; BE-I64-NEXT: str r0, [sp, #148] @ 4-byte Spill -; BE-I64-NEXT: add r0, sp, #416 -; BE-I64-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #168 -; BE-I64-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #152 -; BE-I64-NEXT: vstmia lr, {d2, 
d3} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #128 -; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #200 -; BE-I64-NEXT: vld1.64 {d18, d19}, [r0] -; BE-I64-NEXT: add r0, sp, #448 -; BE-I64-NEXT: vorr d0, d19, d19 -; BE-I64-NEXT: vld1.64 {d14, d15}, [r0] -; BE-I64-NEXT: add r0, sp, #336 -; BE-I64-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill +; BE-I64-NEXT: .pad #168 +; BE-I64-NEXT: sub sp, sp, #168 ; BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #400 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #352 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #368 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: str r0, [sp, #132] @ 4-byte Spill +; BE-I64-NEXT: add r0, sp, #304 +; BE-I64-NEXT: vorr q4, q3, q3 +; BE-I64-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill ; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vorr d0, d1, d1 ; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #384 +; BE-I64-NEXT: add r0, sp, #320 +; BE-I64-NEXT: vorr q6, q2, q2 ; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: add lr, sp, #88 +; BE-I64-NEXT: vorr q7, q1, q1 ; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #512 +; BE-I64-NEXT: add r0, sp, #272 ; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I64-NEXT: add lr, sp, #112 ; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEXT: add r0, sp, #432 -; BE-I64-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEXT: add r0, sp, #288 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vld1.64 {d16, d17}, [r0] ; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d8, d8 
-; BE-I64-NEXT: str r1, [sp, #80] @ 4-byte Spill -; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vorr d0, d14, d14 +; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: vmov.32 d17[0], r0 +; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill +; BE-I64-NEXT: vorr d0, d15, d15 +; BE-I64-NEXT: str r1, [sp, #84] @ 4-byte Spill ; BE-I64-NEXT: vmov.32 d10[0], r0 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d14, d14 -; BE-I64-NEXT: add lr, sp, #216 +; BE-I64-NEXT: vorr d0, d12, d12 +; BE-I64-NEXT: add lr, sp, #152 ; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: str r1, [sp, #44] @ 4-byte Spill ; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d15, d15 -; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: vorr d0, d13, d13 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: vorr d0, d8, d8 ; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d10, d10 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d11, d11 -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: vmov.32 d11[0], r0 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: add lr, sp, #200 +; BE-I64-NEXT: vorr d0, d9, d9 ; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: add lr, sp, #200 -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vmov.32 d15[1], r7 -; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; 
BE-I64-NEXT: vorr d0, d11, d11 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d10, d10 -; BE-I64-NEXT: vmov.32 d14[1], r6 ; BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: mov r5, r1 ; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: mov r11, r1 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d15, d15 -; BE-I64-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d14, d14 -; BE-I64-NEXT: vmov.32 d8[1], r8 -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; BE-I64-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload +; BE-I64-NEXT: @ kill: def $d0 killed $d0 killed $q0 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d13[1], r5 +; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #216 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #48 ; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: vmov.32 d11[1], r9 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload ; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: add lr, sp, #216 -; BE-I64-NEXT: mov r9, r1 -; BE-I64-NEXT: vmov.32 d10[1], r0 -; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; BE-I64-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vstmia 
lr, {d12, d13} @ 16-byte Spill ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: add lr, sp, #48 -; BE-I64-NEXT: ldr r0, [sp, #80] @ 4-byte Reload -; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vmov.32 d11[1], r4 ; BE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #200 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #96 +; BE-I64-NEXT: add lr, sp, #48 +; BE-I64-NEXT: vorr q6, q5, q5 ; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: vmov.32 d11[1], r0 ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: vmov.32 d10[1], r5 -; BE-I64-NEXT: add lr, sp, #200 -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: add lr, sp, #112 -; BE-I64-NEXT: vorr q4, q6, q6 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d13, d13 -; BE-I64-NEXT: vmov.32 d9[1], r10 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d12, d12 -; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d12[1], r6 +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: mov r8, r1 ; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: vmov.32 d8[1], r11 +; BE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: add lr, sp, #24 -; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #48 -; BE-I64-NEXT: vmov.32 d17[1], r0 -; BE-I64-NEXT: vmov.32 d16[1], r8 -; BE-I64-NEXT: vorr q9, q8, q8 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #112 -; BE-I64-NEXT: vmov.32 d17[1], r9 -; BE-I64-NEXT: vmov.32 d16[1], r6 -; BE-I64-NEXT: vorr q10, q8, q8 -; BE-I64-NEXT: 
vrev64.32 q8, q4 -; BE-I64-NEXT: vmov.32 d15[1], r7 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #200 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d11[1], r5 -; BE-I64-NEXT: vrev64.32 q8, q8 -; BE-I64-NEXT: vmov.32 d14[1], r4 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #216 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q8 -; BE-I64-NEXT: vrev64.32 q6, q7 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #8 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #96 -; BE-I64-NEXT: vrev64.32 q7, q5 -; BE-I64-NEXT: vrev64.32 q8, q8 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #80 -; BE-I64-NEXT: vrev64.32 q8, q8 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: vrev64.32 q8, q9 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill ; BE-I64-NEXT: add lr, sp, #48 -; BE-I64-NEXT: vrev64.32 q8, q10 -; BE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEXT: add lr, sp, #128 -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d11, d11 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d10, d10 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: ldr r6, [sp, #148] @ 4-byte Reload +; BE-I64-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; BE-I64-NEXT: add lr, sp, #152 -; BE-I64-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEXT: mov r5, r6 -; BE-I64-NEXT: vmov.32 d8[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q4 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d11, d11 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d10, d10 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: add lr, sp, #168 -; BE-I64-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEXT: vmov.32 d8[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q4 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d11, d11 +; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #88 +; BE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload +; BE-I64-NEXT: vorr d0, d13, d13 +; BE-I64-NEXT: vmov.32 d9[1], r0 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d10, d10 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #84] @ 4-byte Reload +; BE-I64-NEXT: vorr d0, d12, d12 +; BE-I64-NEXT: add lr, sp, #152 ; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: vmov.32 d8[1], r0 +; BE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: add lr, sp, #184 -; BE-I64-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEXT: vmov.32 d8[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q4 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEXT: add lr, sp, #136 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-I64-NEXT: mov r5, r1 ; BE-I64-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEXT: vorr d0, d11, d11 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d10, d10 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: add r0, sp, #464 -; BE-I64-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEXT: vmov.32 d8[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q4 -; BE-I64-NEXT: vld1.64 {d8, d9}, [r0] -; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128] -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: add r0, sp, #480 -; BE-I64-NEXT: add r5, r6, #192 -; BE-I64-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEXT: vld1.64 {d8, d9}, [r0] -; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: vrev64.32 q8, q5 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: add r0, sp, #496 -; BE-I64-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEXT: vld1.64 {d8, d9}, [r0] +; BE-I64-NEXT: add lr, sp, #112 +; BE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; BE-I64-NEXT: vorr d0, d9, d9 -; BE-I64-NEXT: vrev64.32 q8, q5 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
+; BE-I64-NEXT: vmov.32 d11[1], r0 ; BE-I64-NEXT: bl lrint ; BE-I64-NEXT: vorr d0, d8, d8 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vmov.32 d10[1], r9 ; BE-I64-NEXT: bl lrint -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: add lr, sp, #112 -; BE-I64-NEXT: add r0, r6, #128 -; BE-I64-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEXT: vrev64.32 q8, q5 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEXT: vst1.64 {d14, d15}, [r5:128] -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #200 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #216 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #96 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #80 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I64-NEXT: add r0, r6, #64 -; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEXT: add lr, sp, #64 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; BE-I64-NEXT: add lr, sp, #8 +; BE-I64-NEXT: vmov.32 d12[0], r0 ; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload ; BE-I64-NEXT: add lr, sp, #48 -; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! 
+; BE-I64-NEXT: vmov.32 d17[1], r10 +; BE-I64-NEXT: vmov.32 d16[1], r11 +; BE-I64-NEXT: vorr q12, q8, q8 ; BE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #152 +; BE-I64-NEXT: vmov.32 d17[1], r8 +; BE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #24 +; BE-I64-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEXT: vmov.32 d16[1], r6 +; BE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; BE-I64-NEXT: add lr, sp, #64 +; BE-I64-NEXT: vorr q13, q8, q8 +; BE-I64-NEXT: vmov.32 d12[1], r1 +; BE-I64-NEXT: ldr r1, [sp, #132] @ 4-byte Reload +; BE-I64-NEXT: vrev64.32 q8, q5 +; BE-I64-NEXT: mov r0, r1 +; BE-I64-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload +; BE-I64-NEXT: vrev64.32 q9, q9 +; BE-I64-NEXT: vrev64.32 q10, q10 ; BE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEXT: vst1.64 {d12, d13}, [r0:128] -; BE-I64-NEXT: add sp, sp, #232 +; BE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; BE-I64-NEXT: vrev64.32 q11, q11 +; BE-I64-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEXT: vst1.64 {d20, d21}, [r0:128]! +; BE-I64-NEXT: vrev64.32 q15, q6 +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: vrev64.32 q12, q12 +; BE-I64-NEXT: vst1.64 {d22, d23}, [r0:128] +; BE-I64-NEXT: add r0, r1, #64 +; BE-I64-NEXT: vrev64.32 q13, q13 +; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEXT: vst1.64 {d24, d25}, [r0:128]! +; BE-I64-NEXT: vrev64.32 q14, q7 +; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]! 
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128] +; BE-I64-NEXT: add sp, sp, #168 ; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I64-NEXT: add sp, sp, #4 ; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v32f64: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r4, r5, r6, lr} -; BE-I32-NEON-NEXT: push {r4, r5, r6, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: .pad #176 -; BE-I32-NEON-NEXT: sub sp, sp, #176 -; BE-I32-NEON-NEXT: add lr, sp, #128 -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: add r0, sp, #336 -; BE-I32-NEON-NEXT: vorr q6, q3, q3 -; BE-I32-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vorr q5, q1, q1 -; BE-I32-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vorr d0, d4, d4 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #320 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #160 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #432 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #288 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #96 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #368 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #416 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #144 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: add r0, sp, #400 -; BE-I32-NEON-NEXT: vstmia 
lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d12, d12 -; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d10, d10 -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d13, d13 -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d11, d11 -; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #128 -; BE-I32-NEON-NEXT: vorr q5, q4, q4 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #128 -; BE-I32-NEON-NEXT: add r0, sp, #384 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d8, d8 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d14, d14 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d9, d9 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d15, d15 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: add r0, sp, #272 -; 
BE-I32-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d10, d10 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d8, d8 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d11, d11 -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d9, d9 -; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vldmia sp, {d10, d11} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEON-NEXT: add r0, sp, #256 -; BE-I32-NEON-NEXT: vorr d0, d10, d10 -; BE-I32-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #160 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d8, d8 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d11, d11 -; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d9, d9 -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: add r0, sp, #304 -; BE-I32-NEON-NEXT: vld1.64 {d10, d11}, [r0] -; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d14, d14 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vorr q4, q6, q6 -; BE-I32-NEON-NEXT: 
vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d12, d12 -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d15, d15 -; BE-I32-NEON-NEXT: add lr, sp, #160 -; BE-I32-NEON-NEXT: vmov.32 d17[0], r0 -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d10, d10 -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d13, d13 -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #160 -; BE-I32-NEON-NEXT: vorr d0, d11, d11 -; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: add r0, sp, #352 -; BE-I32-NEON-NEXT: vld1.64 {d14, d15}, [r0] -; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d14, d14 -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: add lr, sp, #160 -; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #96 -; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d12, d12 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #112 -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d14, d14 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d13, d13 -; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #144 -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d12, d12 -; BE-I32-NEON-NEXT: bl lrint -; 
BE-I32-NEON-NEXT: vorr d0, d15, d15 -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: vorr d0, d13, d13 -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #48 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: vorr d0, d17, d17 -; BE-I32-NEON-NEXT: bl lrint -; BE-I32-NEON-NEXT: add lr, sp, #160 -; BE-I32-NEON-NEXT: vrev64.32 q9, q4 -; BE-I32-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #80 -; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #128 -; BE-I32-NEON-NEXT: vmov.32 d22[1], r0 -; BE-I32-NEON-NEXT: mov r0, r4 -; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]! -; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #16 -; BE-I32-NEON-NEXT: vrev64.32 q8, q5 -; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]! -; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #32 -; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r0:128]! -; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-I32-NEON-NEXT: add r0, r4, #64 -; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #64 -; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! -; BE-I32-NEON-NEXT: vst1.32 {d22, d23}, [r0:128]! -; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r0:128]! 
-; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I32-NEON-NEXT: add sp, sp, #176 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: pop {r4, r5, r6, pc} -; -; BE-I64-NEON-LABEL: lrint_v32f64: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: .pad #4 -; BE-I64-NEON-NEXT: sub sp, sp, #4 -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: .pad #232 -; BE-I64-NEON-NEXT: sub sp, sp, #232 -; BE-I64-NEON-NEXT: add lr, sp, #184 -; BE-I64-NEON-NEXT: str r0, [sp, #148] @ 4-byte Spill -; BE-I64-NEON-NEXT: add r0, sp, #416 -; BE-I64-NEON-NEXT: vstmia lr, {d6, d7} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #168 -; BE-I64-NEON-NEXT: vstmia lr, {d4, d5} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #152 -; BE-I64-NEON-NEXT: vstmia lr, {d2, d3} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #128 -; BE-I64-NEON-NEXT: vstmia lr, {d0, d1} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #200 -; BE-I64-NEON-NEXT: vld1.64 {d18, d19}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #448 -; BE-I64-NEON-NEXT: vorr d0, d19, d19 -; BE-I64-NEON-NEXT: vld1.64 {d14, d15}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #336 -; BE-I64-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #400 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #352 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #368 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add 
lr, sp, #48 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #384 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #512 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: vld1.64 {d16, d17}, [r0] -; BE-I64-NEON-NEXT: add r0, sp, #432 -; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0] -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: str r1, [sp, #80] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: str r1, [sp, #44] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d14, d14 -; BE-I64-NEON-NEXT: add lr, sp, #216 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d15, d15 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d10, d10 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d11, d11 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #200 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d0, d1} @ 16-byte Reload -; BE-I64-NEON-NEXT: @ kill: def $d0 killed $d0 killed $q0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, 
#200 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r7 -; BE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d11, d11 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d10, d10 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r6 -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d15, d15 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d14, d14 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r8 -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #216 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r9 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: add lr, sp, #216 -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; 
BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: ldr r0, [sp, #80] @ 4-byte Reload -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #200 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r5 -; BE-I64-NEON-NEXT: add lr, sp, #200 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: vorr q4, q6, q6 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d13, d13 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r10 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d12, d12 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r11 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #24 -; BE-I64-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r0 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r8 -; BE-I64-NEON-NEXT: vorr q9, q8, q8 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: vmov.32 d17[1], r9 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r6 -; BE-I64-NEON-NEXT: vorr q10, q8, q8 -; BE-I64-NEON-NEXT: vrev64.32 q8, q4 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r7 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #200 -; 
BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d11[1], r5 -; BE-I64-NEON-NEXT: vrev64.32 q8, q8 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r4 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #216 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q8 -; BE-I64-NEON-NEXT: vrev64.32 q6, q7 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #8 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: vrev64.32 q7, q5 -; BE-I64-NEON-NEXT: vrev64.32 q8, q8 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #80 -; BE-I64-NEON-NEXT: vrev64.32 q8, q8 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: vrev64.32 q8, q9 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: vrev64.32 q8, q10 -; BE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I64-NEON-NEXT: add lr, sp, #128 -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d11, d11 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d10, d10 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: ldr r6, [sp, #148] @ 4-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #152 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEON-NEXT: mov r5, r6 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q4 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d11, d11 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d10, d10 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #168 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q4 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d11, d11 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d10, d10 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #184 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q4 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-I64-NEON-NEXT: vldmia lr, {d10, d11} @ 16-byte Reload -; BE-I64-NEON-NEXT: vorr d0, d11, d11 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d10, d10 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: add r0, sp, #464 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q4 -; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0] -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add r0, sp, #480 -; BE-I64-NEON-NEXT: add r5, r6, #192 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0] -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: vrev64.32 q8, q5 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add r0, sp, #496 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vld1.64 {d8, d9}, [r0] -; BE-I64-NEON-NEXT: vorr d0, d9, d9 -; BE-I64-NEON-NEXT: vrev64.32 q8, q5 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vorr d0, d8, d8 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: bl lrint -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: add lr, sp, #112 -; BE-I64-NEON-NEXT: add r0, r6, #128 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 q8, q5 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; BE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r5:128] -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #200 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #216 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #96 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #80 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; BE-I64-NEON-NEXT: add r0, r6, #64 -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #64 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: add lr, sp, #48 -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! -; BE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128]! 
-; BE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128] -; BE-I64-NEON-NEXT: add sp, sp, #232 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: add sp, sp, #4 -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16f64(<32 x double> %x) - ret <32 x iXLen> %a + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x) + ret <16 x iXLen> %a } -declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; LE-I32-LABEL: lrint_v1fp128: @@ -8367,22 +2072,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; LE-I64-NEXT: vmov.32 d0[1], r1 ; LE-I64-NEXT: pop {r11, pc} ; -; LE-I32-NEON-LABEL: lrint_v1fp128: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r11, lr} -; LE-I32-NEON-NEXT: push {r11, lr} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: pop {r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v1fp128: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r11, lr} -; LE-I64-NEON-NEXT: push {r11, lr} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d0[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d0[1], r1 -; LE-I64-NEON-NEXT: pop {r11, pc} -; ; BE-I32-LABEL: lrint_v1fp128: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r11, lr} @@ -8399,23 +2088,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) { ; BE-I64-NEXT: vmov.32 d16[1], r1 ; BE-I64-NEXT: vrev64.32 d0, d16 ; BE-I64-NEXT: pop {r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v1fp128: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r11, lr} -; BE-I32-NEON-NEXT: push {r11, lr} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: pop {r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v1fp128: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r11, lr} -; BE-I64-NEON-NEXT: push {r11, lr} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; 
BE-I64-NEON-NEXT: vrev64.32 d0, d16 -; BE-I64-NEON-NEXT: pop {r11, pc} %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x) ret <1 x iXLen> %a } @@ -8470,54 +2142,6 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; LE-I64-NEXT: vpop {d8, d9} ; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, pc} ; -; LE-I32-NEON-LABEL: lrint_v2fp128: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} -; LE-I32-NEON-NEXT: mov r8, r3 -; LE-I32-NEON-NEXT: add r3, sp, #24 -; LE-I32-NEON-NEXT: mov r5, r2 -; LE-I32-NEON-NEXT: mov r6, r1 -; LE-I32-NEON-NEXT: mov r7, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: mov r1, r6 -; LE-I32-NEON-NEXT: mov r2, r5 -; LE-I32-NEON-NEXT: mov r3, r8 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d0[0], r0 -; LE-I32-NEON-NEXT: vmov.32 d0[1], r4 -; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} -; -; LE-I64-NEON-LABEL: lrint_v2fp128: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} -; LE-I64-NEON-NEXT: .vsave {d8, d9} -; LE-I64-NEON-NEXT: vpush {d8, d9} -; LE-I64-NEON-NEXT: mov r8, r3 -; LE-I64-NEON-NEXT: add r3, sp, #40 -; LE-I64-NEON-NEXT: mov r5, r2 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: mov r7, r0 -; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: mov r0, r7 -; LE-I64-NEON-NEXT: mov r1, r6 -; LE-I64-NEON-NEXT: mov r2, r5 -; LE-I64-NEON-NEXT: mov r3, r8 -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEON-NEXT: vorr q0, q4, q4 -; LE-I64-NEON-NEXT: vpop {d8, d9} -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} -; ; 
BE-I32-LABEL: lrint_v2fp128: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, lr} @@ -8567,56 +2191,6 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) { ; BE-I64-NEXT: vrev64.32 d0, d16 ; BE-I64-NEXT: vpop {d8} ; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, pc} -; -; BE-I32-NEON-LABEL: lrint_v2fp128: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} -; BE-I32-NEON-NEXT: mov r8, r3 -; BE-I32-NEON-NEXT: add r3, sp, #24 -; BE-I32-NEON-NEXT: mov r5, r2 -; BE-I32-NEON-NEXT: mov r6, r1 -; BE-I32-NEON-NEXT: mov r7, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: mov r1, r6 -; BE-I32-NEON-NEXT: mov r2, r5 -; BE-I32-NEON-NEXT: mov r3, r8 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I32-NEON-NEXT: vmov.32 d16[1], r4 -; BE-I32-NEON-NEXT: vrev64.32 d0, d16 -; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} -; -; BE-I64-NEON-LABEL: lrint_v2fp128: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} -; BE-I64-NEON-NEXT: .vsave {d8} -; BE-I64-NEON-NEXT: vpush {d8} -; BE-I64-NEON-NEXT: mov r8, r3 -; BE-I64-NEON-NEXT: add r3, sp, #32 -; BE-I64-NEON-NEXT: mov r5, r2 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: mov r7, r0 -; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: mov r0, r7 -; BE-I64-NEON-NEXT: mov r1, r6 -; BE-I64-NEON-NEXT: mov r2, r5 -; BE-I64-NEON-NEXT: mov r3, r8 -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 d1, d8 -; BE-I64-NEON-NEXT: vrev64.32 d0, d16 -; BE-I64-NEON-NEXT: vpop {d8} -; BE-I64-NEON-NEXT: pop 
{r4, r5, r6, r7, r8, pc} %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x) ret <2 x iXLen> %a } @@ -8696,991 +2270,154 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) { ; LE-I64-NEXT: vpop {d8, d9, d10, d11} ; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; -; LE-I32-NEON-LABEL: lrint_v4fp128: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r4, lr} -; LE-I32-NEON-NEXT: push {r4, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9} -; LE-I32-NEON-NEXT: vpush {d8, d9} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #60 -; LE-I32-NEON-NEXT: ldr r12, [sp, #56] -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: mov r0, r12 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #40 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #28 -; LE-I32-NEON-NEXT: ldr r12, [sp, #24] -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: mov r0, r12 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r4 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q4, q4 -; LE-I32-NEON-NEXT: vpop {d8, d9} -; LE-I32-NEON-NEXT: pop {r4, pc} -; -; LE-I64-NEON-LABEL: lrint_v4fp128: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11} -; LE-I64-NEON-NEXT: mov r5, r3 -; LE-I64-NEON-NEXT: add r3, sp, #96 -; LE-I64-NEON-NEXT: mov r7, r2 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: mov r4, r0 -; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: mov r0, r4 -; LE-I64-NEON-NEXT: mov r1, r6 -; LE-I64-NEON-NEXT: mov r2, r7 -; 
LE-I64-NEON-NEXT: mov r3, r5 -; LE-I64-NEON-NEXT: ldr r8, [sp, #80] -; LE-I64-NEON-NEXT: ldr r10, [sp, #64] -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #68 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: mov r0, r10 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #84 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: mov r0, r8 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r9 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r5 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEON-NEXT: vorr q0, q5, q5 -; LE-I64-NEON-NEXT: vorr q1, q4, q4 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11} -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-I32-LABEL: lrint_v4fp128: -; BE-I32: @ %bb.0: -; BE-I32-NEXT: .save {r4, lr} -; BE-I32-NEXT: push {r4, lr} -; BE-I32-NEXT: .vsave {d8, d9} -; BE-I32-NEXT: vpush {d8, d9} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #60 -; BE-I32-NEXT: ldr r12, [sp, #56] -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEXT: mov r0, r12 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #40 -; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #28 -; BE-I32-NEXT: ldr r12, [sp, #24] -; BE-I32-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEXT: mov r0, r12 -; BE-I32-NEXT: vmov.32 d9[1], r4 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: vrev64.32 q0, q4 -; BE-I32-NEXT: vpop {d8, d9} -; BE-I32-NEXT: pop {r4, pc} -; -; BE-I64-LABEL: lrint_v4fp128: -; BE-I64: @ %bb.0: -; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEXT: 
.vsave {d8, d9, d10} -; BE-I64-NEXT: vpush {d8, d9, d10} -; BE-I64-NEXT: mov r5, r3 -; BE-I64-NEXT: add r3, sp, #88 -; BE-I64-NEXT: mov r7, r2 -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: mov r4, r0 -; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: mov r9, r1 -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: mov r0, r4 -; BE-I64-NEXT: mov r1, r6 -; BE-I64-NEXT: mov r2, r7 -; BE-I64-NEXT: mov r3, r5 -; BE-I64-NEXT: ldr r8, [sp, #72] -; BE-I64-NEXT: ldr r10, [sp, #56] -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #60 -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: mov r0, r10 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #76 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: mov r0, r8 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEXT: vmov.32 d10[1], r4 -; BE-I64-NEXT: vmov.32 d8[1], r9 -; BE-I64-NEXT: vmov.32 d9[1], r5 -; BE-I64-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEXT: vrev64.32 d1, d10 -; BE-I64-NEXT: vrev64.32 d3, d8 -; BE-I64-NEXT: vrev64.32 d0, d9 -; BE-I64-NEXT: vrev64.32 d2, d16 -; BE-I64-NEXT: vpop {d8, d9, d10} -; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; BE-I32-NEON-LABEL: lrint_v4fp128: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r4, lr} -; BE-I32-NEON-NEXT: push {r4, lr} -; BE-I32-NEON-NEXT: .vsave {d8, d9} -; BE-I32-NEON-NEXT: vpush {d8, d9} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #60 -; BE-I32-NEON-NEXT: ldr r12, [sp, #56] -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: mov r0, r12 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #40 -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #28 -; BE-I32-NEON-NEXT: ldr r12, [sp, #24] -; BE-I32-NEON-NEXT: 
vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: mov r0, r12 -; BE-I32-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q0, q4 -; BE-I32-NEON-NEXT: vpop {d8, d9} -; BE-I32-NEON-NEXT: pop {r4, pc} -; -; BE-I64-NEON-LABEL: lrint_v4fp128: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10} -; BE-I64-NEON-NEXT: mov r5, r3 -; BE-I64-NEON-NEXT: add r3, sp, #88 -; BE-I64-NEON-NEXT: mov r7, r2 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: mov r4, r0 -; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: mov r0, r4 -; BE-I64-NEON-NEXT: mov r1, r6 -; BE-I64-NEON-NEXT: mov r2, r7 -; BE-I64-NEON-NEXT: mov r3, r5 -; BE-I64-NEON-NEXT: ldr r8, [sp, #72] -; BE-I64-NEON-NEXT: ldr r10, [sp, #56] -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #60 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: mov r0, r10 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #76 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: mov r0, r8 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r9 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r5 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 d1, d10 -; BE-I64-NEON-NEXT: vrev64.32 d3, d8 -; BE-I64-NEON-NEXT: vrev64.32 d0, d9 -; BE-I64-NEON-NEXT: vrev64.32 d2, d16 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10} -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, 
r10, pc} - %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x) - ret <4 x iXLen> %a -} -declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) - -define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { -; LE-I32-LABEL: lrint_v8fp128: -; LE-I32: @ %bb.0: -; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I32-NEXT: .vsave {d8, d9, d10, d11} -; LE-I32-NEXT: vpush {d8, d9, d10, d11} -; LE-I32-NEXT: mov r6, r3 -; LE-I32-NEXT: add r3, sp, #112 -; LE-I32-NEXT: mov r7, r2 -; LE-I32-NEXT: mov r4, r1 -; LE-I32-NEXT: mov r5, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEXT: mov r0, r5 -; LE-I32-NEXT: mov r1, r4 -; LE-I32-NEXT: mov r2, r7 -; LE-I32-NEXT: mov r3, r6 -; LE-I32-NEXT: ldr r8, [sp, #160] -; LE-I32-NEXT: ldr r9, [sp, #64] -; LE-I32-NEXT: ldr r10, [sp, #80] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #84 -; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: mov r0, r10 -; LE-I32-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r6, [sp, #96] -; LE-I32-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #100] -; LE-I32-NEXT: ldr r2, [sp, #104] -; LE-I32-NEXT: ldr r3, [sp, #108] -; LE-I32-NEXT: mov r0, r6 -; LE-I32-NEXT: ldr r4, [sp, #68] -; LE-I32-NEXT: ldr r5, [sp, #72] -; LE-I32-NEXT: ldr r10, [sp, #164] -; LE-I32-NEXT: ldr r7, [sp, #168] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r3, [sp, #76] -; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: mov r0, r9 -; LE-I32-NEXT: mov r1, r4 -; LE-I32-NEXT: mov r2, r5 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r3, [sp, #172] -; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: mov r0, r8 -; LE-I32-NEXT: mov r1, r10 -; LE-I32-NEXT: mov r2, r7 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #144 -; LE-I32-NEXT: mov r4, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #132 -; LE-I32-NEXT: 
ldr r7, [sp, #128] -; LE-I32-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: vmov.32 d9[1], r4 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: vorr q0, q5, q5 -; LE-I32-NEXT: vorr q1, q4, q4 -; LE-I32-NEXT: vpop {d8, d9, d10, d11} -; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; LE-I64-LABEL: lrint_v8fp128: -; LE-I64: @ %bb.0: -; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEXT: .pad #4 -; LE-I64-NEXT: sub sp, sp, #4 -; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: .pad #8 -; LE-I64-NEXT: sub sp, sp, #8 -; LE-I64-NEXT: mov r11, r3 -; LE-I64-NEXT: add r3, sp, #208 -; LE-I64-NEXT: mov r10, r2 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: mov r5, r0 -; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r7, sp, #164 -; LE-I64-NEXT: ldr r6, [sp, #160] -; LE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill -; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: ldm r7, {r1, r2, r3, r7} -; LE-I64-NEXT: mov r0, r6 -; LE-I64-NEXT: ldr r8, [sp, #128] -; LE-I64-NEXT: ldr r9, [sp, #144] -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #180 -; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: mov r0, r7 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #132 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: mov r0, r8 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #148 -; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: mov r0, r9 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: mov r0, r5 -; LE-I64-NEXT: mov r1, r4 -; 
LE-I64-NEXT: mov r2, r10 -; LE-I64-NEXT: mov r3, r11 -; LE-I64-NEXT: ldr r6, [sp, #112] -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #116 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: mov r0, r6 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #196 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #192] -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload -; LE-I64-NEXT: vmov.32 d11[1], r7 -; LE-I64-NEXT: vmov.32 d10[1], r0 -; LE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; LE-I64-NEXT: vmov.32 d15[1], r5 -; LE-I64-NEXT: vorr q2, q5, q5 -; LE-I64-NEXT: vmov.32 d13[1], r9 -; LE-I64-NEXT: vmov.32 d9[1], r0 -; LE-I64-NEXT: vmov.32 d14[1], r4 -; LE-I64-NEXT: vmov.32 d12[1], r8 -; LE-I64-NEXT: vorr q0, q7, q7 -; LE-I64-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEXT: vorr q1, q6, q6 -; LE-I64-NEXT: vorr q3, q4, q4 -; LE-I64-NEXT: add sp, sp, #8 -; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: add sp, sp, #4 -; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-I32-NEON-LABEL: lrint_v8fp128: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11} -; LE-I32-NEON-NEXT: mov r6, r3 -; LE-I32-NEON-NEXT: add r3, sp, #112 -; LE-I32-NEON-NEXT: mov r7, r2 -; LE-I32-NEON-NEXT: mov r4, r1 -; LE-I32-NEON-NEXT: mov r5, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: mov r0, r5 -; LE-I32-NEON-NEXT: mov r1, r4 -; LE-I32-NEON-NEXT: mov r2, r7 -; LE-I32-NEON-NEXT: mov r3, r6 -; LE-I32-NEON-NEXT: ldr r8, [sp, #160] -; LE-I32-NEON-NEXT: ldr r9, [sp, #64] -; LE-I32-NEON-NEXT: ldr 
r10, [sp, #80] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #84 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: mov r0, r10 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r6, [sp, #96] -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #100] -; LE-I32-NEON-NEXT: ldr r2, [sp, #104] -; LE-I32-NEON-NEXT: ldr r3, [sp, #108] -; LE-I32-NEON-NEXT: mov r0, r6 -; LE-I32-NEON-NEXT: ldr r4, [sp, #68] -; LE-I32-NEON-NEXT: ldr r5, [sp, #72] -; LE-I32-NEON-NEXT: ldr r10, [sp, #164] -; LE-I32-NEON-NEXT: ldr r7, [sp, #168] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r3, [sp, #76] -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: mov r0, r9 -; LE-I32-NEON-NEXT: mov r1, r4 -; LE-I32-NEON-NEXT: mov r2, r5 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r3, [sp, #172] -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: mov r0, r8 -; LE-I32-NEON-NEXT: mov r1, r10 -; LE-I32-NEON-NEXT: mov r2, r7 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #144 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #132 -; LE-I32-NEON-NEXT: ldr r7, [sp, #128] -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r4 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q5, q5 -; LE-I32-NEON-NEXT: vorr q1, q4, q4 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11} -; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} -; -; LE-I64-NEON-LABEL: lrint_v8fp128: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: .pad #4 -; LE-I64-NEON-NEXT: sub sp, sp, #4 -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, 
d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #8 -; LE-I64-NEON-NEXT: sub sp, sp, #8 -; LE-I64-NEON-NEXT: mov r11, r3 -; LE-I64-NEON-NEXT: add r3, sp, #208 -; LE-I64-NEON-NEXT: mov r10, r2 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: mov r5, r0 -; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r7, sp, #164 -; LE-I64-NEON-NEXT: ldr r6, [sp, #160] -; LE-I64-NEON-NEXT: str r1, [sp, #4] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: ldm r7, {r1, r2, r3, r7} -; LE-I64-NEON-NEXT: mov r0, r6 -; LE-I64-NEON-NEXT: ldr r8, [sp, #128] -; LE-I64-NEON-NEXT: ldr r9, [sp, #144] -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #180 -; LE-I64-NEON-NEXT: str r1, [sp] @ 4-byte Spill -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: mov r0, r7 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #132 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: mov r0, r8 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #148 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: mov r0, r9 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: mov r0, r5 -; LE-I64-NEON-NEXT: mov r1, r4 -; LE-I64-NEON-NEXT: mov r2, r10 -; LE-I64-NEON-NEXT: mov r3, r11 -; LE-I64-NEON-NEXT: ldr r6, [sp, #112] -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #116 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: mov r0, r6 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #196 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; 
LE-I64-NEON-NEXT: ldr r0, [sp, #192] -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp] @ 4-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d11[1], r7 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d15[1], r5 -; LE-I64-NEON-NEXT: vorr q2, q5, q5 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r9 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r8 -; LE-I64-NEON-NEXT: vorr q0, q7, q7 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEON-NEXT: vorr q1, q6, q6 -; LE-I64-NEON-NEXT: vorr q3, q4, q4 -; LE-I64-NEON-NEXT: add sp, sp, #8 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: add sp, sp, #4 -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-LABEL: lrint_v8fp128: +; BE-I32-LABEL: lrint_v4fp128: ; BE-I32: @ %bb.0: -; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I32-NEXT: .pad #4 -; BE-I32-NEXT: sub sp, sp, #4 -; BE-I32-NEXT: .vsave {d8, d9, d10, d11} -; BE-I32-NEXT: vpush {d8, d9, d10, d11} -; BE-I32-NEXT: .pad #8 -; BE-I32-NEXT: sub sp, sp, #8 -; BE-I32-NEXT: str r3, [sp, #4] @ 4-byte Spill -; BE-I32-NEXT: add r3, sp, #128 -; BE-I32-NEXT: mov r11, r2 -; BE-I32-NEXT: mov r6, r1 -; BE-I32-NEXT: mov r7, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: .save {r4, lr} +; BE-I32-NEXT: push {r4, lr} +; BE-I32-NEXT: .vsave {d8, d9} +; BE-I32-NEXT: vpush {d8, d9} ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #100 -; BE-I32-NEXT: ldr r5, [sp, #96] +; BE-I32-NEXT: add r3, sp, #60 +; BE-I32-NEXT: ldr r12, [sp, #56] ; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: ldr r4, [sp, #160] ; BE-I32-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEXT: mov r0, r5 -; BE-I32-NEXT: bl lrintl -; 
BE-I32-NEXT: add r3, sp, #164 -; BE-I32-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEXT: mov r0, r4 -; BE-I32-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r4, [sp, #176] -; BE-I32-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEXT: ldr r1, [sp, #180] -; BE-I32-NEXT: ldr r2, [sp, #184] -; BE-I32-NEXT: ldr r3, [sp, #188] -; BE-I32-NEXT: mov r0, r4 -; BE-I32-NEXT: ldr r5, [sp, #116] -; BE-I32-NEXT: ldr r8, [sp, #120] -; BE-I32-NEXT: ldr r10, [sp, #84] -; BE-I32-NEXT: ldr r9, [sp, #88] -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEXT: ldr r3, [sp, #124] -; BE-I32-NEXT: ldr r0, [sp, #112] -; BE-I32-NEXT: mov r1, r5 -; BE-I32-NEXT: mov r2, r8 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: ldr r3, [sp, #92] -; BE-I32-NEXT: ldr r0, [sp, #80] -; BE-I32-NEXT: mov r1, r10 -; BE-I32-NEXT: mov r2, r9 +; BE-I32-NEXT: mov r0, r12 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-I32-NEXT: add r3, sp, #40 ; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: mov r1, r6 -; BE-I32-NEXT: mov r2, r11 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #148 -; BE-I32-NEXT: ldr r7, [sp, #144] -; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: add r3, sp, #28 +; BE-I32-NEXT: ldr r12, [sp, #24] +; BE-I32-NEXT: vmov.32 d9[0], r0 ; BE-I32-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: vmov.32 d10[1], r4 +; BE-I32-NEXT: mov r0, r12 +; BE-I32-NEXT: vmov.32 d9[1], r4 ; BE-I32-NEXT: bl lrintl ; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: vrev64.32 q0, q5 -; BE-I32-NEXT: vrev64.32 q1, q4 -; BE-I32-NEXT: add sp, sp, #8 -; BE-I32-NEXT: vpop {d8, d9, d10, d11} -; BE-I32-NEXT: add sp, sp, #4 -; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; BE-I32-NEXT: vrev64.32 q0, q4 +; BE-I32-NEXT: vpop {d8, d9} +; BE-I32-NEXT: pop {r4, pc} ; -; BE-I64-LABEL: lrint_v8fp128: +; BE-I64-LABEL: lrint_v4fp128: ; BE-I64: @ %bb.0: -; 
BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEXT: .pad #4 -; BE-I64-NEXT: sub sp, sp, #4 -; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} -; BE-I64-NEXT: .pad #16 -; BE-I64-NEXT: sub sp, sp, #16 -; BE-I64-NEXT: str r3, [sp, #4] @ 4-byte Spill -; BE-I64-NEXT: add r3, sp, #208 -; BE-I64-NEXT: mov r11, r2 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10} +; BE-I64-NEXT: vpush {d8, d9, d10} +; BE-I64-NEXT: mov r5, r3 +; BE-I64-NEXT: add r3, sp, #88 +; BE-I64-NEXT: mov r7, r2 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: mov r4, r0 ; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: ldr r7, [sp, #176] -; BE-I64-NEXT: add r3, sp, #180 -; BE-I64-NEXT: str r1, [sp, #12] @ 4-byte Spill +; BE-I64-NEXT: mov r9, r1 ; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: mov r0, r7 -; BE-I64-NEXT: ldr r6, [sp, #128] -; BE-I64-NEXT: ldr r8, [sp, #144] +; BE-I64-NEXT: mov r0, r4 +; BE-I64-NEXT: mov r1, r6 +; BE-I64-NEXT: mov r2, r7 +; BE-I64-NEXT: mov r3, r5 +; BE-I64-NEXT: ldr r8, [sp, #72] +; BE-I64-NEXT: ldr r10, [sp, #56] ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #132 -; BE-I64-NEXT: str r1, [sp, #8] @ 4-byte Spill +; BE-I64-NEXT: add r3, sp, #60 +; BE-I64-NEXT: mov r5, r1 ; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: mov r0, r10 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #148 -; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: add r3, sp, #76 +; BE-I64-NEXT: mov r4, r1 ; BE-I64-NEXT: vmov.32 d10[0], r0 ; BE-I64-NEXT: mov r0, r8 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #160 -; BE-I64-NEXT: mov r9, r0 -; 
BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: mov r0, r5 -; BE-I64-NEXT: mov r1, r4 -; BE-I64-NEXT: mov r2, r11 -; BE-I64-NEXT: ldr r10, [sp, #112] -; BE-I64-NEXT: vmov.32 d12[0], r9 -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #116 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: mov r0, r10 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #196 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #192] -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl ; BE-I64-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; BE-I64-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEXT: vmov.32 d9[1], r0 -; BE-I64-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; BE-I64-NEXT: vmov.32 d12[1], r7 -; BE-I64-NEXT: vmov.32 d8[1], r0 -; BE-I64-NEXT: vmov.32 d13[1], r4 -; BE-I64-NEXT: vmov.32 d10[1], r6 -; BE-I64-NEXT: vmov.32 d11[1], r8 +; BE-I64-NEXT: vmov.32 d10[1], r4 +; BE-I64-NEXT: vmov.32 d8[1], r9 +; BE-I64-NEXT: vmov.32 d9[1], r5 ; BE-I64-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEXT: vrev64.32 d1, d14 -; BE-I64-NEXT: vrev64.32 d3, d12 -; BE-I64-NEXT: vrev64.32 d5, d9 -; BE-I64-NEXT: vrev64.32 d7, d8 -; BE-I64-NEXT: vrev64.32 d0, d13 -; BE-I64-NEXT: vrev64.32 d2, d10 -; BE-I64-NEXT: vrev64.32 d4, d11 -; BE-I64-NEXT: vrev64.32 d6, d16 -; BE-I64-NEXT: add sp, sp, #16 -; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} -; BE-I64-NEXT: add sp, sp, #4 -; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v8fp128: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I32-NEON-NEXT: .pad #4 -; BE-I32-NEON-NEXT: sub sp, sp, #4 -; 
BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11} -; BE-I32-NEON-NEXT: .pad #8 -; BE-I32-NEON-NEXT: sub sp, sp, #8 -; BE-I32-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill -; BE-I32-NEON-NEXT: add r3, sp, #128 -; BE-I32-NEON-NEXT: mov r11, r2 -; BE-I32-NEON-NEXT: mov r6, r1 -; BE-I32-NEON-NEXT: mov r7, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #100 -; BE-I32-NEON-NEXT: ldr r5, [sp, #96] -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: ldr r4, [sp, #160] -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: mov r0, r5 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #164 -; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEON-NEXT: mov r0, r4 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r4, [sp, #176] -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: ldr r1, [sp, #180] -; BE-I32-NEON-NEXT: ldr r2, [sp, #184] -; BE-I32-NEON-NEXT: ldr r3, [sp, #188] -; BE-I32-NEON-NEXT: mov r0, r4 -; BE-I32-NEON-NEXT: ldr r5, [sp, #116] -; BE-I32-NEON-NEXT: ldr r8, [sp, #120] -; BE-I32-NEON-NEXT: ldr r10, [sp, #84] -; BE-I32-NEON-NEXT: ldr r9, [sp, #88] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: ldr r3, [sp, #124] -; BE-I32-NEON-NEXT: ldr r0, [sp, #112] -; BE-I32-NEON-NEXT: mov r1, r5 -; BE-I32-NEON-NEXT: mov r2, r8 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: ldr r3, [sp, #92] -; BE-I32-NEON-NEXT: ldr r0, [sp, #80] -; BE-I32-NEON-NEXT: mov r1, r10 -; BE-I32-NEON-NEXT: mov r2, r9 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: mov r1, r6 -; BE-I32-NEON-NEXT: mov r2, r11 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #148 -; BE-I32-NEON-NEXT: ldr r7, [sp, 
#144] -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: vmov.32 d10[1], r4 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q0, q5 -; BE-I32-NEON-NEXT: vrev64.32 q1, q4 -; BE-I32-NEON-NEXT: add sp, sp, #8 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11} -; BE-I32-NEON-NEXT: add sp, sp, #4 -; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v8fp128: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: .pad #4 -; BE-I64-NEON-NEXT: sub sp, sp, #4 -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} -; BE-I64-NEON-NEXT: .pad #16 -; BE-I64-NEON-NEXT: sub sp, sp, #16 -; BE-I64-NEON-NEXT: str r3, [sp, #4] @ 4-byte Spill -; BE-I64-NEON-NEXT: add r3, sp, #208 -; BE-I64-NEON-NEXT: mov r11, r2 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: mov r5, r0 -; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: ldr r7, [sp, #176] -; BE-I64-NEON-NEXT: add r3, sp, #180 -; BE-I64-NEON-NEXT: str r1, [sp, #12] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: mov r0, r7 -; BE-I64-NEON-NEXT: ldr r6, [sp, #128] -; BE-I64-NEON-NEXT: ldr r8, [sp, #144] -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #132 -; BE-I64-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: mov r0, r6 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #148 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: mov r0, r8 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: 
bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #160 -; BE-I64-NEON-NEXT: mov r9, r0 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: mov r0, r5 -; BE-I64-NEON-NEXT: mov r1, r4 -; BE-I64-NEON-NEXT: mov r2, r11 -; BE-I64-NEON-NEXT: ldr r10, [sp, #112] -; BE-I64-NEON-NEXT: vmov.32 d12[0], r9 -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #116 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: mov r0, r10 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #196 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #192] -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d12[1], r7 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r4 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r6 -; BE-I64-NEON-NEXT: vmov.32 d11[1], r8 -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 d1, d14 -; BE-I64-NEON-NEXT: vrev64.32 d3, d12 -; BE-I64-NEON-NEXT: vrev64.32 d5, d9 -; BE-I64-NEON-NEXT: vrev64.32 d7, d8 -; BE-I64-NEON-NEXT: vrev64.32 d0, d13 -; BE-I64-NEON-NEXT: vrev64.32 d2, d10 -; BE-I64-NEON-NEXT: vrev64.32 d4, d11 -; BE-I64-NEON-NEXT: vrev64.32 d6, d16 -; BE-I64-NEON-NEXT: add sp, sp, #16 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} -; BE-I64-NEON-NEXT: add sp, sp, #4 -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x 
fp128> %x) - ret <8 x iXLen> %a +; BE-I64-NEXT: vrev64.32 d1, d10 +; BE-I64-NEXT: vrev64.32 d3, d8 +; BE-I64-NEXT: vrev64.32 d0, d9 +; BE-I64-NEXT: vrev64.32 d2, d16 +; BE-I64-NEXT: vpop {d8, d9, d10} +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x) + ret <4 x iXLen> %a } -declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>) -define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { -; LE-I32-LABEL: lrint_v16fp128: +define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) { +; LE-I32-LABEL: lrint_v8fp128: ; LE-I32: @ %bb.0: -; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I32-NEXT: .pad #4 -; LE-I32-NEXT: sub sp, sp, #4 -; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: mov r8, r3 -; LE-I32-NEXT: add r3, sp, #280 -; LE-I32-NEXT: mov r9, r2 -; LE-I32-NEXT: mov r10, r1 -; LE-I32-NEXT: mov r6, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r4, [sp, #216] -; LE-I32-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #220] -; LE-I32-NEXT: ldr r2, [sp, #224] -; LE-I32-NEXT: ldr r3, [sp, #228] -; LE-I32-NEXT: mov r0, r4 -; LE-I32-NEXT: ldr r7, [sp, #152] -; LE-I32-NEXT: ldr r11, [sp, #104] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #156 -; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r7, [sp, #184] -; LE-I32-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #188] -; LE-I32-NEXT: ldr r2, [sp, #192] -; LE-I32-NEXT: ldr r3, [sp, #196] -; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: ldr r4, [sp, #120] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #124 -; LE-I32-NEXT: vmov.32 d13[0], r0 -; LE-I32-NEXT: mov r0, r4 -; 
LE-I32-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r5, [sp, #136] -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #140] -; LE-I32-NEXT: ldr r2, [sp, #144] -; LE-I32-NEXT: ldr r3, [sp, #148] -; LE-I32-NEXT: mov r0, r5 -; LE-I32-NEXT: ldr r4, [sp, #108] -; LE-I32-NEXT: ldr r7, [sp, #112] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r3, [sp, #116] -; LE-I32-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEXT: mov r0, r11 +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11} +; LE-I32-NEXT: vpush {d8, d9, d10, d11} +; LE-I32-NEXT: mov r6, r3 +; LE-I32-NEXT: add r3, sp, #112 +; LE-I32-NEXT: mov r7, r2 +; LE-I32-NEXT: mov r4, r1 +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: mov r0, r5 ; LE-I32-NEXT: mov r1, r4 ; LE-I32-NEXT: mov r2, r7 +; LE-I32-NEXT: mov r3, r6 +; LE-I32-NEXT: ldr r8, [sp, #160] +; LE-I32-NEXT: ldr r9, [sp, #64] +; LE-I32-NEXT: ldr r10, [sp, #80] ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: mov r4, r0 -; LE-I32-NEXT: mov r0, r6 -; LE-I32-NEXT: mov r1, r10 -; LE-I32-NEXT: mov r2, r9 -; LE-I32-NEXT: mov r3, r8 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r7, [sp, #200] -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #204] -; LE-I32-NEXT: ldr r2, [sp, #208] -; LE-I32-NEXT: ldr r3, [sp, #212] -; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: ldr r5, [sp, #172] -; LE-I32-NEXT: vmov.32 d14[1], r4 -; LE-I32-NEXT: ldr r6, [sp, #176] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEXT: ldr r3, [sp, #180] -; LE-I32-NEXT: ldr r0, [sp, #168] -; LE-I32-NEXT: mov r1, r5 -; LE-I32-NEXT: mov r2, r6 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #248 -; LE-I32-NEXT: mov r5, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: add r3, sp, #84 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: mov r0, 
r10 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r4, [sp, #264] +; LE-I32-NEXT: ldr r6, [sp, #96] ; LE-I32-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #268] -; LE-I32-NEXT: ldr r2, [sp, #272] -; LE-I32-NEXT: vmov.32 d12[1], r5 -; LE-I32-NEXT: ldr r3, [sp, #276] -; LE-I32-NEXT: mov r0, r4 -; LE-I32-NEXT: ldr r6, [sp, #236] -; LE-I32-NEXT: ldr r7, [sp, #240] -; LE-I32-NEXT: ldr r8, [sp, #332] -; LE-I32-NEXT: ldr r5, [sp, #336] +; LE-I32-NEXT: ldr r1, [sp, #100] +; LE-I32-NEXT: ldr r2, [sp, #104] +; LE-I32-NEXT: ldr r3, [sp, #108] +; LE-I32-NEXT: mov r0, r6 +; LE-I32-NEXT: ldr r4, [sp, #68] +; LE-I32-NEXT: ldr r5, [sp, #72] +; LE-I32-NEXT: ldr r10, [sp, #164] +; LE-I32-NEXT: ldr r7, [sp, #168] ; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #76] ; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: ldr r3, [sp, #244] -; LE-I32-NEXT: ldr r0, [sp, #232] -; LE-I32-NEXT: mov r1, r6 -; LE-I32-NEXT: mov r2, r7 +; LE-I32-NEXT: mov r0, r9 +; LE-I32-NEXT: mov r1, r4 +; LE-I32-NEXT: mov r2, r5 ; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r3, [sp, #172] ; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: ldr r3, [sp, #340] -; LE-I32-NEXT: ldr r0, [sp, #328] -; LE-I32-NEXT: mov r1, r8 -; LE-I32-NEXT: mov r2, r5 +; LE-I32-NEXT: mov r0, r8 +; LE-I32-NEXT: mov r1, r10 +; LE-I32-NEXT: mov r2, r7 ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #312 +; LE-I32-NEXT: add r3, sp, #144 ; LE-I32-NEXT: mov r4, r0 ; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #300 -; LE-I32-NEXT: ldr r7, [sp, #296] +; LE-I32-NEXT: add r3, sp, #132 +; LE-I32-NEXT: ldr r7, [sp, #128] ; LE-I32-NEXT: vmov.32 d9[0], r0 ; LE-I32-NEXT: ldm r3, {r1, r2, r3} ; LE-I32-NEXT: mov r0, r7 ; LE-I32-NEXT: vmov.32 d9[1], r4 ; LE-I32-NEXT: bl lrintl ; LE-I32-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEXT: vorr q0, q7, q7 -; LE-I32-NEXT: vorr q1, q6, q6 -; LE-I32-NEXT: vorr q2, q5, q5 -; LE-I32-NEXT: vorr q3, q4, q4 -; LE-I32-NEXT: vpop 
{d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: add sp, sp, #4 -; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; LE-I32-NEXT: vorr q0, q5, q5 +; LE-I32-NEXT: vorr q1, q4, q4 +; LE-I32-NEXT: vpop {d8, d9, d10, d11} +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; -; LE-I64-LABEL: lrint_v16fp128: +; LE-I64-LABEL: lrint_v8fp128: ; LE-I64: @ %bb.0: ; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -9688,1063 +2425,249 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { ; LE-I64-NEXT: sub sp, sp, #4 ; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: .pad #72 -; LE-I64-NEXT: sub sp, sp, #72 -; LE-I64-NEXT: mov r6, r3 -; LE-I64-NEXT: add r3, sp, #408 -; LE-I64-NEXT: mov r7, r2 -; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: .pad #8 +; LE-I64-NEXT: sub sp, sp, #8 +; LE-I64-NEXT: mov r11, r3 +; LE-I64-NEXT: add r3, sp, #208 +; LE-I64-NEXT: mov r10, r2 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: mov r5, r0 ; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r5, sp, #176 -; LE-I64-NEXT: mov r10, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: mov r0, r7 -; LE-I64-NEXT: ldm r5, {r2, r3, r5} -; LE-I64-NEXT: mov r1, r6 -; LE-I64-NEXT: ldr r8, [sp, #232] -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #188 -; LE-I64-NEXT: mov r9, r1 -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: mov r0, r5 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #236 -; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: add r7, sp, #164 +; LE-I64-NEXT: ldr r6, [sp, #160] +; LE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill ; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: mov r0, r8 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #252 -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #248] -; 
LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #268 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #264] -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #284 -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #280] -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #316 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #312] -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: vmov.32 d15[1], r5 -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: ldr r5, [sp, #300] -; LE-I64-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEXT: ldr r2, [sp, #304] -; LE-I64-NEXT: ldr r3, [sp, #308] -; LE-I64-NEXT: vmov.32 d11[1], r6 -; LE-I64-NEXT: ldr r6, [sp, #200] -; LE-I64-NEXT: ldr r7, [sp, #204] -; LE-I64-NEXT: vmov.32 d10[1], r8 -; LE-I64-NEXT: ldr r8, [sp, #344] -; LE-I64-NEXT: vmov.32 d9[1], r11 -; LE-I64-NEXT: ldr r11, [sp, #216] -; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #40 -; LE-I64-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #296] -; LE-I64-NEXT: vmov.32 d8[1], r9 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #24 -; LE-I64-NEXT: vorr q5, q8, q8 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: vorr q4, q6, q6 -; LE-I64-NEXT: vmov.32 d11[1], r1 -; LE-I64-NEXT: mov r1, r5 -; LE-I64-NEXT: vmov.32 d9[1], r10 -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: ldr r2, [sp, #208] -; LE-I64-NEXT: ldr r3, [sp, #212] -; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: ldm r7, {r1, r2, r3, r7} ; LE-I64-NEXT: mov r0, r6 -; LE-I64-NEXT: mov r1, r7 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: ldr r8, [sp, #128] +; LE-I64-NEXT: ldr 
r9, [sp, #144] ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #220 -; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: add r3, sp, #180 +; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill ; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: mov r0, r11 +; LE-I64-NEXT: mov r0, r7 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #348 -; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: add r3, sp, #132 +; LE-I64-NEXT: mov r7, r1 ; LE-I64-NEXT: vmov.32 d11[0], r0 ; LE-I64-NEXT: mov r0, r8 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #364 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #360] +; LE-I64-NEXT: add r3, sp, #148 ; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: mov r0, r9 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #380 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: mov r0, r5 +; LE-I64-NEXT: mov r1, r4 +; LE-I64-NEXT: mov r2, r10 +; LE-I64-NEXT: mov r3, r11 +; LE-I64-NEXT: ldr r6, [sp, #112] +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: add r3, sp, #116 +; LE-I64-NEXT: mov r4, r1 ; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #376] -; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: mov r0, r6 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #396 +; LE-I64-NEXT: add r3, sp, #196 ; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #392] -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #332 -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #328] -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: add r0, r4, #64 -; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #24 -; LE-I64-NEXT: vmov.32 d13[1], 
r8 -; LE-I64-NEXT: vmov.32 d18[1], r9 -; LE-I64-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEXT: vmov.32 d12[1], r1 -; LE-I64-NEXT: vmov.32 d14[1], r5 -; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! -; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-I64-NEXT: vmov.32 d8[1], r7 -; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128] -; LE-I64-NEXT: vmov.32 d11[1], r11 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #40 -; LE-I64-NEXT: vmov.32 d10[1], r10 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-I64-NEXT: vst1.64 {d10, d11}, [r4:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #56 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128] -; LE-I64-NEXT: add sp, sp, #72 +; LE-I64-NEXT: ldr r0, [sp, #192] +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: bl lrintl +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d11[1], r7 +; LE-I64-NEXT: vmov.32 d10[1], r0 +; LE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEXT: vorr q2, q5, q5 +; LE-I64-NEXT: vmov.32 d13[1], r9 +; LE-I64-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEXT: vmov.32 d14[1], r4 +; LE-I64-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEXT: vorr q0, q7, q7 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q1, q6, q6 +; LE-I64-NEXT: vorr q3, q4, q4 +; LE-I64-NEXT: add sp, sp, #8 ; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: add sp, sp, #4 ; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-I32-NEON-LABEL: lrint_v16fp128: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I32-NEON-NEXT: .pad #4 -; LE-I32-NEON-NEXT: sub sp, sp, #4 -; LE-I32-NEON-NEXT: .vsave 
{d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: mov r8, r3 -; LE-I32-NEON-NEXT: add r3, sp, #280 -; LE-I32-NEON-NEXT: mov r9, r2 -; LE-I32-NEON-NEXT: mov r10, r1 -; LE-I32-NEON-NEXT: mov r6, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r4, [sp, #216] -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #220] -; LE-I32-NEON-NEXT: ldr r2, [sp, #224] -; LE-I32-NEON-NEXT: ldr r3, [sp, #228] -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: ldr r7, [sp, #152] -; LE-I32-NEON-NEXT: ldr r11, [sp, #104] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #156 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r7, [sp, #184] -; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #188] -; LE-I32-NEON-NEXT: ldr r2, [sp, #192] -; LE-I32-NEON-NEXT: ldr r3, [sp, #196] -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: ldr r4, [sp, #120] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #124 -; LE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r5, [sp, #136] -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #140] -; LE-I32-NEON-NEXT: ldr r2, [sp, #144] -; LE-I32-NEON-NEXT: ldr r3, [sp, #148] -; LE-I32-NEON-NEXT: mov r0, r5 -; LE-I32-NEON-NEXT: ldr r4, [sp, #108] -; LE-I32-NEON-NEXT: ldr r7, [sp, #112] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r3, [sp, #116] -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: mov r0, r11 -; LE-I32-NEON-NEXT: mov r1, r4 -; LE-I32-NEON-NEXT: mov r2, r7 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: mov r0, r6 -; LE-I32-NEON-NEXT: 
mov r1, r10 -; LE-I32-NEON-NEXT: mov r2, r9 -; LE-I32-NEON-NEXT: mov r3, r8 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r7, [sp, #200] -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #204] -; LE-I32-NEON-NEXT: ldr r2, [sp, #208] -; LE-I32-NEON-NEXT: ldr r3, [sp, #212] -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: ldr r5, [sp, #172] -; LE-I32-NEON-NEXT: vmov.32 d14[1], r4 -; LE-I32-NEON-NEXT: ldr r6, [sp, #176] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEON-NEXT: ldr r3, [sp, #180] -; LE-I32-NEON-NEXT: ldr r0, [sp, #168] -; LE-I32-NEON-NEXT: mov r1, r5 -; LE-I32-NEON-NEXT: mov r2, r6 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #248 -; LE-I32-NEON-NEXT: mov r5, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r4, [sp, #264] -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #268] -; LE-I32-NEON-NEXT: ldr r2, [sp, #272] -; LE-I32-NEON-NEXT: vmov.32 d12[1], r5 -; LE-I32-NEON-NEXT: ldr r3, [sp, #276] -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: ldr r6, [sp, #236] -; LE-I32-NEON-NEXT: ldr r7, [sp, #240] -; LE-I32-NEON-NEXT: ldr r8, [sp, #332] -; LE-I32-NEON-NEXT: ldr r5, [sp, #336] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: ldr r3, [sp, #244] -; LE-I32-NEON-NEXT: ldr r0, [sp, #232] -; LE-I32-NEON-NEXT: mov r1, r6 -; LE-I32-NEON-NEXT: mov r2, r7 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: ldr r3, [sp, #340] -; LE-I32-NEON-NEXT: ldr r0, [sp, #328] -; LE-I32-NEON-NEXT: mov r1, r8 -; LE-I32-NEON-NEXT: mov r2, r5 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #312 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #300 -; LE-I32-NEON-NEXT: ldr r7, [sp, #296] -; LE-I32-NEON-NEXT: vmov.32 
d9[0], r0 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r4 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; LE-I32-NEON-NEXT: vorr q0, q7, q7 -; LE-I32-NEON-NEXT: vorr q1, q6, q6 -; LE-I32-NEON-NEXT: vorr q2, q5, q5 -; LE-I32-NEON-NEXT: vorr q3, q4, q4 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: add sp, sp, #4 -; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v16fp128: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: .pad #4 -; LE-I64-NEON-NEXT: sub sp, sp, #4 -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #72 -; LE-I64-NEON-NEXT: sub sp, sp, #72 -; LE-I64-NEON-NEXT: mov r6, r3 -; LE-I64-NEON-NEXT: add r3, sp, #408 -; LE-I64-NEON-NEXT: mov r7, r2 -; LE-I64-NEON-NEXT: mov r4, r0 -; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r5, sp, #176 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: mov r0, r7 -; LE-I64-NEON-NEXT: ldm r5, {r2, r3, r5} -; LE-I64-NEON-NEXT: mov r1, r6 -; LE-I64-NEON-NEXT: ldr r8, [sp, #232] -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #188 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: mov r0, r5 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #236 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: mov r0, r8 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #252 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: ldr 
r0, [sp, #248] -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #268 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #264] -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #284 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #280] -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #316 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #312] -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d15[1], r5 -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: ldr r5, [sp, #300] -; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEON-NEXT: ldr r2, [sp, #304] -; LE-I64-NEON-NEXT: ldr r3, [sp, #308] -; LE-I64-NEON-NEXT: vmov.32 d11[1], r6 -; LE-I64-NEON-NEXT: ldr r6, [sp, #200] -; LE-I64-NEON-NEXT: ldr r7, [sp, #204] -; LE-I64-NEON-NEXT: vmov.32 d10[1], r8 -; LE-I64-NEON-NEXT: ldr r8, [sp, #344] -; LE-I64-NEON-NEXT: vmov.32 d9[1], r11 -; LE-I64-NEON-NEXT: ldr r11, [sp, #216] -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #296] -; LE-I64-NEON-NEXT: vmov.32 d8[1], r9 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vorr q5, q8, q8 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: vorr q4, q6, q6 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r1 -; LE-I64-NEON-NEXT: mov r1, r5 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r10 -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: ldr r2, [sp, #208] -; LE-I64-NEON-NEXT: ldr 
r3, [sp, #212] -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: mov r9, r1 -; LE-I64-NEON-NEXT: mov r0, r6 -; LE-I64-NEON-NEXT: mov r1, r7 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #220 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: mov r0, r11 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #348 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: mov r0, r8 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #364 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #360] -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #380 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #376] -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #396 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #392] -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #332 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #328] -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: add r0, r4, #64 -; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #24 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r8 -; LE-I64-NEON-NEXT: vmov.32 d18[1], r9 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r1 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; LE-I64-NEON-NEXT: vst1.64 
{d18, d19}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-I64-NEON-NEXT: vmov.32 d8[1], r7 -; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128] -; LE-I64-NEON-NEXT: vmov.32 d11[1], r11 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #40 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r10 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r4:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #56 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] -; LE-I64-NEON-NEXT: add sp, sp, #72 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: add sp, sp, #4 -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-LABEL: lrint_v16fp128: +; BE-I32-LABEL: lrint_v8fp128: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-I32-NEXT: .pad #4 ; BE-I32-NEXT: sub sp, sp, #4 -; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: .pad #16 -; BE-I32-NEXT: sub sp, sp, #16 -; BE-I32-NEXT: stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill -; BE-I32-NEXT: add r3, sp, #264 +; BE-I32-NEXT: .vsave {d8, d9, d10, d11} +; BE-I32-NEXT: vpush {d8, d9, d10, d11} +; BE-I32-NEXT: .pad #8 +; BE-I32-NEXT: sub sp, sp, #8 +; BE-I32-NEXT: str r3, [sp, #4] @ 4-byte Spill +; BE-I32-NEXT: add r3, sp, #128 +; BE-I32-NEXT: mov r11, r2 +; BE-I32-NEXT: mov r6, r1 +; BE-I32-NEXT: mov r7, r0 ; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #332 -; BE-I32-NEXT: ldr r7, [sp, #328] -; BE-I32-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEXT: ldr r10, [sp, #280] +; 
BE-I32-NEXT: add r3, sp, #100 +; BE-I32-NEXT: ldr r5, [sp, #96] +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: ldr r4, [sp, #160] ; BE-I32-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: ldr r8, [sp, #168] +; BE-I32-NEXT: mov r0, r5 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r5, [sp, #344] +; BE-I32-NEXT: add r3, sp, #164 ; BE-I32-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEXT: ldr r1, [sp, #348] -; BE-I32-NEXT: ldr r2, [sp, #352] -; BE-I32-NEXT: ldr r3, [sp, #356] -; BE-I32-NEXT: mov r0, r5 -; BE-I32-NEXT: ldr r7, [sp, #284] -; BE-I32-NEXT: ldr r4, [sp, #288] -; BE-I32-NEXT: ldr r6, [sp, #172] -; BE-I32-NEXT: ldr r9, [sp, #176] +; BE-I32-NEXT: mov r0, r4 +; BE-I32-NEXT: ldm r3, {r1, r2, r3} ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r3, [sp, #292] -; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: mov r0, r10 -; BE-I32-NEXT: mov r1, r7 -; BE-I32-NEXT: mov r2, r4 +; BE-I32-NEXT: ldr r4, [sp, #176] +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #180] +; BE-I32-NEXT: ldr r2, [sp, #184] +; BE-I32-NEXT: ldr r3, [sp, #188] +; BE-I32-NEXT: mov r0, r4 +; BE-I32-NEXT: ldr r5, [sp, #116] +; BE-I32-NEXT: ldr r8, [sp, #120] +; BE-I32-NEXT: ldr r10, [sp, #84] +; BE-I32-NEXT: ldr r9, [sp, #88] ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r3, [sp, #180] ; BE-I32-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEXT: mov r0, r8 -; BE-I32-NEXT: mov r1, r6 +; BE-I32-NEXT: ldr r3, [sp, #124] +; BE-I32-NEXT: ldr r0, [sp, #112] +; BE-I32-NEXT: mov r1, r5 +; BE-I32-NEXT: mov r2, r8 +; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: ldr r3, [sp, #92] +; BE-I32-NEXT: ldr r0, [sp, #80] +; BE-I32-NEXT: mov r1, r10 ; BE-I32-NEXT: mov r2, r9 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #232 +; BE-I32-NEXT: ldr r3, [sp, #4] @ 4-byte Reload ; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #136 -; BE-I32-NEXT: mov r6, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; 
BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r5, [sp, #296] -; BE-I32-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEXT: ldr r1, [sp, #300] -; BE-I32-NEXT: ldr r2, [sp, #304] -; BE-I32-NEXT: ldr r3, [sp, #308] -; BE-I32-NEXT: mov r0, r5 -; BE-I32-NEXT: ldr r10, [sp, #216] -; BE-I32-NEXT: ldr r8, [sp, #220] -; BE-I32-NEXT: ldr r9, [sp, #152] -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r7, [sp, #248] -; BE-I32-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEXT: ldr r1, [sp, #252] -; BE-I32-NEXT: ldr r2, [sp, #256] -; BE-I32-NEXT: vmov.32 d8[0], r6 -; BE-I32-NEXT: ldr r3, [sp, #260] ; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: ldr r5, [sp, #224] -; BE-I32-NEXT: ldr r11, [sp, #120] -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r3, [sp, #228] -; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: mov r0, r10 -; BE-I32-NEXT: mov r1, r8 -; BE-I32-NEXT: mov r2, r5 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #200 -; BE-I32-NEXT: mov r5, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEXT: ldr r0, [sp, #184] -; BE-I32-NEXT: ldr r1, [sp, #188] -; BE-I32-NEXT: ldr r2, [sp, #192] -; BE-I32-NEXT: vmov.32 d14[0], r4 -; BE-I32-NEXT: ldr r3, [sp, #196] -; BE-I32-NEXT: vmov.32 d15[1], r5 -; BE-I32-NEXT: ldr r7, [sp, #156] -; BE-I32-NEXT: ldr r6, [sp, #160] -; BE-I32-NEXT: ldr r4, [sp, #124] -; BE-I32-NEXT: ldr r5, [sp, #128] -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r3, [sp, #164] -; BE-I32-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEXT: mov r0, r9 -; BE-I32-NEXT: mov r1, r7 -; BE-I32-NEXT: mov r2, r6 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r3, [sp, #132] -; BE-I32-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEXT: mov r0, r11 -; BE-I32-NEXT: mov r1, r4 -; BE-I32-NEXT: mov r2, r5 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload +; BE-I32-NEXT: mov r1, r6 +; BE-I32-NEXT: mov r2, r11 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #316 -; BE-I32-NEXT: ldr r7, [sp, #312] 
-; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: add r3, sp, #148 +; BE-I32-NEXT: ldr r7, [sp, #144] +; BE-I32-NEXT: vmov.32 d10[0], r0 ; BE-I32-NEXT: ldm r3, {r1, r2, r3} ; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: vmov.32 d12[1], r4 +; BE-I32-NEXT: vmov.32 d10[1], r4 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEXT: vrev64.32 q0, q6 -; BE-I32-NEXT: vrev64.32 q1, q7 -; BE-I32-NEXT: vrev64.32 q2, q4 -; BE-I32-NEXT: vrev64.32 q3, q5 -; BE-I32-NEXT: add sp, sp, #16 -; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q5 +; BE-I32-NEXT: vrev64.32 q1, q4 +; BE-I32-NEXT: add sp, sp, #8 +; BE-I32-NEXT: vpop {d8, d9, d10, d11} ; BE-I32-NEXT: add sp, sp, #4 ; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; BE-I64-LABEL: lrint_v16fp128: +; BE-I64-LABEL: lrint_v8fp128: ; BE-I64: @ %bb.0: ; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-I64-NEXT: .pad #4 ; BE-I64-NEXT: sub sp, sp, #4 -; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: .pad #56 -; BE-I64-NEXT: sub sp, sp, #56 -; BE-I64-NEXT: mov r5, r3 -; BE-I64-NEXT: add r3, sp, #376 -; BE-I64-NEXT: mov r6, r2 -; BE-I64-NEXT: mov r4, r0 -; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: ldr r7, [sp, #392] -; BE-I64-NEXT: add r3, sp, #396 -; BE-I64-NEXT: mov r9, r1 -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: mov r0, r7 -; BE-I64-NEXT: ldr r11, [sp, #168] -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: ldr r2, [sp, #160] -; BE-I64-NEXT: mov r10, r1 -; BE-I64-NEXT: ldr r3, [sp, #164] -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: mov r0, r6 -; BE-I64-NEXT: mov r1, r5 -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #172 -; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: vmov.32 d10[0], r0 -; 
BE-I64-NEXT: mov r0, r11 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #220 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #216] -; BE-I64-NEXT: mov r11, r1 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #236 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #232] -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #252 -; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #248] -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #268 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #264] -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #280] -; BE-I64-NEXT: ldr r2, [sp, #288] -; BE-I64-NEXT: vmov.32 d13[1], r7 -; BE-I64-NEXT: ldr r7, [sp, #284] -; BE-I64-NEXT: ldr r3, [sp, #292] -; BE-I64-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEXT: ldr r5, [sp, #328] -; BE-I64-NEXT: vmov.32 d12[1], r6 -; BE-I64-NEXT: ldr r6, [sp, #300] -; BE-I64-NEXT: vmov.32 d10[1], r8 -; BE-I64-NEXT: ldr r8, [sp, #184] -; BE-I64-NEXT: vmov.32 d11[1], r11 -; BE-I64-NEXT: vmov.32 d9[1], r10 -; BE-I64-NEXT: vmov.32 d8[1], r9 -; BE-I64-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEXT: mov r1, r7 -; BE-I64-NEXT: vstr d14, [sp, #48] @ 8-byte Spill -; BE-I64-NEXT: vstr d13, [sp, #40] @ 8-byte Spill -; BE-I64-NEXT: vstr d12, [sp, #32] @ 8-byte Spill -; BE-I64-NEXT: vstr d11, [sp, #24] @ 8-byte Spill -; BE-I64-NEXT: vstr d10, [sp, #16] @ 8-byte Spill -; BE-I64-NEXT: vstr d9, [sp, #8] @ 8-byte Spill -; BE-I64-NEXT: vstr d8, [sp] @ 8-byte Spill +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: .pad #16 +; BE-I64-NEXT: sub sp, sp, #16 +; BE-I64-NEXT: str r3, [sp, #4] 
@ 4-byte Spill +; BE-I64-NEXT: add r3, sp, #208 +; BE-I64-NEXT: mov r11, r2 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: mov r10, r1 -; BE-I64-NEXT: ldr r1, [sp, #296] -; BE-I64-NEXT: ldr r2, [sp, #304] +; BE-I64-NEXT: ldr r7, [sp, #176] +; BE-I64-NEXT: add r3, sp, #180 +; BE-I64-NEXT: str r1, [sp, #12] @ 4-byte Spill ; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: ldr r3, [sp, #308] -; BE-I64-NEXT: mov r0, r1 -; BE-I64-NEXT: mov r1, r6 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: mov r0, r7 +; BE-I64-NEXT: ldr r6, [sp, #128] +; BE-I64-NEXT: ldr r8, [sp, #144] ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #332 -; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: add r3, sp, #132 +; BE-I64-NEXT: str r1, [sp, #8] @ 4-byte Spill ; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: mov r0, r6 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #188 -; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: add r3, sp, #148 +; BE-I64-NEXT: mov r6, r1 ; BE-I64-NEXT: vmov.32 d10[0], r0 ; BE-I64-NEXT: mov r0, r8 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #204 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #200] -; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: add r3, sp, #160 +; BE-I64-NEXT: mov r9, r0 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #348 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #344] -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: mov r1, r4 +; BE-I64-NEXT: mov r2, r11 +; BE-I64-NEXT: ldr r10, [sp, #112] +; BE-I64-NEXT: vmov.32 d12[0], r9 ; BE-I64-NEXT: bl lrintl 
-; BE-I64-NEXT: add r3, sp, #364 +; BE-I64-NEXT: add r3, sp, #116 +; BE-I64-NEXT: mov r4, r1 ; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #360] -; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: mov r0, r10 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #316 +; BE-I64-NEXT: add r3, sp, #196 ; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #312] -; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: ldr r0, [sp, #192] +; BE-I64-NEXT: mov r5, r1 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: vldr d18, [sp, #48] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d17, d15 -; BE-I64-NEXT: vrev64.32 d16, d18 -; BE-I64-NEXT: vldr d18, [sp, #40] @ 8-byte Reload -; BE-I64-NEXT: vmov.32 d24[0], r0 -; BE-I64-NEXT: add r0, r4, #64 -; BE-I64-NEXT: vldr d20, [sp, #32] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d19, d18 -; BE-I64-NEXT: vmov.32 d9[1], r11 -; BE-I64-NEXT: vmov.32 d10[1], r7 -; BE-I64-NEXT: vrev64.32 d18, d20 -; BE-I64-NEXT: vldr d20, [sp, #24] @ 8-byte Reload -; BE-I64-NEXT: vmov.32 d8[1], r10 -; BE-I64-NEXT: vmov.32 d14[1], r6 -; BE-I64-NEXT: vmov.32 d24[1], r1 -; BE-I64-NEXT: vldr d22, [sp, #16] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d21, d20 -; BE-I64-NEXT: vrev64.32 d1, d9 -; BE-I64-NEXT: vmov.32 d13[1], r9 -; BE-I64-NEXT: vrev64.32 d31, d10 -; BE-I64-NEXT: vrev64.32 d20, d22 -; BE-I64-NEXT: vldr d22, [sp, #8] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d0, d8 -; BE-I64-NEXT: vrev64.32 d29, d14 -; BE-I64-NEXT: vmov.32 d12[1], r5 -; BE-I64-NEXT: vrev64.32 d30, d24 -; BE-I64-NEXT: vrev64.32 d27, d22 -; BE-I64-NEXT: vldr d22, [sp] @ 8-byte Reload -; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]! 
+; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: vmov.32 d9[1], r0 +; BE-I64-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; BE-I64-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEXT: vmov.32 d8[1], r0 +; BE-I64-NEXT: vmov.32 d13[1], r4 +; BE-I64-NEXT: vmov.32 d10[1], r6 ; BE-I64-NEXT: vmov.32 d11[1], r8 -; BE-I64-NEXT: vrev64.32 d28, d13 -; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-I64-NEXT: vrev64.32 d26, d22 -; BE-I64-NEXT: vrev64.32 d23, d12 -; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]! -; BE-I64-NEXT: vrev64.32 d22, d11 -; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128] -; BE-I64-NEXT: vst1.64 {d20, d21}, [r4:128]! -; BE-I64-NEXT: vst1.64 {d22, d23}, [r4:128]! -; BE-I64-NEXT: vst1.64 {d18, d19}, [r4:128]! -; BE-I64-NEXT: vst1.64 {d16, d17}, [r4:128] -; BE-I64-NEXT: add sp, sp, #56 -; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d14 +; BE-I64-NEXT: vrev64.32 d3, d12 +; BE-I64-NEXT: vrev64.32 d5, d9 +; BE-I64-NEXT: vrev64.32 d7, d8 +; BE-I64-NEXT: vrev64.32 d0, d13 +; BE-I64-NEXT: vrev64.32 d2, d10 +; BE-I64-NEXT: vrev64.32 d4, d11 +; BE-I64-NEXT: vrev64.32 d6, d16 +; BE-I64-NEXT: add sp, sp, #16 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; BE-I64-NEXT: add sp, sp, #4 ; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v16fp128: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I32-NEON-NEXT: .pad #4 -; BE-I32-NEON-NEXT: sub sp, sp, #4 -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: .pad #16 -; BE-I32-NEON-NEXT: sub sp, sp, #16 -; BE-I32-NEON-NEXT: stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill -; BE-I32-NEON-NEXT: add r3, sp, #264 -; BE-I32-NEON-NEXT: 
ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #332 -; BE-I32-NEON-NEXT: ldr r7, [sp, #328] -; BE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I32-NEON-NEXT: ldr r10, [sp, #280] -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: ldr r8, [sp, #168] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r5, [sp, #344] -; BE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I32-NEON-NEXT: ldr r1, [sp, #348] -; BE-I32-NEON-NEXT: ldr r2, [sp, #352] -; BE-I32-NEON-NEXT: ldr r3, [sp, #356] -; BE-I32-NEON-NEXT: mov r0, r5 -; BE-I32-NEON-NEXT: ldr r7, [sp, #284] -; BE-I32-NEON-NEXT: ldr r4, [sp, #288] -; BE-I32-NEON-NEXT: ldr r6, [sp, #172] -; BE-I32-NEON-NEXT: ldr r9, [sp, #176] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r3, [sp, #292] -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: mov r0, r10 -; BE-I32-NEON-NEXT: mov r1, r7 -; BE-I32-NEON-NEXT: mov r2, r4 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r3, [sp, #180] -; BE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; BE-I32-NEON-NEXT: mov r0, r8 -; BE-I32-NEON-NEXT: mov r1, r6 -; BE-I32-NEON-NEXT: mov r2, r9 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #232 -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #136 -; BE-I32-NEON-NEXT: mov r6, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r5, [sp, #296] -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: ldr r1, [sp, #300] -; BE-I32-NEON-NEXT: ldr r2, [sp, #304] -; BE-I32-NEON-NEXT: ldr r3, [sp, #308] -; BE-I32-NEON-NEXT: mov r0, r5 -; BE-I32-NEON-NEXT: ldr r10, [sp, #216] -; BE-I32-NEON-NEXT: ldr r8, [sp, #220] -; BE-I32-NEON-NEXT: ldr r9, [sp, #152] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r7, [sp, #248] -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: ldr r1, [sp, #252] -; 
BE-I32-NEON-NEXT: ldr r2, [sp, #256] -; BE-I32-NEON-NEXT: vmov.32 d8[0], r6 -; BE-I32-NEON-NEXT: ldr r3, [sp, #260] -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: ldr r5, [sp, #224] -; BE-I32-NEON-NEXT: ldr r11, [sp, #120] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r3, [sp, #228] -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: mov r0, r10 -; BE-I32-NEON-NEXT: mov r1, r8 -; BE-I32-NEON-NEXT: mov r2, r5 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #200 -; BE-I32-NEON-NEXT: mov r5, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: ldr r0, [sp, #184] -; BE-I32-NEON-NEXT: ldr r1, [sp, #188] -; BE-I32-NEON-NEXT: ldr r2, [sp, #192] -; BE-I32-NEON-NEXT: vmov.32 d14[0], r4 -; BE-I32-NEON-NEXT: ldr r3, [sp, #196] -; BE-I32-NEON-NEXT: vmov.32 d15[1], r5 -; BE-I32-NEON-NEXT: ldr r7, [sp, #156] -; BE-I32-NEON-NEXT: ldr r6, [sp, #160] -; BE-I32-NEON-NEXT: ldr r4, [sp, #124] -; BE-I32-NEON-NEXT: ldr r5, [sp, #128] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r3, [sp, #164] -; BE-I32-NEON-NEXT: vmov.32 d14[1], r0 -; BE-I32-NEON-NEXT: mov r0, r9 -; BE-I32-NEON-NEXT: mov r1, r7 -; BE-I32-NEON-NEXT: mov r2, r6 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r3, [sp, #132] -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: mov r0, r11 -; BE-I32-NEON-NEXT: mov r1, r4 -; BE-I32-NEON-NEXT: mov r2, r5 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #316 -; BE-I32-NEON-NEXT: ldr r7, [sp, #312] -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: vmov.32 d12[1], r4 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: vrev64.32 q0, q6 -; BE-I32-NEON-NEXT: 
vrev64.32 q1, q7 -; BE-I32-NEON-NEXT: vrev64.32 q2, q4 -; BE-I32-NEON-NEXT: vrev64.32 q3, q5 -; BE-I32-NEON-NEXT: add sp, sp, #16 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: add sp, sp, #4 -; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v16fp128: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: .pad #4 -; BE-I64-NEON-NEXT: sub sp, sp, #4 -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: .pad #56 -; BE-I64-NEON-NEXT: sub sp, sp, #56 -; BE-I64-NEON-NEXT: mov r5, r3 -; BE-I64-NEON-NEXT: add r3, sp, #376 -; BE-I64-NEON-NEXT: mov r6, r2 -; BE-I64-NEON-NEXT: mov r4, r0 -; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: ldr r7, [sp, #392] -; BE-I64-NEON-NEXT: add r3, sp, #396 -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: mov r0, r7 -; BE-I64-NEON-NEXT: ldr r11, [sp, #168] -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: ldr r2, [sp, #160] -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: ldr r3, [sp, #164] -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: mov r0, r6 -; BE-I64-NEON-NEXT: mov r1, r5 -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #172 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: mov r0, r11 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #220 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #216] -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #236 -; 
BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #232] -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #252 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #248] -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #268 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #264] -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #280] -; BE-I64-NEON-NEXT: ldr r2, [sp, #288] -; BE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; BE-I64-NEON-NEXT: ldr r7, [sp, #284] -; BE-I64-NEON-NEXT: ldr r3, [sp, #292] -; BE-I64-NEON-NEXT: vmov.32 d14[1], r5 -; BE-I64-NEON-NEXT: ldr r5, [sp, #328] -; BE-I64-NEON-NEXT: vmov.32 d12[1], r6 -; BE-I64-NEON-NEXT: ldr r6, [sp, #300] -; BE-I64-NEON-NEXT: vmov.32 d10[1], r8 -; BE-I64-NEON-NEXT: ldr r8, [sp, #184] -; BE-I64-NEON-NEXT: vmov.32 d11[1], r11 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r10 -; BE-I64-NEON-NEXT: vmov.32 d8[1], r9 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r1 -; BE-I64-NEON-NEXT: mov r1, r7 -; BE-I64-NEON-NEXT: vstr d14, [sp, #48] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d13, [sp, #40] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d12, [sp, #32] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d10, [sp, #16] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d9, [sp, #8] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d8, [sp] @ 8-byte Spill -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: ldr r1, [sp, #296] -; BE-I64-NEON-NEXT: ldr r2, [sp, #304] -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: ldr r3, [sp, #308] -; BE-I64-NEON-NEXT: mov r0, r1 -; BE-I64-NEON-NEXT: mov r1, r6 -; BE-I64-NEON-NEXT: bl 
lrintl -; BE-I64-NEON-NEXT: add r3, sp, #332 -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: mov r0, r5 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #188 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: mov r0, r8 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #204 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #200] -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #348 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #344] -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #364 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #360] -; BE-I64-NEON-NEXT: mov r9, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #316 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #312] -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vldr d18, [sp, #48] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d17, d15 -; BE-I64-NEON-NEXT: vrev64.32 d16, d18 -; BE-I64-NEON-NEXT: vldr d18, [sp, #40] @ 8-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d24[0], r0 -; BE-I64-NEON-NEXT: add r0, r4, #64 -; BE-I64-NEON-NEXT: vldr d20, [sp, #32] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d19, d18 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r11 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r7 -; BE-I64-NEON-NEXT: vrev64.32 d18, d20 -; BE-I64-NEON-NEXT: vldr d20, [sp, #24] @ 8-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d8[1], r10 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r6 -; BE-I64-NEON-NEXT: vmov.32 d24[1], r1 -; 
BE-I64-NEON-NEXT: vldr d22, [sp, #16] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d21, d20 -; BE-I64-NEON-NEXT: vrev64.32 d1, d9 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r9 -; BE-I64-NEON-NEXT: vrev64.32 d31, d10 -; BE-I64-NEON-NEXT: vrev64.32 d20, d22 -; BE-I64-NEON-NEXT: vldr d22, [sp, #8] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d0, d8 -; BE-I64-NEON-NEXT: vrev64.32 d29, d14 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; BE-I64-NEON-NEXT: vrev64.32 d30, d24 -; BE-I64-NEON-NEXT: vrev64.32 d27, d22 -; BE-I64-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload -; BE-I64-NEON-NEXT: vst1.64 {d0, d1}, [r0:128]! -; BE-I64-NEON-NEXT: vmov.32 d11[1], r8 -; BE-I64-NEON-NEXT: vrev64.32 d28, d13 -; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 d26, d22 -; BE-I64-NEON-NEXT: vrev64.32 d23, d12 -; BE-I64-NEON-NEXT: vst1.64 {d28, d29}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 d22, d11 -; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r0:128] -; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r4:128]! -; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r4:128]! -; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r4:128]! 
-; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r4:128] -; BE-I64-NEON-NEXT: add sp, sp, #56 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: add sp, sp, #4 -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x) - ret <16 x iXLen> %a + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x) + ret <8 x iXLen> %a } -declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>) -define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { -; LE-I32-LABEL: lrint_v32fp128: +define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) { +; LE-I32-LABEL: lrint_v16fp128: ; LE-I32: @ %bb.0: ; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -10752,258 +2675,126 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { ; LE-I32-NEXT: sub sp, sp, #4 ; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEXT: .pad #80 -; LE-I32-NEXT: sub sp, sp, #80 -; LE-I32-NEXT: str r3, [sp, #16] @ 4-byte Spill -; LE-I32-NEXT: add r3, sp, #336 -; LE-I32-NEXT: str r2, [sp, #12] @ 4-byte Spill -; LE-I32-NEXT: mov r9, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #244 -; LE-I32-NEXT: ldr r7, [sp, #240] -; LE-I32-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEXT: ldr r5, [sp, #288] -; LE-I32-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: ldr r8, [sp, #352] -; LE-I32-NEXT: ldr r11, [sp, #656] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #292 -; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: mov r0, r5 -; LE-I32-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #272 -; LE-I32-NEXT: mov r10, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr 
r6, [sp, #256] -; LE-I32-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #260] -; LE-I32-NEXT: ldr r2, [sp, #264] -; LE-I32-NEXT: ldr r3, [sp, #268] -; LE-I32-NEXT: mov r0, r6 -; LE-I32-NEXT: ldr r7, [sp, #660] -; LE-I32-NEXT: vmov.32 d11[1], r10 -; LE-I32-NEXT: ldr r5, [sp, #664] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEXT: ldr r1, [sp, #356] -; LE-I32-NEXT: ldr r2, [sp, #360] -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: ldr r3, [sp, #364] -; LE-I32-NEXT: mov r0, r8 -; LE-I32-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r3, [sp, #668] -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEXT: mov r0, r11 -; LE-I32-NEXT: mov r1, r7 -; LE-I32-NEXT: mov r2, r5 -; LE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #400 -; LE-I32-NEXT: mov r8, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #592 +; LE-I32-NEXT: mov r8, r3 +; LE-I32-NEXT: add r3, sp, #280 +; LE-I32-NEXT: mov r9, r2 +; LE-I32-NEXT: mov r10, r1 ; LE-I32-NEXT: mov r6, r0 ; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r4, [sp, #416] -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #420] -; LE-I32-NEXT: ldr r2, [sp, #424] -; LE-I32-NEXT: vmov.32 d13[0], r6 -; LE-I32-NEXT: ldr r3, [sp, #428] +; LE-I32-NEXT: ldr r4, [sp, #216] +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #220] +; LE-I32-NEXT: ldr r2, [sp, #224] +; LE-I32-NEXT: ldr r3, [sp, #228] ; LE-I32-NEXT: mov r0, r4 -; LE-I32-NEXT: ldr r7, [sp, #224] -; LE-I32-NEXT: ldr r10, [sp, #228] -; LE-I32-NEXT: ldr r5, [sp, #232] -; LE-I32-NEXT: ldr r11, [sp, #464] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r3, [sp, #236] -; LE-I32-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: mov r1, r10 -; LE-I32-NEXT: mov r2, r5 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, 
sp, #208 -; LE-I32-NEXT: mov r4, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEXT: ldr r0, [sp, #672] -; LE-I32-NEXT: ldr r1, [sp, #676] -; LE-I32-NEXT: ldr r2, [sp, #680] -; LE-I32-NEXT: vmov.32 d11[0], r8 -; LE-I32-NEXT: ldr r3, [sp, #684] -; LE-I32-NEXT: vmov.32 d9[1], r4 -; LE-I32-NEXT: ldr r7, [sp, #612] -; LE-I32-NEXT: ldr r6, [sp, #616] -; LE-I32-NEXT: ldr r5, [sp, #468] -; LE-I32-NEXT: ldr r4, [sp, #472] -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEXT: ldr r3, [sp, #620] -; LE-I32-NEXT: ldr r0, [sp, #608] -; LE-I32-NEXT: mov r1, r7 -; LE-I32-NEXT: mov r2, r6 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r3, [sp, #476] -; LE-I32-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEXT: mov r0, r11 -; LE-I32-NEXT: mov r1, r5 -; LE-I32-NEXT: mov r2, r4 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #560 -; LE-I32-NEXT: str r0, [sp, #8] @ 4-byte Spill -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #644 -; LE-I32-NEXT: ldr r7, [sp, #640] -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #624 -; LE-I32-NEXT: mov r11, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: ldr r7, [sp, #152] +; LE-I32-NEXT: ldr r11, [sp, #104] ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #196 -; LE-I32-NEXT: ldr r7, [sp, #192] +; LE-I32-NEXT: add r3, sp, #156 ; LE-I32-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEXT: ldm r3, {r1, r2, r3} ; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: mov r6, r0 -; LE-I32-NEXT: ldr r2, [sp, #184] -; LE-I32-NEXT: ldr r3, [sp, #188] -; LE-I32-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; LE-I32-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #324 -; LE-I32-NEXT: ldr 
r7, [sp, #320] -; LE-I32-NEXT: vmov.32 d8[0], r0 ; LE-I32-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEXT: mov r0, r7 -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #304 -; LE-I32-NEXT: mov r7, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: ldr r4, [sp, #368] -; LE-I32-NEXT: ldr r1, [sp, #372] -; LE-I32-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I32-NEXT: ldr r2, [sp, #376] -; LE-I32-NEXT: ldr r3, [sp, #380] -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: mov r0, r4 ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r5, [sp, #384] +; LE-I32-NEXT: ldr r7, [sp, #184] ; LE-I32-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEXT: ldr r1, [sp, #388] -; LE-I32-NEXT: ldr r2, [sp, #392] -; LE-I32-NEXT: ldr r3, [sp, #396] -; LE-I32-NEXT: mov r0, r5 -; LE-I32-NEXT: ldr r4, [sp, #432] +; LE-I32-NEXT: ldr r1, [sp, #188] +; LE-I32-NEXT: ldr r2, [sp, #192] +; LE-I32-NEXT: ldr r3, [sp, #196] +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: ldr r4, [sp, #120] ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEXT: ldr r1, [sp, #436] -; LE-I32-NEXT: ldr r2, [sp, #440] -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: ldr r3, [sp, #444] +; LE-I32-NEXT: add r3, sp, #124 +; LE-I32-NEXT: vmov.32 d13[0], r0 ; LE-I32-NEXT: mov r0, r4 -; LE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; LE-I32-NEXT: ldm r3, {r1, r2, r3} ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEXT: ldr r0, [sp, #576] -; LE-I32-NEXT: ldr r1, [sp, #580] -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vmov.32 d14[1], r7 -; LE-I32-NEXT: ldr r2, [sp, #584] -; LE-I32-NEXT: ldr r3, [sp, #588] -; LE-I32-NEXT: vmov.32 d10[1], r11 -; LE-I32-NEXT: ldr r8, [sp, #448] -; LE-I32-NEXT: ldr r4, [sp, #544] -; LE-I32-NEXT: ldr r10, [sp, #548] -; LE-I32-NEXT: vmov.32 d8[1], r6 -; LE-I32-NEXT: ldr r7, [sp, #552] -; LE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEXT: ldr r11, [sp, #512] +; LE-I32-NEXT: ldr r5, [sp, 
#136] +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #140] +; LE-I32-NEXT: ldr r2, [sp, #144] +; LE-I32-NEXT: ldr r3, [sp, #148] +; LE-I32-NEXT: mov r0, r5 +; LE-I32-NEXT: ldr r4, [sp, #108] +; LE-I32-NEXT: ldr r7, [sp, #112] ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: ldr r3, [sp, #556] -; LE-I32-NEXT: mov r1, r10 -; LE-I32-NEXT: mov r2, r7 -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vmov.32 d16[1], r0 -; LE-I32-NEXT: mov r0, r4 -; LE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I32-NEXT: ldr r3, [sp, #116] +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: mov r0, r11 +; LE-I32-NEXT: mov r1, r4 +; LE-I32-NEXT: mov r2, r7 ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #528 ; LE-I32-NEXT: mov r4, r0 -; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: mov r0, r6 +; LE-I32-NEXT: mov r1, r10 +; LE-I32-NEXT: mov r2, r9 +; LE-I32-NEXT: mov r3, r8 ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; LE-I32-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEXT: ldr r0, [sp, #480] -; LE-I32-NEXT: ldr r2, [sp, #488] -; LE-I32-NEXT: vmov.32 d13[0], r1 -; LE-I32-NEXT: ldr r1, [sp, #484] -; LE-I32-NEXT: ldr r3, [sp, #492] -; LE-I32-NEXT: vmov.32 d15[1], r4 -; LE-I32-NEXT: ldr r7, [sp, #452] -; LE-I32-NEXT: ldr r5, [sp, #456] -; LE-I32-NEXT: ldr r6, [sp, #516] -; LE-I32-NEXT: ldr r4, [sp, #520] +; LE-I32-NEXT: ldr r7, [sp, #200] +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #204] +; LE-I32-NEXT: ldr r2, [sp, #208] +; LE-I32-NEXT: ldr r3, [sp, #212] +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: ldr r5, [sp, #172] +; LE-I32-NEXT: vmov.32 d14[1], r4 +; LE-I32-NEXT: ldr r6, [sp, #176] ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r3, [sp, #460] ; LE-I32-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEXT: mov r0, r8 -; LE-I32-NEXT: mov r1, r7 -; LE-I32-NEXT: mov r2, r5 +; LE-I32-NEXT: ldr r3, [sp, #180] +; LE-I32-NEXT: ldr r0, [sp, #168] +; LE-I32-NEXT: mov r1, r5 +; 
LE-I32-NEXT: mov r2, r6 ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: ldr r3, [sp, #524] -; LE-I32-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEXT: mov r0, r11 +; LE-I32-NEXT: add r3, sp, #248 +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: ldr r4, [sp, #264] +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: ldr r1, [sp, #268] +; LE-I32-NEXT: ldr r2, [sp, #272] +; LE-I32-NEXT: vmov.32 d12[1], r5 +; LE-I32-NEXT: ldr r3, [sp, #276] +; LE-I32-NEXT: mov r0, r4 +; LE-I32-NEXT: ldr r6, [sp, #236] +; LE-I32-NEXT: ldr r7, [sp, #240] +; LE-I32-NEXT: ldr r8, [sp, #332] +; LE-I32-NEXT: ldr r5, [sp, #336] +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d11[1], r0 +; LE-I32-NEXT: ldr r3, [sp, #244] +; LE-I32-NEXT: ldr r0, [sp, #232] ; LE-I32-NEXT: mov r1, r6 -; LE-I32-NEXT: mov r2, r4 +; LE-I32-NEXT: mov r2, r7 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: ldr r3, [sp, #340] +; LE-I32-NEXT: ldr r0, [sp, #328] +; LE-I32-NEXT: mov r1, r8 +; LE-I32-NEXT: mov r2, r5 ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: add r3, sp, #496 +; LE-I32-NEXT: add r3, sp, #312 ; LE-I32-NEXT: mov r4, r0 ; LE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; LE-I32-NEXT: bl lrintl -; LE-I32-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEXT: add r0, r9, #64 -; LE-I32-NEXT: add lr, sp, #64 -; LE-I32-NEXT: vst1.32 {d12, d13}, [r0:128]! -; LE-I32-NEXT: vmov.32 d14[1], r4 -; LE-I32-NEXT: vst1.32 {d14, d15}, [r0:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #32 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEXT: vst1.64 {d10, d11}, [r0:128] -; LE-I32-NEXT: vst1.32 {d8, d9}, [r9:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #48 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r9:128]! -; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: add lr, sp, #16 -; LE-I32-NEXT: vst1.32 {d16, d17}, [r9:128]! 
-; LE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEXT: vst1.64 {d16, d17}, [r9:128] -; LE-I32-NEXT: add sp, sp, #80 +; LE-I32-NEXT: add r3, sp, #300 +; LE-I32-NEXT: ldr r7, [sp, #296] +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: ldm r3, {r1, r2, r3} +; LE-I32-NEXT: mov r0, r7 +; LE-I32-NEXT: vmov.32 d9[1], r4 +; LE-I32-NEXT: bl lrintl +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vorr q0, q7, q7 +; LE-I32-NEXT: vorr q1, q6, q6 +; LE-I32-NEXT: vorr q2, q5, q5 +; LE-I32-NEXT: vorr q3, q4, q4 ; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I32-NEXT: add sp, sp, #4 ; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-I64-LABEL: lrint_v32fp128: +; LE-I64-LABEL: lrint_v16fp128: ; LE-I64: @ %bb.0: ; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -11011,988 +2802,162 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { ; LE-I64-NEXT: sub sp, sp, #4 ; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEXT: .pad #192 -; LE-I64-NEXT: sub sp, sp, #192 -; LE-I64-NEXT: str r3, [sp, #60] @ 4-byte Spill -; LE-I64-NEXT: add r3, sp, #688 -; LE-I64-NEXT: str r2, [sp, #56] @ 4-byte Spill -; LE-I64-NEXT: mov r9, r0 -; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #560 +; LE-I64-NEXT: .pad #72 +; LE-I64-NEXT: sub sp, sp, #72 +; LE-I64-NEXT: mov r6, r3 +; LE-I64-NEXT: add r3, sp, #408 +; LE-I64-NEXT: mov r7, r2 ; LE-I64-NEXT: mov r4, r0 -; LE-I64-NEXT: str r1, [sp, #64] @ 4-byte Spill ; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEXT: ldr r7, [sp, #544] -; LE-I64-NEXT: ldr r6, [sp, #548] -; LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: ldr r2, [sp, #552] -; LE-I64-NEXT: vmov.32 d17[1], r1 -; LE-I64-NEXT: ldr r3, [sp, #556] +; LE-I64-NEXT: add r5, sp, #176 +; LE-I64-NEXT: 
mov r10, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 ; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: ldm r5, {r2, r3, r5} ; LE-I64-NEXT: mov r1, r6 -; LE-I64-NEXT: vorr q4, q8, q8 -; LE-I64-NEXT: ldr r5, [sp, #528] -; LE-I64-NEXT: vmov.32 d17[0], r4 -; LE-I64-NEXT: ldr r10, [sp, #304] -; LE-I64-NEXT: ldr r8, [sp, #368] -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: ldr r8, [sp, #232] ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #532 +; LE-I64-NEXT: add r3, sp, #188 +; LE-I64-NEXT: mov r9, r1 ; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: add lr, sp, #144 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: mov r0, r5 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #308 -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: mov r0, r10 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #372 -; LE-I64-NEXT: mov r10, r1 -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: mov r0, r8 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #404 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #400] -; LE-I64-NEXT: mov r6, r1 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #596 +; LE-I64-NEXT: add r3, sp, #236 +; LE-I64-NEXT: mov r11, r1 ; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #592] -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #676 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #672] -; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: mov r0, r8 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: vmov.32 d13[1], r4 -; LE-I64-NEXT: str r1, [sp, #52] @ 4-byte Spill -; 
LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #80 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #128 -; LE-I64-NEXT: vmov.32 d9[1], r7 -; LE-I64-NEXT: ldr r1, [sp, #628] -; LE-I64-NEXT: ldr r2, [sp, #632] -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #112 -; LE-I64-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEXT: ldr r3, [sp, #636] -; LE-I64-NEXT: ldr r7, [sp, #64] @ 4-byte Reload -; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: vmov.32 d11[1], r10 -; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #144 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d18[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #624] -; LE-I64-NEXT: vmov.32 d16[1], r11 -; LE-I64-NEXT: vmov.32 d9[1], r5 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: vmov.32 d19[1], r7 -; LE-I64-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #512 -; LE-I64-NEXT: str r0, [sp, #48] @ 4-byte Spill -; LE-I64-NEXT: str r1, [sp, #64] @ 4-byte Spill -; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #768 -; LE-I64-NEXT: mov r11, r0 -; LE-I64-NEXT: str r1, [sp, #28] @ 4-byte Spill -; LE-I64-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: ldr r6, [sp, #784] -; LE-I64-NEXT: add r3, sp, #788 +; LE-I64-NEXT: add r3, sp, #252 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #248] ; LE-I64-NEXT: mov r8, r1 -; LE-I64-NEXT: vmov.32 d14[0], r0 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: mov r0, r6 -; LE-I64-NEXT: ldr r5, [sp, #736] -; LE-I64-NEXT: ldr r7, [sp, #752] -; LE-I64-NEXT: ldr r4, [sp, #720] ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, 
#740 +; LE-I64-NEXT: add r3, sp, #268 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #264] ; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: mov r0, r5 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #756 -; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: mov r0, r7 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #724 +; LE-I64-NEXT: add r3, sp, #284 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #280] ; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: mov r0, r4 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: ldr r2, [sp, #296] -; LE-I64-NEXT: vmov.32 d12[1], r5 -; LE-I64-NEXT: ldr r3, [sp, #300] -; LE-I64-NEXT: ldr r4, [sp, #576] -; LE-I64-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload -; LE-I64-NEXT: ldr r10, [sp, #384] -; LE-I64-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEXT: ldr r6, [sp, #352] -; LE-I64-NEXT: vmov.32 d14[1], r8 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #32 -; LE-I64-NEXT: vmov.32 d11[1], r1 -; LE-I64-NEXT: ldr r1, [sp, #60] @ 4-byte Reload -; LE-I64-NEXT: vmov.32 d8[0], r11 -; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: add r3, sp, #356 +; LE-I64-NEXT: add r3, sp, #316 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #312] ; LE-I64-NEXT: mov r5, r1 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: vmov.32 d16[0], r0 -; LE-I64-NEXT: mov r0, r6 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add lr, sp, #112 -; 
LE-I64-NEXT: add r3, sp, #388 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: mov r0, r10 -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add lr, sp, #128 -; LE-I64-NEXT: add r3, sp, #580 -; LE-I64-NEXT: mov r6, r1 -; LE-I64-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: mov r0, r4 -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add lr, sp, #80 -; LE-I64-NEXT: add r3, sp, #708 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload ; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #704] ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: vmov.32 d8[1], r4 -; LE-I64-NEXT: add lr, sp, #80 -; LE-I64-NEXT: ldr r2, [sp, #52] @ 4-byte Reload -; LE-I64-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEXT: ldr r6, [sp, #644] -; LE-I64-NEXT: ldr r3, [sp, #652] -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #128 +; LE-I64-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: ldr r5, [sp, #300] ; LE-I64-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEXT: ldr r4, [sp, #480] -; LE-I64-NEXT: ldr r7, [sp, #656] -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #112 +; LE-I64-NEXT: ldr r2, [sp, #304] +; LE-I64-NEXT: ldr r3, [sp, #308] +; LE-I64-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEXT: ldr r6, [sp, #200] +; LE-I64-NEXT: ldr r7, [sp, #204] +; LE-I64-NEXT: vmov.32 d10[1], r8 +; LE-I64-NEXT: ldr r8, [sp, #344] +; LE-I64-NEXT: vmov.32 d9[1], r11 +; LE-I64-NEXT: ldr r11, [sp, #216] ; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vmov.32 d17[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #296] +; LE-I64-NEXT: vmov.32 d8[1], r9 +; LE-I64-NEXT: 
vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vorr q5, q8, q8 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: vorr q4, q6, q6 +; LE-I64-NEXT: vmov.32 d11[1], r1 +; LE-I64-NEXT: mov r1, r5 +; LE-I64-NEXT: vmov.32 d9[1], r10 +; LE-I64-NEXT: bl lrintl ; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; LE-I64-NEXT: ldr r10, [sp, #496] -; LE-I64-NEXT: vmov.32 d16[1], r5 -; LE-I64-NEXT: add r5, r9, #192 -; LE-I64-NEXT: ldr r8, [sp, #608] -; LE-I64-NEXT: vmov.32 d10[1], r1 -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vmov.32 d16[1], r0 -; LE-I64-NEXT: ldr r0, [sp, #640] -; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: ldr r2, [sp, #208] +; LE-I64-NEXT: ldr r3, [sp, #212] ; LE-I64-NEXT: add lr, sp, #8 -; LE-I64-NEXT: vmov.32 d16[1], r2 -; LE-I64-NEXT: ldr r2, [sp, #648] -; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! -; LE-I64-NEXT: vst1.64 {d10, d11}, [r5:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128]! 
-; LE-I64-NEXT: ldr r1, [sp, #48] @ 4-byte Reload -; LE-I64-NEXT: vmov.32 d9[0], r1 -; LE-I64-NEXT: mov r1, r6 -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #660 -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: mov r0, r7 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #484 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: mov r0, r4 -; LE-I64-NEXT: ldm r3, {r1, r2, r3} +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: mov r0, r6 +; LE-I64-NEXT: mov r1, r7 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #500 -; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: add r3, sp, #220 +; LE-I64-NEXT: mov r10, r1 ; LE-I64-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: mov r0, r11 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #612 -; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: add r3, sp, #348 +; LE-I64-NEXT: mov r11, r1 ; LE-I64-NEXT: vmov.32 d11[0], r0 ; LE-I64-NEXT: mov r0, r8 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #64] @ 4-byte Reload -; LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: add r8, r9, #128 -; LE-I64-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEXT: ldr r2, [sp, #344] -; LE-I64-NEXT: ldr r3, [sp, #348] -; LE-I64-NEXT: vmov.32 d12[1], r11 -; LE-I64-NEXT: ldr r7, [sp, #452] -; LE-I64-NEXT: ldr r10, [sp, #416] -; LE-I64-NEXT: vmov.32 d9[1], r0 -; LE-I64-NEXT: ldr r0, [sp, #336] -; LE-I64-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #64 -; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEXT: add lr, sp, #32 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #144 -; LE-I64-NEXT: vmov.32 d11[1], r4 -; LE-I64-NEXT: ldr r4, [sp, #340] -; LE-I64-NEXT: vst1.64 {d16, d17}, [r5:128] -; 
LE-I64-NEXT: mov r1, r4 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #80 -; LE-I64-NEXT: vmov.32 d10[1], r6 -; LE-I64-NEXT: ldr r6, [sp, #448] -; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: ldr r2, [sp, #456] -; LE-I64-NEXT: mov r11, r1 -; LE-I64-NEXT: ldr r3, [sp, #460] -; LE-I64-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEXT: mov r0, r6 -; LE-I64-NEXT: mov r1, r7 -; LE-I64-NEXT: ldr r5, [sp, #432] -; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #468 -; LE-I64-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #464] -; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: add r3, sp, #364 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #360] +; LE-I64-NEXT: mov r8, r1 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #420 -; LE-I64-NEXT: mov r7, r1 -; LE-I64-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: add r3, sp, #380 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #376] +; LE-I64-NEXT: mov r5, r1 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #436 -; LE-I64-NEXT: mov r4, r1 -; LE-I64-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEXT: mov r0, r5 +; LE-I64-NEXT: add r3, sp, #396 +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #392] +; LE-I64-NEXT: mov r6, r1 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add r3, sp, #324 -; LE-I64-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEXT: ldr r0, [sp, #320] -; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: add r3, sp, #332 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #328] +; LE-I64-NEXT: mov r7, r1 ; LE-I64-NEXT: ldm r3, {r1, r2, r3} ; LE-I64-NEXT: bl lrintl -; LE-I64-NEXT: add lr, sp, #64 -; LE-I64-NEXT: vmov.32 d9[1], r5 -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; 
LE-I64-NEXT: add lr, sp, #96 -; LE-I64-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #176 -; LE-I64-NEXT: vmov.32 d8[1], r4 -; LE-I64-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEXT: add r0, r9, #64 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r8:128] -; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128]! +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: add r0, r4, #64 +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov.32 d13[1], r8 +; LE-I64-NEXT: vmov.32 d18[1], r9 +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: vmov.32 d12[1], r1 +; LE-I64-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! ; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #160 -; LE-I64-NEXT: vmov.32 d15[1], r11 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128] +; LE-I64-NEXT: vmov.32 d8[1], r7 +; LE-I64-NEXT: vst1.64 {d14, d15}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d8, d9}, [r0:128] +; LE-I64-NEXT: vmov.32 d11[1], r11 ; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #112 -; LE-I64-NEXT: vmov.32 d14[1], r1 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128]! -; LE-I64-NEXT: vst1.64 {d14, d15}, [r9:128]! +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vmov.32 d10[1], r10 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]! +; LE-I64-NEXT: vst1.64 {d10, d11}, [r4:128]! ; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: add lr, sp, #128 -; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128]! +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128]! 
; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEXT: vst1.64 {d16, d17}, [r9:128] -; LE-I64-NEXT: add sp, sp, #192 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r4:128] +; LE-I64-NEXT: add sp, sp, #72 ; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; LE-I64-NEXT: add sp, sp, #4 ; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; LE-I32-NEON-LABEL: lrint_v32fp128: -; LE-I32-NEON: @ %bb.0: -; LE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I32-NEON-NEXT: .pad #4 -; LE-I32-NEON-NEXT: sub sp, sp, #4 -; LE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: .pad #80 -; LE-I32-NEON-NEXT: sub sp, sp, #80 -; LE-I32-NEON-NEXT: str r3, [sp, #16] @ 4-byte Spill -; LE-I32-NEON-NEXT: add r3, sp, #336 -; LE-I32-NEON-NEXT: str r2, [sp, #12] @ 4-byte Spill -; LE-I32-NEON-NEXT: mov r9, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #244 -; LE-I32-NEON-NEXT: ldr r7, [sp, #240] -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: ldr r5, [sp, #288] -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: ldr r8, [sp, #352] -; LE-I32-NEON-NEXT: ldr r11, [sp, #656] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #292 -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: mov r0, r5 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #272 -; LE-I32-NEON-NEXT: mov r10, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r6, [sp, #256] -; LE-I32-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #260] -; LE-I32-NEON-NEXT: ldr r2, [sp, #264] -; LE-I32-NEON-NEXT: ldr r3, [sp, #268] -; LE-I32-NEON-NEXT: mov r0, r6 -; LE-I32-NEON-NEXT: 
ldr r7, [sp, #660] -; LE-I32-NEON-NEXT: vmov.32 d11[1], r10 -; LE-I32-NEON-NEXT: ldr r5, [sp, #664] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #356] -; LE-I32-NEON-NEXT: ldr r2, [sp, #360] -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: ldr r3, [sp, #364] -; LE-I32-NEON-NEXT: mov r0, r8 -; LE-I32-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r3, [sp, #668] -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vmov.32 d9[1], r0 -; LE-I32-NEON-NEXT: mov r0, r11 -; LE-I32-NEON-NEXT: mov r1, r7 -; LE-I32-NEON-NEXT: mov r2, r5 -; LE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #400 -; LE-I32-NEON-NEXT: mov r8, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #592 -; LE-I32-NEON-NEXT: mov r6, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r4, [sp, #416] -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #420] -; LE-I32-NEON-NEXT: ldr r2, [sp, #424] -; LE-I32-NEON-NEXT: vmov.32 d13[0], r6 -; LE-I32-NEON-NEXT: ldr r3, [sp, #428] -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: ldr r7, [sp, #224] -; LE-I32-NEON-NEXT: ldr r10, [sp, #228] -; LE-I32-NEON-NEXT: ldr r5, [sp, #232] -; LE-I32-NEON-NEXT: ldr r11, [sp, #464] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r3, [sp, #236] -; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: mov r1, r10 -; LE-I32-NEON-NEXT: mov r2, r5 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #208 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I32-NEON-NEXT: ldr r0, [sp, #672] -; LE-I32-NEON-NEXT: ldr r1, [sp, #676] -; 
LE-I32-NEON-NEXT: ldr r2, [sp, #680] -; LE-I32-NEON-NEXT: vmov.32 d11[0], r8 -; LE-I32-NEON-NEXT: ldr r3, [sp, #684] -; LE-I32-NEON-NEXT: vmov.32 d9[1], r4 -; LE-I32-NEON-NEXT: ldr r7, [sp, #612] -; LE-I32-NEON-NEXT: ldr r6, [sp, #616] -; LE-I32-NEON-NEXT: ldr r5, [sp, #468] -; LE-I32-NEON-NEXT: ldr r4, [sp, #472] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; LE-I32-NEON-NEXT: ldr r3, [sp, #620] -; LE-I32-NEON-NEXT: ldr r0, [sp, #608] -; LE-I32-NEON-NEXT: mov r1, r7 -; LE-I32-NEON-NEXT: mov r2, r6 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r3, [sp, #476] -; LE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; LE-I32-NEON-NEXT: mov r0, r11 -; LE-I32-NEON-NEXT: mov r1, r5 -; LE-I32-NEON-NEXT: mov r2, r4 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #560 -; LE-I32-NEON-NEXT: str r0, [sp, #8] @ 4-byte Spill -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #644 -; LE-I32-NEON-NEXT: ldr r7, [sp, #640] -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #624 -; LE-I32-NEON-NEXT: mov r11, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #196 -; LE-I32-NEON-NEXT: ldr r7, [sp, #192] -; LE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: mov r6, r0 -; LE-I32-NEON-NEXT: ldr r2, [sp, #184] -; LE-I32-NEON-NEXT: ldr r3, [sp, #188] -; LE-I32-NEON-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; LE-I32-NEON-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #324 -; LE-I32-NEON-NEXT: ldr r7, [sp, #320] -; LE-I32-NEON-NEXT: vmov.32 d8[0], r0 
-; LE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I32-NEON-NEXT: mov r0, r7 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #304 -; LE-I32-NEON-NEXT: mov r7, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: ldr r4, [sp, #368] -; LE-I32-NEON-NEXT: ldr r1, [sp, #372] -; LE-I32-NEON-NEXT: vldmia lr, {d14, d15} @ 16-byte Reload -; LE-I32-NEON-NEXT: ldr r2, [sp, #376] -; LE-I32-NEON-NEXT: ldr r3, [sp, #380] -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r5, [sp, #384] -; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #388] -; LE-I32-NEON-NEXT: ldr r2, [sp, #392] -; LE-I32-NEON-NEXT: ldr r3, [sp, #396] -; LE-I32-NEON-NEXT: mov r0, r5 -; LE-I32-NEON-NEXT: ldr r4, [sp, #432] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEON-NEXT: ldr r1, [sp, #436] -; LE-I32-NEON-NEXT: ldr r2, [sp, #440] -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: ldr r3, [sp, #444] -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I32-NEON-NEXT: ldr r0, [sp, #576] -; LE-I32-NEON-NEXT: ldr r1, [sp, #580] -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vmov.32 d14[1], r7 -; LE-I32-NEON-NEXT: ldr r2, [sp, #584] -; LE-I32-NEON-NEXT: ldr r3, [sp, #588] -; LE-I32-NEON-NEXT: vmov.32 d10[1], r11 -; LE-I32-NEON-NEXT: ldr r8, [sp, #448] -; LE-I32-NEON-NEXT: ldr r4, [sp, #544] -; LE-I32-NEON-NEXT: ldr r10, [sp, #548] -; LE-I32-NEON-NEXT: vmov.32 d8[1], r6 -; LE-I32-NEON-NEXT: ldr r7, [sp, #552] -; LE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I32-NEON-NEXT: ldr r11, [sp, #512] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: ldr r3, [sp, #556] -; LE-I32-NEON-NEXT: mov r1, r10 -; 
LE-I32-NEON-NEXT: mov r2, r7 -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vmov.32 d16[1], r0 -; LE-I32-NEON-NEXT: mov r0, r4 -; LE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #528 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; LE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I32-NEON-NEXT: ldr r0, [sp, #480] -; LE-I32-NEON-NEXT: ldr r2, [sp, #488] -; LE-I32-NEON-NEXT: vmov.32 d13[0], r1 -; LE-I32-NEON-NEXT: ldr r1, [sp, #484] -; LE-I32-NEON-NEXT: ldr r3, [sp, #492] -; LE-I32-NEON-NEXT: vmov.32 d15[1], r4 -; LE-I32-NEON-NEXT: ldr r7, [sp, #452] -; LE-I32-NEON-NEXT: ldr r5, [sp, #456] -; LE-I32-NEON-NEXT: ldr r6, [sp, #516] -; LE-I32-NEON-NEXT: ldr r4, [sp, #520] -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r3, [sp, #460] -; LE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; LE-I32-NEON-NEXT: mov r0, r8 -; LE-I32-NEON-NEXT: mov r1, r7 -; LE-I32-NEON-NEXT: mov r2, r5 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: ldr r3, [sp, #524] -; LE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; LE-I32-NEON-NEXT: mov r0, r11 -; LE-I32-NEON-NEXT: mov r1, r6 -; LE-I32-NEON-NEXT: mov r2, r4 -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: add r3, sp, #496 -; LE-I32-NEON-NEXT: mov r4, r0 -; LE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I32-NEON-NEXT: bl lrintl -; LE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I32-NEON-NEXT: add r0, r9, #64 -; LE-I32-NEON-NEXT: add lr, sp, #64 -; LE-I32-NEON-NEXT: vst1.32 {d12, d13}, [r0:128]! -; LE-I32-NEON-NEXT: vmov.32 d14[1], r4 -; LE-I32-NEON-NEXT: vst1.32 {d14, d15}, [r0:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #32 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r0:128]! -; LE-I32-NEON-NEXT: vst1.64 {d10, d11}, [r0:128] -; LE-I32-NEON-NEXT: vst1.32 {d8, d9}, [r9:128]! 
-; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #48 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r9:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: add lr, sp, #16 -; LE-I32-NEON-NEXT: vst1.32 {d16, d17}, [r9:128]! -; LE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] -; LE-I32-NEON-NEXT: add sp, sp, #80 -; LE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I32-NEON-NEXT: add sp, sp, #4 -; LE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; LE-I64-NEON-LABEL: lrint_v32fp128: -; LE-I64-NEON: @ %bb.0: -; LE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; LE-I64-NEON-NEXT: .pad #4 -; LE-I64-NEON-NEXT: sub sp, sp, #4 -; LE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: .pad #192 -; LE-I64-NEON-NEXT: sub sp, sp, #192 -; LE-I64-NEON-NEXT: str r3, [sp, #60] @ 4-byte Spill -; LE-I64-NEON-NEXT: add r3, sp, #688 -; LE-I64-NEON-NEXT: str r2, [sp, #56] @ 4-byte Spill -; LE-I64-NEON-NEXT: mov r9, r0 -; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #560 -; LE-I64-NEON-NEXT: mov r4, r0 -; LE-I64-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill -; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: ldr r7, [sp, #544] -; LE-I64-NEON-NEXT: ldr r6, [sp, #548] -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: ldr r2, [sp, #552] -; LE-I64-NEON-NEXT: vmov.32 d17[1], r1 -; LE-I64-NEON-NEXT: ldr r3, [sp, #556] -; LE-I64-NEON-NEXT: mov r0, r7 -; LE-I64-NEON-NEXT: mov r1, r6 -; LE-I64-NEON-NEXT: vorr q4, q8, q8 -; LE-I64-NEON-NEXT: ldr r5, [sp, #528] -; LE-I64-NEON-NEXT: vmov.32 d17[0], r4 -; 
LE-I64-NEON-NEXT: ldr r10, [sp, #304] -; LE-I64-NEON-NEXT: ldr r8, [sp, #368] -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #532 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: mov r0, r5 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #308 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vmov.32 d17[0], r0 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: mov r0, r10 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #372 -; LE-I64-NEON-NEXT: mov r10, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: mov r0, r8 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #404 -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #400] -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #596 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #592] -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #676 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #672] -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r4 -; LE-I64-NEON-NEXT: str r1, [sp, #52] @ 4-byte Spill -; LE-I64-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #80 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #128 
-; LE-I64-NEON-NEXT: vmov.32 d9[1], r7 -; LE-I64-NEON-NEXT: ldr r1, [sp, #628] -; LE-I64-NEON-NEXT: ldr r2, [sp, #632] -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #112 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEON-NEXT: ldr r3, [sp, #636] -; LE-I64-NEON-NEXT: ldr r7, [sp, #64] @ 4-byte Reload -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r10 -; LE-I64-NEON-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d18[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #624] -; LE-I64-NEON-NEXT: vmov.32 d16[1], r11 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r5 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: vmov.32 d19[1], r7 -; LE-I64-NEON-NEXT: vstmia lr, {d18, d19} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #512 -; LE-I64-NEON-NEXT: str r0, [sp, #48] @ 4-byte Spill -; LE-I64-NEON-NEXT: str r1, [sp, #64] @ 4-byte Spill -; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #768 -; LE-I64-NEON-NEXT: mov r11, r0 -; LE-I64-NEON-NEXT: str r1, [sp, #28] @ 4-byte Spill -; LE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: ldr r6, [sp, #784] -; LE-I64-NEON-NEXT: add r3, sp, #788 -; LE-I64-NEON-NEXT: mov r8, r1 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: mov r0, r6 -; LE-I64-NEON-NEXT: ldr r5, [sp, #736] -; LE-I64-NEON-NEXT: ldr r7, [sp, #752] -; LE-I64-NEON-NEXT: ldr r4, [sp, #720] -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #740 -; LE-I64-NEON-NEXT: mov r6, r1 -; 
LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: mov r0, r5 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #756 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: mov r0, r7 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #724 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: mov r0, r4 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: ldr r2, [sp, #296] -; LE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; LE-I64-NEON-NEXT: ldr r3, [sp, #300] -; LE-I64-NEON-NEXT: ldr r4, [sp, #576] -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #56] @ 4-byte Reload -; LE-I64-NEON-NEXT: ldr r10, [sp, #384] -; LE-I64-NEON-NEXT: vmov.32 d15[1], r6 -; LE-I64-NEON-NEXT: ldr r6, [sp, #352] -; LE-I64-NEON-NEXT: vmov.32 d14[1], r8 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #32 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r1 -; LE-I64-NEON-NEXT: ldr r1, [sp, #60] @ 4-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d8[0], r11 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: add r3, sp, #356 -; LE-I64-NEON-NEXT: mov r5, r1 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; LE-I64-NEON-NEXT: mov r0, r6 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add lr, sp, #112 -; LE-I64-NEON-NEXT: add r3, sp, #388 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vldmia lr, {d14, 
d15} @ 16-byte Reload -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: mov r0, r10 -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add lr, sp, #128 -; LE-I64-NEON-NEXT: add r3, sp, #580 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: mov r0, r4 -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add lr, sp, #80 -; LE-I64-NEON-NEXT: add r3, sp, #708 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #704] -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d8[1], r4 -; LE-I64-NEON-NEXT: add lr, sp, #80 -; LE-I64-NEON-NEXT: ldr r2, [sp, #52] @ 4-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEON-NEXT: ldr r6, [sp, #644] -; LE-I64-NEON-NEXT: ldr r3, [sp, #652] -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #128 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r7 -; LE-I64-NEON-NEXT: ldr r4, [sp, #480] -; LE-I64-NEON-NEXT: ldr r7, [sp, #656] -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #112 -; LE-I64-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; LE-I64-NEON-NEXT: ldr r10, [sp, #496] -; LE-I64-NEON-NEXT: vmov.32 d16[1], r5 -; LE-I64-NEON-NEXT: add r5, r9, #192 -; LE-I64-NEON-NEXT: ldr r8, [sp, #608] -; LE-I64-NEON-NEXT: vmov.32 d10[1], r1 -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vmov.32 
d16[1], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #640] -; LE-I64-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #8 -; LE-I64-NEON-NEXT: vmov.32 d16[1], r2 -; LE-I64-NEON-NEXT: ldr r2, [sp, #648] -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r5:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128]! -; LE-I64-NEON-NEXT: ldr r1, [sp, #48] @ 4-byte Reload -; LE-I64-NEON-NEXT: vmov.32 d9[0], r1 -; LE-I64-NEON-NEXT: mov r1, r6 -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #660 -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: mov r0, r7 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #484 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: mov r0, r4 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #500 -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; LE-I64-NEON-NEXT: mov r0, r10 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #612 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; LE-I64-NEON-NEXT: mov r0, r8 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #64] @ 4-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: add r8, r9, #128 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEON-NEXT: ldr r2, [sp, #344] -; LE-I64-NEON-NEXT: ldr r3, [sp, #348] -; LE-I64-NEON-NEXT: vmov.32 d12[1], r11 -; LE-I64-NEON-NEXT: ldr r7, [sp, #452] -; LE-I64-NEON-NEXT: ldr r10, [sp, #416] -; LE-I64-NEON-NEXT: vmov.32 
d9[1], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #336] -; LE-I64-NEON-NEXT: vmov.32 d8[1], r1 -; LE-I64-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #64 -; LE-I64-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; LE-I64-NEON-NEXT: add lr, sp, #32 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #144 -; LE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; LE-I64-NEON-NEXT: ldr r4, [sp, #340] -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r5:128] -; LE-I64-NEON-NEXT: mov r1, r4 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #80 -; LE-I64-NEON-NEXT: vmov.32 d10[1], r6 -; LE-I64-NEON-NEXT: ldr r6, [sp, #448] -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: ldr r2, [sp, #456] -; LE-I64-NEON-NEXT: mov r11, r1 -; LE-I64-NEON-NEXT: ldr r3, [sp, #460] -; LE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; LE-I64-NEON-NEXT: mov r0, r6 -; LE-I64-NEON-NEXT: mov r1, r7 -; LE-I64-NEON-NEXT: ldr r5, [sp, #432] -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #468 -; LE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #464] -; LE-I64-NEON-NEXT: mov r6, r1 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #420 -; LE-I64-NEON-NEXT: mov r7, r1 -; LE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; LE-I64-NEON-NEXT: mov r0, r10 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #436 -; LE-I64-NEON-NEXT: mov r4, r1 -; LE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; LE-I64-NEON-NEXT: mov r0, r5 -; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add r3, sp, #324 -; LE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; LE-I64-NEON-NEXT: ldr r0, [sp, #320] -; LE-I64-NEON-NEXT: mov r5, r1 
-; LE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; LE-I64-NEON-NEXT: bl lrintl -; LE-I64-NEON-NEXT: add lr, sp, #64 -; LE-I64-NEON-NEXT: vmov.32 d9[1], r5 -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #96 -; LE-I64-NEON-NEXT: vmov.32 d13[1], r7 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #176 -; LE-I64-NEON-NEXT: vmov.32 d8[1], r4 -; LE-I64-NEON-NEXT: vmov.32 d12[1], r6 -; LE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; LE-I64-NEON-NEXT: add r0, r9, #64 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128] -; LE-I64-NEON-NEXT: vst1.64 {d8, d9}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d12, d13}, [r0:128]! -; LE-I64-NEON-NEXT: vst1.64 {d10, d11}, [r0:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #160 -; LE-I64-NEON-NEXT: vmov.32 d15[1], r11 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r0:128] -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #112 -; LE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]! -; LE-I64-NEON-NEXT: vst1.64 {d14, d15}, [r9:128]! -; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: add lr, sp, #128 -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128]! 
-; LE-I64-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; LE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] -; LE-I64-NEON-NEXT: add sp, sp, #192 -; LE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; LE-I64-NEON-NEXT: add sp, sp, #4 -; LE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-LABEL: lrint_v32fp128: +; BE-I32-LABEL: lrint_v16fp128: ; BE-I32: @ %bb.0: ; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -12000,261 +2965,126 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { ; BE-I32-NEXT: sub sp, sp, #4 ; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEXT: .pad #104 -; BE-I32-NEXT: sub sp, sp, #104 -; BE-I32-NEXT: mov r4, r3 -; BE-I32-NEXT: add r3, sp, #248 -; BE-I32-NEXT: mov r8, r2 -; BE-I32-NEXT: mov r11, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #616 -; BE-I32-NEXT: mov r9, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #680 -; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: .pad #16 +; BE-I32-NEXT: sub sp, sp, #16 +; BE-I32-NEXT: stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill +; BE-I32-NEXT: add r3, sp, #264 ; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r7, [sp, #232] -; BE-I32-NEXT: add lr, sp, #72 -; BE-I32-NEXT: ldr r1, [sp, #236] -; BE-I32-NEXT: vmov.32 d17[0], r0 -; BE-I32-NEXT: ldr r2, [sp, #240] -; BE-I32-NEXT: ldr r3, [sp, #244] -; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: ldr r10, [sp, #376] -; BE-I32-NEXT: vmov.32 d11[0], r5 -; BE-I32-NEXT: ldr r6, [sp, #296] -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #300 -; BE-I32-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEXT: mov r0, r6 -; BE-I32-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add 
r3, sp, #380 -; BE-I32-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEXT: mov r0, r10 +; BE-I32-NEXT: add r3, sp, #332 +; BE-I32-NEXT: ldr r7, [sp, #328] +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: ldr r10, [sp, #280] ; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r0, r7 +; BE-I32-NEXT: ldr r8, [sp, #168] ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #360 -; BE-I32-NEXT: mov r5, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d17[0], r0 -; BE-I32-NEXT: ldr r6, [sp, #312] -; BE-I32-NEXT: ldr r1, [sp, #316] -; BE-I32-NEXT: ldr r2, [sp, #320] -; BE-I32-NEXT: ldr r3, [sp, #324] -; BE-I32-NEXT: vmov.32 d17[1], r5 -; BE-I32-NEXT: mov r0, r6 -; BE-I32-NEXT: ldr r7, [sp, #572] -; BE-I32-NEXT: vorr q4, q8, q8 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r6, [sp, #632] -; BE-I32-NEXT: add lr, sp, #88 -; BE-I32-NEXT: ldr r1, [sp, #636] -; BE-I32-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEXT: ldr r2, [sp, #640] -; BE-I32-NEXT: ldr r3, [sp, #644] -; BE-I32-NEXT: mov r0, r6 -; BE-I32-NEXT: ldr r5, [sp, #576] -; BE-I32-NEXT: vmov.32 d15[1], r9 -; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill +; BE-I32-NEXT: ldr r5, [sp, #344] +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #348] +; BE-I32-NEXT: ldr r2, [sp, #352] +; BE-I32-NEXT: ldr r3, [sp, #356] +; BE-I32-NEXT: mov r0, r5 +; BE-I32-NEXT: ldr r7, [sp, #284] +; BE-I32-NEXT: ldr r4, [sp, #288] +; BE-I32-NEXT: ldr r6, [sp, #172] +; BE-I32-NEXT: ldr r9, [sp, #176] ; BE-I32-NEXT: bl lrintl +; BE-I32-NEXT: ldr r3, [sp, #292] ; BE-I32-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEXT: ldr r3, [sp, #580] -; BE-I32-NEXT: ldr r0, [sp, #568] +; BE-I32-NEXT: mov r0, r10 ; BE-I32-NEXT: mov r1, r7 -; BE-I32-NEXT: mov r2, r5 -; BE-I32-NEXT: vorr q6, q5, q5 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #552 -; BE-I32-NEXT: mov r9, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #520 -; BE-I32-NEXT: mov r5, r0 -; BE-I32-NEXT: 
ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r6, [sp, #584] -; BE-I32-NEXT: add lr, sp, #8 -; BE-I32-NEXT: ldr r1, [sp, #588] -; BE-I32-NEXT: vmov.32 d16[0], r0 -; BE-I32-NEXT: ldr r2, [sp, #592] -; BE-I32-NEXT: ldr r3, [sp, #596] -; BE-I32-NEXT: mov r0, r6 -; BE-I32-NEXT: vmov.32 d17[0], r5 -; BE-I32-NEXT: ldr r7, [sp, #216] -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #220 -; BE-I32-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: ldm r3, {r1, r2, r3} +; BE-I32-NEXT: mov r2, r4 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r2, [sp, #208] -; BE-I32-NEXT: mov r7, r0 -; BE-I32-NEXT: ldr r3, [sp, #212] +; BE-I32-NEXT: ldr r3, [sp, #180] +; BE-I32-NEXT: vmov.32 d9[1], r0 ; BE-I32-NEXT: mov r0, r8 -; BE-I32-NEXT: mov r1, r4 +; BE-I32-NEXT: mov r1, r6 +; BE-I32-NEXT: mov r2, r9 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #456 -; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: add r3, sp, #232 +; BE-I32-NEXT: mov r4, r0 ; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r6, [sp, #328] -; BE-I32-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEXT: ldr r1, [sp, #332] -; BE-I32-NEXT: ldr r2, [sp, #336] -; BE-I32-NEXT: vmov.32 d14[0], r5 -; BE-I32-NEXT: ldr r3, [sp, #340] -; BE-I32-NEXT: mov r0, r6 -; BE-I32-NEXT: ldr r10, [sp, #504] -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r6, [sp, #344] -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: ldr r1, [sp, #348] -; BE-I32-NEXT: ldr r2, [sp, #352] -; BE-I32-NEXT: ldr r3, [sp, #356] -; BE-I32-NEXT: mov r0, r6 +; BE-I32-NEXT: add r3, sp, #136 +; BE-I32-NEXT: mov r6, r0 +; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: ldr r6, [sp, #600] -; BE-I32-NEXT: add lr, sp, #56 -; BE-I32-NEXT: ldr r1, [sp, #604] -; BE-I32-NEXT: vmov.32 d14[1], r7 -; BE-I32-NEXT: ldr r2, [sp, #608] -; BE-I32-NEXT: ldr r3, [sp, #612] -; BE-I32-NEXT: mov r0, r6 -; 
BE-I32-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #40 -; BE-I32-NEXT: ldr r5, [sp, #508] -; BE-I32-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; BE-I32-NEXT: ldr r5, [sp, #296] +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #300] +; BE-I32-NEXT: ldr r2, [sp, #304] +; BE-I32-NEXT: ldr r3, [sp, #308] +; BE-I32-NEXT: mov r0, r5 +; BE-I32-NEXT: ldr r10, [sp, #216] +; BE-I32-NEXT: ldr r8, [sp, #220] +; BE-I32-NEXT: ldr r9, [sp, #152] ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEXT: add lr, sp, #24 -; BE-I32-NEXT: ldr r7, [sp, #536] -; BE-I32-NEXT: ldr r1, [sp, #540] -; BE-I32-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I32-NEXT: add lr, sp, #8 +; BE-I32-NEXT: ldr r7, [sp, #248] +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: ldr r1, [sp, #252] +; BE-I32-NEXT: ldr r2, [sp, #256] +; BE-I32-NEXT: vmov.32 d8[0], r6 +; BE-I32-NEXT: ldr r3, [sp, #260] ; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: ldr r2, [sp, #544] -; BE-I32-NEXT: ldr r3, [sp, #548] -; BE-I32-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEXT: ldr r6, [sp, #512] -; BE-I32-NEXT: vmov.32 d13[1], r9 +; BE-I32-NEXT: ldr r5, [sp, #224] +; BE-I32-NEXT: ldr r11, [sp, #120] ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r3, [sp, #516] -; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: ldr r3, [sp, #228] +; BE-I32-NEXT: vmov.32 d8[1], r0 ; BE-I32-NEXT: mov r0, r10 -; BE-I32-NEXT: mov r1, r5 -; BE-I32-NEXT: mov r2, r6 +; BE-I32-NEXT: mov r1, r8 +; BE-I32-NEXT: mov r2, r5 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #488 +; BE-I32-NEXT: add r3, sp, #200 ; BE-I32-NEXT: mov r5, r0 ; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #424 -; BE-I32-NEXT: mov r7, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r6, [sp, #264] ; BE-I32-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEXT: ldr r1, [sp, #268] -; BE-I32-NEXT: ldr r2, [sp, #272] -; BE-I32-NEXT: vmov.32 
d11[0], r7 -; BE-I32-NEXT: ldr r3, [sp, #276] -; BE-I32-NEXT: mov r0, r6 -; BE-I32-NEXT: ldr r8, [sp, #696] -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add lr, sp, #88 -; BE-I32-NEXT: ldr r4, [sp, #472] -; BE-I32-NEXT: ldr r1, [sp, #476] -; BE-I32-NEXT: vmov.32 d11[1], r5 -; BE-I32-NEXT: ldr r2, [sp, #480] -; BE-I32-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEXT: ldr r3, [sp, #484] -; BE-I32-NEXT: vmov.32 d16[0], r0 -; BE-I32-NEXT: mov r0, r4 -; BE-I32-NEXT: ldr r6, [sp, #700] -; BE-I32-NEXT: ldr r7, [sp, #704] -; BE-I32-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: ldr r3, [sp, #708] -; BE-I32-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEXT: mov r0, r8 -; BE-I32-NEXT: mov r1, r6 -; BE-I32-NEXT: mov r2, r7 -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #648 -; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add lr, sp, #72 -; BE-I32-NEXT: ldr r5, [sp, #664] -; BE-I32-NEXT: ldr r1, [sp, #668] -; BE-I32-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEXT: ldr r2, [sp, #672] -; BE-I32-NEXT: ldr r3, [sp, #676] -; BE-I32-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEXT: mov r0, r5 -; BE-I32-NEXT: ldr r6, [sp, #444] -; BE-I32-NEXT: vmov.32 d9[1], r4 -; BE-I32-NEXT: ldr r7, [sp, #448] -; BE-I32-NEXT: ldr r8, [sp, #412] -; BE-I32-NEXT: ldr r4, [sp, #416] +; BE-I32-NEXT: ldr r0, [sp, #184] +; BE-I32-NEXT: ldr r1, [sp, #188] +; BE-I32-NEXT: ldr r2, [sp, #192] +; BE-I32-NEXT: vmov.32 d14[0], r4 +; BE-I32-NEXT: ldr r3, [sp, #196] +; BE-I32-NEXT: vmov.32 d15[1], r5 +; BE-I32-NEXT: ldr r7, [sp, #156] +; BE-I32-NEXT: ldr r6, [sp, #160] +; BE-I32-NEXT: ldr r4, [sp, #124] +; BE-I32-NEXT: ldr r5, [sp, #128] ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEXT: ldr r3, [sp, #452] -; BE-I32-NEXT: ldr r0, [sp, #440] -; BE-I32-NEXT: mov r1, r6 -; BE-I32-NEXT: mov r2, r7 +; BE-I32-NEXT: ldr r3, [sp, #164] +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: mov r0, 
r9 +; BE-I32-NEXT: mov r1, r7 +; BE-I32-NEXT: mov r2, r6 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEXT: ldr r3, [sp, #420] -; BE-I32-NEXT: ldr r0, [sp, #408] -; BE-I32-NEXT: mov r1, r8 -; BE-I32-NEXT: mov r2, r4 +; BE-I32-NEXT: ldr r3, [sp, #132] +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: mov r0, r11 +; BE-I32-NEXT: mov r1, r4 +; BE-I32-NEXT: mov r2, r5 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #392 ; BE-I32-NEXT: mov r4, r0 -; BE-I32-NEXT: ldm r3, {r0, r1, r2, r3} +; BE-I32-NEXT: ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add r3, sp, #284 -; BE-I32-NEXT: ldr r7, [sp, #280] -; BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: add r3, sp, #316 +; BE-I32-NEXT: ldr r7, [sp, #312] +; BE-I32-NEXT: vmov.32 d12[0], r0 ; BE-I32-NEXT: ldm r3, {r1, r2, r3} ; BE-I32-NEXT: mov r0, r7 -; BE-I32-NEXT: vmov.32 d14[1], r4 +; BE-I32-NEXT: vmov.32 d12[1], r4 ; BE-I32-NEXT: bl lrintl -; BE-I32-NEXT: add lr, sp, #88 -; BE-I32-NEXT: vrev64.32 q9, q4 -; BE-I32-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #24 -; BE-I32-NEXT: vrev64.32 q8, q7 -; BE-I32-NEXT: vmov.32 d20[1], r0 -; BE-I32-NEXT: add r0, r11, #64 -; BE-I32-NEXT: vst1.32 {d10, d11}, [r0:128]! -; BE-I32-NEXT: vst1.32 {d12, d13}, [r0:128]! -; BE-I32-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #40 -; BE-I32-NEXT: vst1.32 {d22, d23}, [r0:128]! -; BE-I32-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: add lr, sp, #56 -; BE-I32-NEXT: vst1.32 {d18, d19}, [r11:128]! -; BE-I32-NEXT: vst1.32 {d20, d21}, [r11:128]! -; BE-I32-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEXT: vst1.32 {d18, d19}, [r11:128]! 
-; BE-I32-NEXT: vst1.64 {d16, d17}, [r11:128] -; BE-I32-NEXT: add sp, sp, #104 +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q6 +; BE-I32-NEXT: vrev64.32 q1, q7 +; BE-I32-NEXT: vrev64.32 q2, q4 +; BE-I32-NEXT: vrev64.32 q3, q5 +; BE-I32-NEXT: add sp, sp, #16 ; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I32-NEXT: add sp, sp, #4 ; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; -; BE-I64-LABEL: lrint_v32fp128: +; BE-I64-LABEL: lrint_v16fp128: ; BE-I64: @ %bb.0: ; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -12262,990 +3092,174 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) { ; BE-I64-NEXT: sub sp, sp, #4 ; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEXT: .pad #152 -; BE-I64-NEXT: sub sp, sp, #152 -; BE-I64-NEXT: str r3, [sp, #120] @ 4-byte Spill -; BE-I64-NEXT: add r3, sp, #712 -; BE-I64-NEXT: str r2, [sp, #112] @ 4-byte Spill -; BE-I64-NEXT: mov r9, r0 +; BE-I64-NEXT: .pad #56 +; BE-I64-NEXT: sub sp, sp, #56 +; BE-I64-NEXT: mov r5, r3 +; BE-I64-NEXT: add r3, sp, #376 +; BE-I64-NEXT: mov r6, r2 +; BE-I64-NEXT: mov r4, r0 ; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: ldr r7, [sp, #648] -; BE-I64-NEXT: add r3, sp, #652 -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: ldr r7, [sp, #392] +; BE-I64-NEXT: add r3, sp, #396 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: mov r0, r7 -; BE-I64-NEXT: ldr r6, [sp, #520] -; BE-I64-NEXT: ldr r8, [sp, #632] +; BE-I64-NEXT: ldr r11, [sp, #168] ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #524 -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: ldr r2, [sp, #160] +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: ldr r3, [sp, #164] +; BE-I64-NEXT: vmov.32 d9[0], r0 ; 
BE-I64-NEXT: mov r0, r6 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: mov r1, r5 ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #636 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: mov r0, r8 +; BE-I64-NEXT: add r3, sp, #172 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: mov r0, r11 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: add r3, sp, #220 ; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #488] -; BE-I64-NEXT: vmov.32 d8[1], r4 -; BE-I64-NEXT: ldr r1, [sp, #492] -; BE-I64-NEXT: ldr r2, [sp, #496] -; BE-I64-NEXT: vmov.32 d10[1], r7 -; BE-I64-NEXT: ldr r3, [sp, #500] -; BE-I64-NEXT: vmov.32 d9[1], r5 -; BE-I64-NEXT: vstr d8, [sp, #144] @ 8-byte Spill -; BE-I64-NEXT: vstr d10, [sp, #136] @ 8-byte Spill -; BE-I64-NEXT: vstr d9, [sp, #128] @ 8-byte Spill -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #680 -; BE-I64-NEXT: str r0, [sp, #104] @ 4-byte Spill -; BE-I64-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-I64-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #728] -; BE-I64-NEXT: ldr r2, [sp, #736] -; BE-I64-NEXT: vmov.32 d11[1], r6 -; BE-I64-NEXT: ldr r6, [sp, #732] -; BE-I64-NEXT: ldr r3, [sp, #740] -; BE-I64-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEXT: ldr r5, [sp, #504] -; BE-I64-NEXT: mov r1, r6 -; BE-I64-NEXT: ldr r7, [sp, #744] -; BE-I64-NEXT: ldr r4, [sp, #748] -; BE-I64-NEXT: vstr d11, [sp, #24] @ 8-byte Spill -; BE-I64-NEXT: vstr d16, [sp, #8] @ 8-byte Spill -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: ldr r2, [sp, #752] +; BE-I64-NEXT: ldr r0, [sp, #216] ; BE-I64-NEXT: mov r11, r1 -; BE-I64-NEXT: ldr r3, [sp, #756] -; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: mov r0, r7 -; BE-I64-NEXT: mov r1, r4 -; BE-I64-NEXT: ldr r10, [sp, #552] -; BE-I64-NEXT: ldr r6, [sp, #664] -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #508 -; 
BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: mov r0, r5 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #540 -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #536] -; BE-I64-NEXT: mov r7, r1 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #556 -; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: add r3, sp, #236 ; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: mov r0, r10 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #668 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: mov r0, r6 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #700 -; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #696] +; BE-I64-NEXT: ldr r0, [sp, #232] ; BE-I64-NEXT: mov r6, r1 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #104] @ 4-byte Reload -; BE-I64-NEXT: ldr r2, [sp, #256] -; BE-I64-NEXT: vmov.32 d13[1], r11 -; BE-I64-NEXT: ldr r3, [sp, #260] -; BE-I64-NEXT: vmov.32 d14[1], r6 -; BE-I64-NEXT: ldr r6, [sp, #264] -; BE-I64-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEXT: ldr r4, [sp, #344] -; BE-I64-NEXT: vmov.32 d12[1], r5 -; BE-I64-NEXT: ldr r5, [sp, #312] -; BE-I64-NEXT: vmov.32 d8[1], r8 -; BE-I64-NEXT: ldr r8, [sp, #328] -; BE-I64-NEXT: vmov.32 d10[1], r7 -; BE-I64-NEXT: vstr d13, [sp, #32] @ 8-byte Spill -; BE-I64-NEXT: vmov.32 d11[1], r1 -; BE-I64-NEXT: ldr r1, [sp, #120] @ 4-byte Reload +; BE-I64-NEXT: add r3, sp, #252 ; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload -; BE-I64-NEXT: vstr d14, [sp] @ 8-byte Spill -; BE-I64-NEXT: vstr d9, [sp, #16] @ 8-byte Spill -; BE-I64-NEXT: vstr d12, [sp, #56] @ 8-byte Spill -; BE-I64-NEXT: vstr d10, [sp, #64] @ 8-byte Spill -; BE-I64-NEXT: vstr d8, [sp, #40] @ 8-byte Spill +; BE-I64-NEXT: ldr r0, 
[sp, #248] +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl ; BE-I64-NEXT: add r3, sp, #268 -; BE-I64-NEXT: mov r11, r1 ; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: ldr r0, [sp, #264] +; BE-I64-NEXT: mov r5, r1 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #316 +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #280] +; BE-I64-NEXT: ldr r2, [sp, #288] +; BE-I64-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEXT: ldr r7, [sp, #284] +; BE-I64-NEXT: ldr r3, [sp, #292] +; BE-I64-NEXT: vmov.32 d14[1], r5 +; BE-I64-NEXT: ldr r5, [sp, #328] +; BE-I64-NEXT: vmov.32 d12[1], r6 +; BE-I64-NEXT: ldr r6, [sp, #300] +; BE-I64-NEXT: vmov.32 d10[1], r8 +; BE-I64-NEXT: ldr r8, [sp, #184] +; BE-I64-NEXT: vmov.32 d11[1], r11 +; BE-I64-NEXT: vmov.32 d9[1], r10 +; BE-I64-NEXT: vmov.32 d8[1], r9 +; BE-I64-NEXT: vmov.32 d15[1], r1 +; BE-I64-NEXT: mov r1, r7 +; BE-I64-NEXT: vstr d14, [sp, #48] @ 8-byte Spill +; BE-I64-NEXT: vstr d13, [sp, #40] @ 8-byte Spill +; BE-I64-NEXT: vstr d12, [sp, #32] @ 8-byte Spill +; BE-I64-NEXT: vstr d11, [sp, #24] @ 8-byte Spill +; BE-I64-NEXT: vstr d10, [sp, #16] @ 8-byte Spill +; BE-I64-NEXT: vstr d9, [sp, #8] @ 8-byte Spill +; BE-I64-NEXT: vstr d8, [sp] @ 8-byte Spill +; BE-I64-NEXT: bl lrintl ; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: ldr r1, [sp, #296] +; BE-I64-NEXT: ldr r2, [sp, #304] ; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: mov r0, r5 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} +; BE-I64-NEXT: ldr r3, [sp, #308] +; BE-I64-NEXT: mov r0, r1 +; BE-I64-NEXT: mov r1, r6 ; BE-I64-NEXT: bl lrintl ; BE-I64-NEXT: add r3, sp, #332 -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: mov r0, r8 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #348 -; BE-I64-NEXT: mov r5, r1 -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: mov r0, r4 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; 
BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #364 +; BE-I64-NEXT: mov r11, r1 ; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #360] -; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: mov r0, r5 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #476 +; BE-I64-NEXT: add r3, sp, #188 +; BE-I64-NEXT: mov r7, r1 ; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #472] -; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: mov r0, r8 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-I64-NEXT: ldr r2, [sp, #592] -; BE-I64-NEXT: vldr d20, [sp, #136] @ 8-byte Reload -; BE-I64-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEXT: ldr r1, [sp, #588] -; BE-I64-NEXT: ldr r3, [sp, #596] -; BE-I64-NEXT: vldr d22, [sp, #24] @ 8-byte Reload -; BE-I64-NEXT: vldr d18, [sp, #8] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d21, d20 -; BE-I64-NEXT: vmov.32 d10[1], r6 -; BE-I64-NEXT: ldr r6, [sp, #600] -; BE-I64-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEXT: ldr r4, [sp, #616] -; BE-I64-NEXT: vmov.32 d12[1], r7 -; BE-I64-NEXT: ldr r7, [sp, #604] -; BE-I64-NEXT: vmov.32 d8[1], r10 -; BE-I64-NEXT: add r10, r9, #192 -; BE-I64-NEXT: vmov.32 d14[1], r11 -; BE-I64-NEXT: ldr r11, [sp, #440] -; BE-I64-NEXT: vmov.32 d13[1], r0 -; BE-I64-NEXT: ldr r0, [sp, #584] -; BE-I64-NEXT: vmov.32 d15[1], r5 -; BE-I64-NEXT: vstr d16, [sp, #48] @ 8-byte Spill -; BE-I64-NEXT: vldr d16, [sp, #128] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d20, d22 -; BE-I64-NEXT: vldr d22, [sp] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d19, d18 -; BE-I64-NEXT: vrev64.32 d17, d16 -; BE-I64-NEXT: vrev64.32 d18, d22 -; BE-I64-NEXT: vstr d10, [sp, #120] @ 8-byte Spill -; BE-I64-NEXT: vstr d9, [sp, #112] @ 8-byte Spill -; BE-I64-NEXT: vstr d15, [sp, #104] @ 8-byte Spill -; BE-I64-NEXT: vstr d12, [sp, #96] @ 8-byte Spill -; BE-I64-NEXT: vstr d8, [sp, #80] @ 8-byte Spill -; BE-I64-NEXT: vstr d14, [sp, #72] @ 
8-byte Spill -; BE-I64-NEXT: vstr d13, [sp, #88] @ 8-byte Spill -; BE-I64-NEXT: vst1.64 {d20, d21}, [r10:128]! -; BE-I64-NEXT: vrev64.32 d16, d11 -; BE-I64-NEXT: vst1.64 {d18, d19}, [r10:128]! -; BE-I64-NEXT: vst1.64 {d16, d17}, [r10:128]! -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: ldr r2, [sp, #608] +; BE-I64-NEXT: add r3, sp, #204 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #200] ; BE-I64-NEXT: mov r8, r1 -; BE-I64-NEXT: ldr r3, [sp, #612] -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: mov r0, r6 -; BE-I64-NEXT: mov r1, r7 -; BE-I64-NEXT: ldr r5, [sp, #456] -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #620 -; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: mov r0, r4 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #444 -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: mov r0, r11 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #460 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: add r3, sp, #348 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #344] +; BE-I64-NEXT: mov r5, r1 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #572 +; BE-I64-NEXT: add r3, sp, #364 ; BE-I64-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #568] -; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: ldr r0, [sp, #360] +; BE-I64-NEXT: mov r9, r1 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: vldr d16, [sp, #16] @ 8-byte Reload -; BE-I64-NEXT: vldr d18, [sp, #56] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d17, d16 -; BE-I64-NEXT: ldr r2, [sp, #304] -; BE-I64-NEXT: vrev64.32 d16, d18 -; BE-I64-NEXT: ldr r3, [sp, #308] -; BE-I64-NEXT: vldr d18, [sp, #144] @ 8-byte Reload -; BE-I64-NEXT: vldr d20, [sp, #64] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d19, d18 -; BE-I64-NEXT: vrev64.32 d18, d20 
-; BE-I64-NEXT: vldr d20, [sp, #40] @ 8-byte Reload -; BE-I64-NEXT: vldr d22, [sp, #32] @ 8-byte Reload +; BE-I64-NEXT: add r3, sp, #316 ; BE-I64-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #296] -; BE-I64-NEXT: vmov.32 d10[1], r7 -; BE-I64-NEXT: ldr r7, [sp, #412] -; BE-I64-NEXT: vmov.32 d9[1], r6 -; BE-I64-NEXT: ldr r6, [sp, #408] -; BE-I64-NEXT: vmov.32 d8[1], r8 -; BE-I64-NEXT: add r8, r9, #128 -; BE-I64-NEXT: vrev64.32 d21, d20 -; BE-I64-NEXT: vmov.32 d13[1], r5 -; BE-I64-NEXT: ldr r5, [sp, #300] -; BE-I64-NEXT: vrev64.32 d20, d22 -; BE-I64-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEXT: mov r1, r5 -; BE-I64-NEXT: vstr d10, [sp, #136] @ 8-byte Spill -; BE-I64-NEXT: vstr d9, [sp, #128] @ 8-byte Spill -; BE-I64-NEXT: vstr d8, [sp, #24] @ 8-byte Spill -; BE-I64-NEXT: vst1.64 {d20, d21}, [r10:128] -; BE-I64-NEXT: vst1.64 {d18, d19}, [r8:128]! -; BE-I64-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEXT: ldr r4, [sp, #424] -; BE-I64-NEXT: ldr r10, [sp, #376] -; BE-I64-NEXT: vst1.64 {d16, d17}, [r8:128]! 
-; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: ldr r2, [sp, #416] -; BE-I64-NEXT: mov r11, r1 -; BE-I64-NEXT: ldr r3, [sp, #420] -; BE-I64-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEXT: mov r0, r6 -; BE-I64-NEXT: mov r1, r7 -; BE-I64-NEXT: ldr r5, [sp, #392] -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #428 +; BE-I64-NEXT: ldr r0, [sp, #312] ; BE-I64-NEXT: mov r6, r1 -; BE-I64-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEXT: mov r0, r4 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #380 -; BE-I64-NEXT: mov r7, r1 -; BE-I64-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEXT: mov r0, r10 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #396 -; BE-I64-NEXT: mov r4, r1 -; BE-I64-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEXT: mov r0, r5 -; BE-I64-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: add r3, sp, #284 -; BE-I64-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEXT: ldr r0, [sp, #280] -; BE-I64-NEXT: mov r5, r1 ; BE-I64-NEXT: ldm r3, {r1, r2, r3} ; BE-I64-NEXT: bl lrintl -; BE-I64-NEXT: vldr d16, [sp, #120] @ 8-byte Reload -; BE-I64-NEXT: vldr d18, [sp, #112] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d17, d16 -; BE-I64-NEXT: vldr d26, [sp, #136] @ 8-byte Reload +; BE-I64-NEXT: vldr d18, [sp, #48] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d17, d15 ; BE-I64-NEXT: vrev64.32 d16, d18 -; BE-I64-NEXT: vldr d18, [sp, #104] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d31, d26 -; BE-I64-NEXT: vldr d26, [sp, #128] @ 8-byte Reload -; BE-I64-NEXT: vldr d20, [sp, #96] @ 8-byte Reload +; BE-I64-NEXT: vldr d18, [sp, #40] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d24[0], r0 +; BE-I64-NEXT: add r0, r4, #64 +; BE-I64-NEXT: vldr d20, [sp, #32] @ 8-byte Reload ; BE-I64-NEXT: vrev64.32 d19, d18 +; BE-I64-NEXT: vmov.32 d9[1], r11 +; BE-I64-NEXT: vmov.32 d10[1], r7 ; BE-I64-NEXT: vrev64.32 d18, d20 -; BE-I64-NEXT: vldr d20, [sp, #80] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d30, d26 -; BE-I64-NEXT: vldr d26, [sp, #24] @ 
8-byte Reload -; BE-I64-NEXT: vmov.32 d10[1], r5 -; BE-I64-NEXT: vldr d22, [sp, #72] @ 8-byte Reload +; BE-I64-NEXT: vldr d20, [sp, #24] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d8[1], r10 +; BE-I64-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEXT: vmov.32 d24[1], r1 +; BE-I64-NEXT: vldr d22, [sp, #16] @ 8-byte Reload ; BE-I64-NEXT: vrev64.32 d21, d20 -; BE-I64-NEXT: vrev64.32 d1, d26 -; BE-I64-NEXT: vmov.32 d9[1], r7 -; BE-I64-NEXT: vmov.32 d12[1], r4 +; BE-I64-NEXT: vrev64.32 d1, d9 +; BE-I64-NEXT: vmov.32 d13[1], r9 +; BE-I64-NEXT: vrev64.32 d31, d10 ; BE-I64-NEXT: vrev64.32 d20, d22 -; BE-I64-NEXT: vldr d22, [sp, #88] @ 8-byte Reload -; BE-I64-NEXT: vmov.32 d8[1], r6 -; BE-I64-NEXT: vrev64.32 d0, d14 -; BE-I64-NEXT: vmov.32 d28[0], r0 -; BE-I64-NEXT: add r0, r9, #64 -; BE-I64-NEXT: vrev64.32 d3, d10 -; BE-I64-NEXT: vldr d24, [sp, #48] @ 8-byte Reload -; BE-I64-NEXT: vrev64.32 d23, d22 -; BE-I64-NEXT: vrev64.32 d5, d9 -; BE-I64-NEXT: vst1.64 {d0, d1}, [r8:128]! -; BE-I64-NEXT: vrev64.32 d2, d12 -; BE-I64-NEXT: vmov.32 d15[1], r11 -; BE-I64-NEXT: vrev64.32 d22, d24 -; BE-I64-NEXT: vrev64.32 d25, d13 -; BE-I64-NEXT: vrev64.32 d4, d8 -; BE-I64-NEXT: vst1.64 {d30, d31}, [r8:128] -; BE-I64-NEXT: vst1.64 {d2, d3}, [r0:128]! -; BE-I64-NEXT: vmov.32 d28[1], r1 -; BE-I64-NEXT: vrev64.32 d24, d11 -; BE-I64-NEXT: vst1.64 {d4, d5}, [r0:128]! -; BE-I64-NEXT: vrev64.32 d27, d15 -; BE-I64-NEXT: vst1.64 {d24, d25}, [r0:128]! -; BE-I64-NEXT: vrev64.32 d26, d28 -; BE-I64-NEXT: vst1.64 {d22, d23}, [r0:128] -; BE-I64-NEXT: vst1.64 {d20, d21}, [r9:128]! -; BE-I64-NEXT: vst1.64 {d26, d27}, [r9:128]! -; BE-I64-NEXT: vst1.64 {d18, d19}, [r9:128]! 
-; BE-I64-NEXT: vst1.64 {d16, d17}, [r9:128] -; BE-I64-NEXT: add sp, sp, #152 +; BE-I64-NEXT: vldr d22, [sp, #8] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d0, d8 +; BE-I64-NEXT: vrev64.32 d29, d14 +; BE-I64-NEXT: vmov.32 d12[1], r5 +; BE-I64-NEXT: vrev64.32 d30, d24 +; BE-I64-NEXT: vrev64.32 d27, d22 +; BE-I64-NEXT: vldr d22, [sp] @ 8-byte Reload +; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]! +; BE-I64-NEXT: vmov.32 d11[1], r8 +; BE-I64-NEXT: vrev64.32 d28, d13 +; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEXT: vrev64.32 d26, d22 +; BE-I64-NEXT: vrev64.32 d23, d12 +; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-I64-NEXT: vrev64.32 d22, d11 +; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-I64-NEXT: vst1.64 {d20, d21}, [r4:128]! +; BE-I64-NEXT: vst1.64 {d22, d23}, [r4:128]! +; BE-I64-NEXT: vst1.64 {d18, d19}, [r4:128]! +; BE-I64-NEXT: vst1.64 {d16, d17}, [r4:128] +; BE-I64-NEXT: add sp, sp, #56 ; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; BE-I64-NEXT: add sp, sp, #4 ; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I32-NEON-LABEL: lrint_v32fp128: -; BE-I32-NEON: @ %bb.0: -; BE-I32-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I32-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I32-NEON-NEXT: .pad #4 -; BE-I32-NEON-NEXT: sub sp, sp, #4 -; BE-I32-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: .pad #104 -; BE-I32-NEON-NEXT: sub sp, sp, #104 -; BE-I32-NEON-NEXT: mov r4, r3 -; BE-I32-NEON-NEXT: add r3, sp, #248 -; BE-I32-NEON-NEXT: mov r8, r2 -; BE-I32-NEON-NEXT: mov r11, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #616 -; BE-I32-NEON-NEXT: mov r9, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #680 -; BE-I32-NEON-NEXT: mov r5, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, 
r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r7, [sp, #232] -; BE-I32-NEON-NEXT: add lr, sp, #72 -; BE-I32-NEON-NEXT: ldr r1, [sp, #236] -; BE-I32-NEON-NEXT: vmov.32 d17[0], r0 -; BE-I32-NEON-NEXT: ldr r2, [sp, #240] -; BE-I32-NEON-NEXT: ldr r3, [sp, #244] -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: ldr r10, [sp, #376] -; BE-I32-NEON-NEXT: vmov.32 d11[0], r5 -; BE-I32-NEON-NEXT: ldr r6, [sp, #296] -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #300 -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: mov r0, r6 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #380 -; BE-I32-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I32-NEON-NEXT: mov r0, r10 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #360 -; BE-I32-NEON-NEXT: mov r5, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d17[0], r0 -; BE-I32-NEON-NEXT: ldr r6, [sp, #312] -; BE-I32-NEON-NEXT: ldr r1, [sp, #316] -; BE-I32-NEON-NEXT: ldr r2, [sp, #320] -; BE-I32-NEON-NEXT: ldr r3, [sp, #324] -; BE-I32-NEON-NEXT: vmov.32 d17[1], r5 -; BE-I32-NEON-NEXT: mov r0, r6 -; BE-I32-NEON-NEXT: ldr r7, [sp, #572] -; BE-I32-NEON-NEXT: vorr q4, q8, q8 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r6, [sp, #632] -; BE-I32-NEON-NEXT: add lr, sp, #88 -; BE-I32-NEON-NEXT: ldr r1, [sp, #636] -; BE-I32-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I32-NEON-NEXT: ldr r2, [sp, #640] -; BE-I32-NEON-NEXT: ldr r3, [sp, #644] -; BE-I32-NEON-NEXT: mov r0, r6 -; BE-I32-NEON-NEXT: ldr r5, [sp, #576] -; BE-I32-NEON-NEXT: vmov.32 d15[1], r9 -; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d11[1], r0 -; BE-I32-NEON-NEXT: ldr r3, [sp, #580] -; BE-I32-NEON-NEXT: ldr r0, [sp, #568] -; BE-I32-NEON-NEXT: mov 
r1, r7 -; BE-I32-NEON-NEXT: mov r2, r5 -; BE-I32-NEON-NEXT: vorr q6, q5, q5 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #552 -; BE-I32-NEON-NEXT: mov r9, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #520 -; BE-I32-NEON-NEXT: mov r5, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r6, [sp, #584] -; BE-I32-NEON-NEXT: add lr, sp, #8 -; BE-I32-NEON-NEXT: ldr r1, [sp, #588] -; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I32-NEON-NEXT: ldr r2, [sp, #592] -; BE-I32-NEON-NEXT: ldr r3, [sp, #596] -; BE-I32-NEON-NEXT: mov r0, r6 -; BE-I32-NEON-NEXT: vmov.32 d17[0], r5 -; BE-I32-NEON-NEXT: ldr r7, [sp, #216] -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #220 -; BE-I32-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r2, [sp, #208] -; BE-I32-NEON-NEXT: mov r7, r0 -; BE-I32-NEON-NEXT: ldr r3, [sp, #212] -; BE-I32-NEON-NEXT: mov r0, r8 -; BE-I32-NEON-NEXT: mov r1, r4 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #456 -; BE-I32-NEON-NEXT: mov r5, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r6, [sp, #328] -; BE-I32-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I32-NEON-NEXT: ldr r1, [sp, #332] -; BE-I32-NEON-NEXT: ldr r2, [sp, #336] -; BE-I32-NEON-NEXT: vmov.32 d14[0], r5 -; BE-I32-NEON-NEXT: ldr r3, [sp, #340] -; BE-I32-NEON-NEXT: mov r0, r6 -; BE-I32-NEON-NEXT: ldr r10, [sp, #504] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r6, [sp, #344] -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: ldr r1, [sp, #348] -; BE-I32-NEON-NEXT: ldr r2, [sp, #352] -; BE-I32-NEON-NEXT: ldr r3, [sp, #356] -; BE-I32-NEON-NEXT: mov r0, r6 -; BE-I32-NEON-NEXT: bl lrintl -; 
BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: ldr r6, [sp, #600] -; BE-I32-NEON-NEXT: add lr, sp, #56 -; BE-I32-NEON-NEXT: ldr r1, [sp, #604] -; BE-I32-NEON-NEXT: vmov.32 d14[1], r7 -; BE-I32-NEON-NEXT: ldr r2, [sp, #608] -; BE-I32-NEON-NEXT: ldr r3, [sp, #612] -; BE-I32-NEON-NEXT: mov r0, r6 -; BE-I32-NEON-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #40 -; BE-I32-NEON-NEXT: ldr r5, [sp, #508] -; BE-I32-NEON-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEON-NEXT: add lr, sp, #24 -; BE-I32-NEON-NEXT: ldr r7, [sp, #536] -; BE-I32-NEON-NEXT: ldr r1, [sp, #540] -; BE-I32-NEON-NEXT: vstmia lr, {d12, d13} @ 16-byte Spill -; BE-I32-NEON-NEXT: add lr, sp, #8 -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: ldr r2, [sp, #544] -; BE-I32-NEON-NEXT: ldr r3, [sp, #548] -; BE-I32-NEON-NEXT: vldmia lr, {d12, d13} @ 16-byte Reload -; BE-I32-NEON-NEXT: ldr r6, [sp, #512] -; BE-I32-NEON-NEXT: vmov.32 d13[1], r9 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r3, [sp, #516] -; BE-I32-NEON-NEXT: vmov.32 d12[1], r0 -; BE-I32-NEON-NEXT: mov r0, r10 -; BE-I32-NEON-NEXT: mov r1, r5 -; BE-I32-NEON-NEXT: mov r2, r6 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #488 -; BE-I32-NEON-NEXT: mov r5, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #424 -; BE-I32-NEON-NEXT: mov r7, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r6, [sp, #264] -; BE-I32-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I32-NEON-NEXT: ldr r1, [sp, #268] -; BE-I32-NEON-NEXT: ldr r2, [sp, #272] -; BE-I32-NEON-NEXT: vmov.32 d11[0], r7 -; BE-I32-NEON-NEXT: ldr r3, [sp, #276] -; BE-I32-NEON-NEXT: mov r0, r6 -; BE-I32-NEON-NEXT: ldr r8, [sp, #696] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add lr, sp, #88 -; BE-I32-NEON-NEXT: ldr r4, [sp, #472] -; 
BE-I32-NEON-NEXT: ldr r1, [sp, #476] -; BE-I32-NEON-NEXT: vmov.32 d11[1], r5 -; BE-I32-NEON-NEXT: ldr r2, [sp, #480] -; BE-I32-NEON-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload -; BE-I32-NEON-NEXT: ldr r3, [sp, #484] -; BE-I32-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I32-NEON-NEXT: mov r0, r4 -; BE-I32-NEON-NEXT: ldr r6, [sp, #700] -; BE-I32-NEON-NEXT: ldr r7, [sp, #704] -; BE-I32-NEON-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: ldr r3, [sp, #708] -; BE-I32-NEON-NEXT: vmov.32 d10[1], r0 -; BE-I32-NEON-NEXT: mov r0, r8 -; BE-I32-NEON-NEXT: mov r1, r6 -; BE-I32-NEON-NEXT: mov r2, r7 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #648 -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add lr, sp, #72 -; BE-I32-NEON-NEXT: ldr r5, [sp, #664] -; BE-I32-NEON-NEXT: ldr r1, [sp, #668] -; BE-I32-NEON-NEXT: vldmia lr, {d8, d9} @ 16-byte Reload -; BE-I32-NEON-NEXT: ldr r2, [sp, #672] -; BE-I32-NEON-NEXT: ldr r3, [sp, #676] -; BE-I32-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I32-NEON-NEXT: mov r0, r5 -; BE-I32-NEON-NEXT: ldr r6, [sp, #444] -; BE-I32-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I32-NEON-NEXT: ldr r7, [sp, #448] -; BE-I32-NEON-NEXT: ldr r8, [sp, #412] -; BE-I32-NEON-NEXT: ldr r4, [sp, #416] -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d8[1], r0 -; BE-I32-NEON-NEXT: ldr r3, [sp, #452] -; BE-I32-NEON-NEXT: ldr r0, [sp, #440] -; BE-I32-NEON-NEXT: mov r1, r6 -; BE-I32-NEON-NEXT: mov r2, r7 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: vmov.32 d15[1], r0 -; BE-I32-NEON-NEXT: ldr r3, [sp, #420] -; BE-I32-NEON-NEXT: ldr r0, [sp, #408] -; BE-I32-NEON-NEXT: mov r1, r8 -; BE-I32-NEON-NEXT: mov r2, r4 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #392 -; BE-I32-NEON-NEXT: mov r4, r0 -; BE-I32-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add r3, sp, #284 -; 
BE-I32-NEON-NEXT: ldr r7, [sp, #280] -; BE-I32-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I32-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I32-NEON-NEXT: mov r0, r7 -; BE-I32-NEON-NEXT: vmov.32 d14[1], r4 -; BE-I32-NEON-NEXT: bl lrintl -; BE-I32-NEON-NEXT: add lr, sp, #88 -; BE-I32-NEON-NEXT: vrev64.32 q9, q4 -; BE-I32-NEON-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #24 -; BE-I32-NEON-NEXT: vrev64.32 q8, q7 -; BE-I32-NEON-NEXT: vmov.32 d20[1], r0 -; BE-I32-NEON-NEXT: add r0, r11, #64 -; BE-I32-NEON-NEXT: vst1.32 {d10, d11}, [r0:128]! -; BE-I32-NEON-NEXT: vst1.32 {d12, d13}, [r0:128]! -; BE-I32-NEON-NEXT: vldmia lr, {d22, d23} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #40 -; BE-I32-NEON-NEXT: vst1.32 {d22, d23}, [r0:128]! -; BE-I32-NEON-NEXT: vst1.64 {d18, d19}, [r0:128] -; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: add lr, sp, #56 -; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r11:128]! -; BE-I32-NEON-NEXT: vst1.32 {d20, d21}, [r11:128]! -; BE-I32-NEON-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload -; BE-I32-NEON-NEXT: vst1.32 {d18, d19}, [r11:128]! 
-; BE-I32-NEON-NEXT: vst1.64 {d16, d17}, [r11:128] -; BE-I32-NEON-NEXT: add sp, sp, #104 -; BE-I32-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I32-NEON-NEXT: add sp, sp, #4 -; BE-I32-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; -; BE-I64-NEON-LABEL: lrint_v32fp128: -; BE-I64-NEON: @ %bb.0: -; BE-I64-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; BE-I64-NEON-NEXT: .pad #4 -; BE-I64-NEON-NEXT: sub sp, sp, #4 -; BE-I64-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: .pad #152 -; BE-I64-NEON-NEXT: sub sp, sp, #152 -; BE-I64-NEON-NEXT: str r3, [sp, #120] @ 4-byte Spill -; BE-I64-NEON-NEXT: add r3, sp, #712 -; BE-I64-NEON-NEXT: str r2, [sp, #112] @ 4-byte Spill -; BE-I64-NEON-NEXT: mov r9, r0 -; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: ldr r7, [sp, #648] -; BE-I64-NEON-NEXT: add r3, sp, #652 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: mov r0, r7 -; BE-I64-NEON-NEXT: ldr r6, [sp, #520] -; BE-I64-NEON-NEXT: ldr r8, [sp, #632] -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #524 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: mov r0, r6 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #636 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: mov r0, r8 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #488] -; BE-I64-NEON-NEXT: vmov.32 d8[1], r4 -; BE-I64-NEON-NEXT: ldr r1, [sp, #492] -; BE-I64-NEON-NEXT: ldr r2, [sp, #496] -; BE-I64-NEON-NEXT: vmov.32 
d10[1], r7 -; BE-I64-NEON-NEXT: ldr r3, [sp, #500] -; BE-I64-NEON-NEXT: vmov.32 d9[1], r5 -; BE-I64-NEON-NEXT: vstr d8, [sp, #144] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #680 -; BE-I64-NEON-NEXT: str r0, [sp, #104] @ 4-byte Spill -; BE-I64-NEON-NEXT: str r1, [sp, #88] @ 4-byte Spill -; BE-I64-NEON-NEXT: ldm r3, {r0, r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #728] -; BE-I64-NEON-NEXT: ldr r2, [sp, #736] -; BE-I64-NEON-NEXT: vmov.32 d11[1], r6 -; BE-I64-NEON-NEXT: ldr r6, [sp, #732] -; BE-I64-NEON-NEXT: ldr r3, [sp, #740] -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEON-NEXT: ldr r5, [sp, #504] -; BE-I64-NEON-NEXT: mov r1, r6 -; BE-I64-NEON-NEXT: ldr r7, [sp, #744] -; BE-I64-NEON-NEXT: ldr r4, [sp, #748] -; BE-I64-NEON-NEXT: vstr d11, [sp, #24] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d16, [sp, #8] @ 8-byte Spill -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: ldr r2, [sp, #752] -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: ldr r3, [sp, #756] -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: mov r0, r7 -; BE-I64-NEON-NEXT: mov r1, r4 -; BE-I64-NEON-NEXT: ldr r10, [sp, #552] -; BE-I64-NEON-NEXT: ldr r6, [sp, #664] -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #508 -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: mov r0, r5 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #540 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #536] -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #556 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: mov r0, r10 -; 
BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #668 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: mov r0, r6 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #700 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #696] -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #104] @ 4-byte Reload -; BE-I64-NEON-NEXT: ldr r2, [sp, #256] -; BE-I64-NEON-NEXT: vmov.32 d13[1], r11 -; BE-I64-NEON-NEXT: ldr r3, [sp, #260] -; BE-I64-NEON-NEXT: vmov.32 d14[1], r6 -; BE-I64-NEON-NEXT: ldr r6, [sp, #264] -; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEON-NEXT: ldr r4, [sp, #344] -; BE-I64-NEON-NEXT: vmov.32 d12[1], r5 -; BE-I64-NEON-NEXT: ldr r5, [sp, #312] -; BE-I64-NEON-NEXT: vmov.32 d8[1], r8 -; BE-I64-NEON-NEXT: ldr r8, [sp, #328] -; BE-I64-NEON-NEXT: vmov.32 d10[1], r7 -; BE-I64-NEON-NEXT: vstr d13, [sp, #32] @ 8-byte Spill -; BE-I64-NEON-NEXT: vmov.32 d11[1], r1 -; BE-I64-NEON-NEXT: ldr r1, [sp, #120] @ 4-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #112] @ 4-byte Reload -; BE-I64-NEON-NEXT: vstr d14, [sp] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d9, [sp, #16] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d12, [sp, #56] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d10, [sp, #64] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d8, [sp, #40] @ 8-byte Spill -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #268 -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: mov r0, r6 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #316 -; BE-I64-NEON-NEXT: mov r10, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: mov r0, r5 -; 
BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #332 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: mov r0, r8 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #348 -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: mov r0, r4 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #364 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #360] -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #476 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #472] -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vmov.32 d16[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #88] @ 4-byte Reload -; BE-I64-NEON-NEXT: ldr r2, [sp, #592] -; BE-I64-NEON-NEXT: vldr d20, [sp, #136] @ 8-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d16[1], r1 -; BE-I64-NEON-NEXT: ldr r1, [sp, #588] -; BE-I64-NEON-NEXT: ldr r3, [sp, #596] -; BE-I64-NEON-NEXT: vldr d22, [sp, #24] @ 8-byte Reload -; BE-I64-NEON-NEXT: vldr d18, [sp, #8] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d21, d20 -; BE-I64-NEON-NEXT: vmov.32 d10[1], r6 -; BE-I64-NEON-NEXT: ldr r6, [sp, #600] -; BE-I64-NEON-NEXT: vmov.32 d9[1], r4 -; BE-I64-NEON-NEXT: ldr r4, [sp, #616] -; BE-I64-NEON-NEXT: vmov.32 d12[1], r7 -; BE-I64-NEON-NEXT: ldr r7, [sp, #604] -; BE-I64-NEON-NEXT: vmov.32 d8[1], r10 -; BE-I64-NEON-NEXT: add r10, r9, #192 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r11 -; BE-I64-NEON-NEXT: ldr r11, [sp, #440] -; BE-I64-NEON-NEXT: vmov.32 d13[1], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #584] -; BE-I64-NEON-NEXT: vmov.32 d15[1], r5 -; BE-I64-NEON-NEXT: vstr d16, [sp, #48] @ 8-byte Spill -; 
BE-I64-NEON-NEXT: vldr d16, [sp, #128] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d20, d22 -; BE-I64-NEON-NEXT: vldr d22, [sp] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d19, d18 -; BE-I64-NEON-NEXT: vrev64.32 d17, d16 -; BE-I64-NEON-NEXT: vrev64.32 d18, d22 -; BE-I64-NEON-NEXT: vstr d10, [sp, #120] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d9, [sp, #112] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d15, [sp, #104] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d12, [sp, #96] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d8, [sp, #80] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d14, [sp, #72] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d13, [sp, #88] @ 8-byte Spill -; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r10:128]! -; BE-I64-NEON-NEXT: vrev64.32 d16, d11 -; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r10:128]! -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r10:128]! -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: ldr r2, [sp, #608] -; BE-I64-NEON-NEXT: mov r8, r1 -; BE-I64-NEON-NEXT: ldr r3, [sp, #612] -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: mov r0, r6 -; BE-I64-NEON-NEXT: mov r1, r7 -; BE-I64-NEON-NEXT: ldr r5, [sp, #456] -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #620 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: mov r0, r4 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #444 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: mov r0, r11 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #460 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d11[0], r0 -; BE-I64-NEON-NEXT: mov r0, r5 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #572 -; BE-I64-NEON-NEXT: vmov.32 d13[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #568] -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: ldm r3, 
{r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vldr d16, [sp, #16] @ 8-byte Reload -; BE-I64-NEON-NEXT: vldr d18, [sp, #56] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d17, d16 -; BE-I64-NEON-NEXT: ldr r2, [sp, #304] -; BE-I64-NEON-NEXT: vrev64.32 d16, d18 -; BE-I64-NEON-NEXT: ldr r3, [sp, #308] -; BE-I64-NEON-NEXT: vldr d18, [sp, #144] @ 8-byte Reload -; BE-I64-NEON-NEXT: vldr d20, [sp, #64] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d19, d18 -; BE-I64-NEON-NEXT: vrev64.32 d18, d20 -; BE-I64-NEON-NEXT: vldr d20, [sp, #40] @ 8-byte Reload -; BE-I64-NEON-NEXT: vldr d22, [sp, #32] @ 8-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d14[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #296] -; BE-I64-NEON-NEXT: vmov.32 d10[1], r7 -; BE-I64-NEON-NEXT: ldr r7, [sp, #412] -; BE-I64-NEON-NEXT: vmov.32 d9[1], r6 -; BE-I64-NEON-NEXT: ldr r6, [sp, #408] -; BE-I64-NEON-NEXT: vmov.32 d8[1], r8 -; BE-I64-NEON-NEXT: add r8, r9, #128 -; BE-I64-NEON-NEXT: vrev64.32 d21, d20 -; BE-I64-NEON-NEXT: vmov.32 d13[1], r5 -; BE-I64-NEON-NEXT: ldr r5, [sp, #300] -; BE-I64-NEON-NEXT: vrev64.32 d20, d22 -; BE-I64-NEON-NEXT: vmov.32 d14[1], r1 -; BE-I64-NEON-NEXT: mov r1, r5 -; BE-I64-NEON-NEXT: vstr d10, [sp, #136] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d9, [sp, #128] @ 8-byte Spill -; BE-I64-NEON-NEXT: vstr d8, [sp, #24] @ 8-byte Spill -; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r10:128] -; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r8:128]! -; BE-I64-NEON-NEXT: vmov.32 d11[1], r4 -; BE-I64-NEON-NEXT: ldr r4, [sp, #424] -; BE-I64-NEON-NEXT: ldr r10, [sp, #376] -; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r8:128]! 
-; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: ldr r2, [sp, #416] -; BE-I64-NEON-NEXT: mov r11, r1 -; BE-I64-NEON-NEXT: ldr r3, [sp, #420] -; BE-I64-NEON-NEXT: vmov.32 d15[0], r0 -; BE-I64-NEON-NEXT: mov r0, r6 -; BE-I64-NEON-NEXT: mov r1, r7 -; BE-I64-NEON-NEXT: ldr r5, [sp, #392] -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #428 -; BE-I64-NEON-NEXT: mov r6, r1 -; BE-I64-NEON-NEXT: vmov.32 d8[0], r0 -; BE-I64-NEON-NEXT: mov r0, r4 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #380 -; BE-I64-NEON-NEXT: mov r7, r1 -; BE-I64-NEON-NEXT: vmov.32 d9[0], r0 -; BE-I64-NEON-NEXT: mov r0, r10 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #396 -; BE-I64-NEON-NEXT: mov r4, r1 -; BE-I64-NEON-NEXT: vmov.32 d12[0], r0 -; BE-I64-NEON-NEXT: mov r0, r5 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: add r3, sp, #284 -; BE-I64-NEON-NEXT: vmov.32 d10[0], r0 -; BE-I64-NEON-NEXT: ldr r0, [sp, #280] -; BE-I64-NEON-NEXT: mov r5, r1 -; BE-I64-NEON-NEXT: ldm r3, {r1, r2, r3} -; BE-I64-NEON-NEXT: bl lrintl -; BE-I64-NEON-NEXT: vldr d16, [sp, #120] @ 8-byte Reload -; BE-I64-NEON-NEXT: vldr d18, [sp, #112] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d17, d16 -; BE-I64-NEON-NEXT: vldr d26, [sp, #136] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d16, d18 -; BE-I64-NEON-NEXT: vldr d18, [sp, #104] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d31, d26 -; BE-I64-NEON-NEXT: vldr d26, [sp, #128] @ 8-byte Reload -; BE-I64-NEON-NEXT: vldr d20, [sp, #96] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d19, d18 -; BE-I64-NEON-NEXT: vrev64.32 d18, d20 -; BE-I64-NEON-NEXT: vldr d20, [sp, #80] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d30, d26 -; BE-I64-NEON-NEXT: vldr d26, [sp, #24] @ 8-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d10[1], r5 -; BE-I64-NEON-NEXT: vldr d22, [sp, #72] @ 8-byte Reload -; 
BE-I64-NEON-NEXT: vrev64.32 d21, d20 -; BE-I64-NEON-NEXT: vrev64.32 d1, d26 -; BE-I64-NEON-NEXT: vmov.32 d9[1], r7 -; BE-I64-NEON-NEXT: vmov.32 d12[1], r4 -; BE-I64-NEON-NEXT: vrev64.32 d20, d22 -; BE-I64-NEON-NEXT: vldr d22, [sp, #88] @ 8-byte Reload -; BE-I64-NEON-NEXT: vmov.32 d8[1], r6 -; BE-I64-NEON-NEXT: vrev64.32 d0, d14 -; BE-I64-NEON-NEXT: vmov.32 d28[0], r0 -; BE-I64-NEON-NEXT: add r0, r9, #64 -; BE-I64-NEON-NEXT: vrev64.32 d3, d10 -; BE-I64-NEON-NEXT: vldr d24, [sp, #48] @ 8-byte Reload -; BE-I64-NEON-NEXT: vrev64.32 d23, d22 -; BE-I64-NEON-NEXT: vrev64.32 d5, d9 -; BE-I64-NEON-NEXT: vst1.64 {d0, d1}, [r8:128]! -; BE-I64-NEON-NEXT: vrev64.32 d2, d12 -; BE-I64-NEON-NEXT: vmov.32 d15[1], r11 -; BE-I64-NEON-NEXT: vrev64.32 d22, d24 -; BE-I64-NEON-NEXT: vrev64.32 d25, d13 -; BE-I64-NEON-NEXT: vrev64.32 d4, d8 -; BE-I64-NEON-NEXT: vst1.64 {d30, d31}, [r8:128] -; BE-I64-NEON-NEXT: vst1.64 {d2, d3}, [r0:128]! -; BE-I64-NEON-NEXT: vmov.32 d28[1], r1 -; BE-I64-NEON-NEXT: vrev64.32 d24, d11 -; BE-I64-NEON-NEXT: vst1.64 {d4, d5}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 d27, d15 -; BE-I64-NEON-NEXT: vst1.64 {d24, d25}, [r0:128]! -; BE-I64-NEON-NEXT: vrev64.32 d26, d28 -; BE-I64-NEON-NEXT: vst1.64 {d22, d23}, [r0:128] -; BE-I64-NEON-NEXT: vst1.64 {d20, d21}, [r9:128]! -; BE-I64-NEON-NEXT: vst1.64 {d26, d27}, [r9:128]! -; BE-I64-NEON-NEXT: vst1.64 {d18, d19}, [r9:128]! 
-; BE-I64-NEON-NEXT: vst1.64 {d16, d17}, [r9:128] -; BE-I64-NEON-NEXT: add sp, sp, #152 -; BE-I64-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; BE-I64-NEON-NEXT: add sp, sp, #4 -; BE-I64-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} - %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16fp128(<32 x fp128> %x) - ret <32 x iXLen> %a + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x) + ret <16 x iXLen> %a } -declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>) +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) From 8b0b955d67fac20085390b39568e1fc299ba75e4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 9 Aug 2025 23:41:28 -0500 Subject: [PATCH 8/8] remove more <32 x fp128> --- .../AArch64/sve-fixed-vector-llrint.ll | 318 ----- .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 533 -------- llvm/test/CodeGen/AArch64/vector-llrint.ll | 258 ---- llvm/test/CodeGen/AArch64/vector-lrint.ll | 471 ------- llvm/test/CodeGen/PowerPC/vector-lrint.ll | 1131 ----------------- 5 files changed, 2711 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll index 38ba9240d15b4..5e934bd95e995 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll @@ -1171,321 +1171,3 @@ define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) nounwind { ret <16 x i64> %a } declare <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128>) - -define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) nounwind { -; CHECK-LABEL: llrint_v32fp128: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #512 -; CHECK-NEXT: addvl sp, sp, #-8 -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill -; CHECK-NEXT: mov x19, x8 -; CHECK-NEXT: stp q0, q7, [sp, #48] // 32-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #864] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q6, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp, #496] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #880] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: stp q5, q4, [sp, #128] // 32-byte Folded Spill -; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #896] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #912] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #800] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #432] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #816] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #832] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #848] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #736] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #368] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #752] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #768] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #784] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #672] -; CHECK-NEXT: addvl x9, 
sp, #8 -; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #688] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #704] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #720] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #608] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #624] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #640] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #656] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #544] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #560] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #576] -; CHECK-NEXT: addvl x9, sp, #8 -; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [x9, #592] -; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v3.16b -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 
-; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr z1, [x8, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr z1, [x8, #6, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill -; 
CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr z1, [x8, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #224] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr z1, [x8, #4, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, 
[sp, #256] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #320] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, 
[sp, #384] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #416] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #448] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: add x9, sp, #512 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload -; CHECK-NEXT: mov x8, #28 // =0x1c -; CHECK-NEXT: splice z0.d, 
p0, z0.d, z1.d -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-NEXT: mov x8, #24 // =0x18 -; CHECK-NEXT: ldr z0, [x9, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-NEXT: mov x8, #20 // =0x14 -; CHECK-NEXT: ldr z0, [x9, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-NEXT: mov x8, #16 // =0x10 -; CHECK-NEXT: ldr z0, [x9, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-NEXT: mov x8, #12 // =0xc -; CHECK-NEXT: ldr z0, [x9, #4, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-NEXT: mov x8, #8 // =0x8 -; CHECK-NEXT: ldr z0, [x9, #5, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-NEXT: mov x8, #4 // =0x4 -; CHECK-NEXT: ldr z0, [x9, #6, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ldr z0, [x8, #7, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: st1d { z0.d }, p0, [x19] -; CHECK-NEXT: addvl sp, sp, #8 -; CHECK-NEXT: add sp, sp, #512 -; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload -; CHECK-NEXT: ret - %a = call <32 x i64> @llvm.llrint.v32i64.v16fp128(<32 x fp128> %x) - ret <32 x i64> %a -} -declare <32 x i64> @llvm.llrint.v32i64.v32fp128(<32 x fp128>) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll index 175f4993d06c9..90f9eb6aa0a10 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll @@ -2107,536 +2107,3 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind { ret <16 x iXLen> %a } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) - -define <32 x iXLen> 
@lrint_v32fp128(<32 x fp128> %x) nounwind { -; CHECK-i32-LABEL: lrint_v32fp128: -; CHECK-i32: // %bb.0: -; CHECK-i32-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill -; CHECK-i32-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill -; CHECK-i32-NEXT: sub sp, sp, #528 -; CHECK-i32-NEXT: stp q2, q1, [sp, #368] // 32-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #624] -; CHECK-i32-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #640] -; CHECK-i32-NEXT: str q7, [sp, #208] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #656] -; CHECK-i32-NEXT: str q6, [sp, #240] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #96] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #672] -; CHECK-i32-NEXT: str q5, [sp, #272] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #80] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #688] -; CHECK-i32-NEXT: str q4, [sp, #304] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #704] -; CHECK-i32-NEXT: str q3, [sp, #336] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #32] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #720] -; CHECK-i32-NEXT: str q1, [sp, #112] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #736] -; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #752] -; CHECK-i32-NEXT: str q1, [sp, #16] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #768] -; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr 
q1, [sp, #784] -; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #800] -; CHECK-i32-NEXT: str q1, [sp, #288] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #816] -; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #832] -; CHECK-i32-NEXT: str q1, [sp, #256] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #848] -; CHECK-i32-NEXT: str q1, [sp, #352] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #864] -; CHECK-i32-NEXT: str q1, [sp, #416] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #880] -; CHECK-i32-NEXT: str q1, [sp, #320] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #896] -; CHECK-i32-NEXT: str q1, [sp, #400] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #912] -; CHECK-i32-NEXT: str q1, [sp, #448] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #928] -; CHECK-i32-NEXT: str q1, [sp, #480] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #944] -; CHECK-i32-NEXT: str q1, [sp, #432] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #960] -; CHECK-i32-NEXT: str q1, [sp, #464] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #976] -; CHECK-i32-NEXT: str q1, [sp, #496] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #992] -; CHECK-i32-NEXT: mov v0.16b, v1.16b -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #524] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #496] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #464] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #432] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; 
CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #480] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #448] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #400] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #320] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #416] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #352] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #256] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #144] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #288] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #192] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #64] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #16] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #176] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; 
CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #112] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w29, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w21, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #80] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload -; CHECK-i32-NEXT: str w0, [sp, #96] // 4-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w23, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w24, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w28, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w19, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w25, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w26, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w20, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w22, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov w27, w0 -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr s4, [sp, #16] // 4-byte Folded Reload -; CHECK-i32-NEXT: ldr w8, [sp, #64] // 4-byte Folded Reload -; CHECK-i32-NEXT: fmov s2, w24 -; CHECK-i32-NEXT: ldr s5, [sp, #144] // 
4-byte Folded Reload -; CHECK-i32-NEXT: ldr s6, [sp, #320] // 4-byte Folded Reload -; CHECK-i32-NEXT: fmov s3, w21 -; CHECK-i32-NEXT: mov v4.s[1], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #256] // 4-byte Folded Reload -; CHECK-i32-NEXT: ldr s7, [sp, #432] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v2.s[1], w23 -; CHECK-i32-NEXT: fmov s1, w26 -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: mov v5.s[1], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #400] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v3.s[1], w29 -; CHECK-i32-NEXT: mov v6.s[1], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #464] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v1.s[1], w25 -; CHECK-i32-NEXT: mov v0.s[1], w27 -; CHECK-i32-NEXT: mov v7.s[1], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v2.s[2], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #112] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v1.s[2], w19 -; CHECK-i32-NEXT: mov v0.s[2], w22 -; CHECK-i32-NEXT: mov v3.s[2], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #192] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v4.s[2], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #352] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v1.s[3], w28 -; CHECK-i32-NEXT: mov v0.s[3], w20 -; CHECK-i32-NEXT: mov v5.s[2], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #448] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v6.s[2], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #496] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v7.s[2], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #80] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v2.s[3], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #176] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v3.s[3], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #288] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v4.s[3], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #416] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v5.s[3], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #480] // 4-byte Folded Reload -; CHECK-i32-NEXT: mov v6.s[3], w8 -; CHECK-i32-NEXT: ldr w8, [sp, #524] // 4-byte Folded Reload -; CHECK-i32-NEXT: 
mov v7.s[3], w8 -; CHECK-i32-NEXT: add sp, sp, #528 -; CHECK-i32-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload -; CHECK-i32-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload -; CHECK-i32-NEXT: ret -; -; CHECK-i64-LABEL: lrint_v32fp128: -; CHECK-i64: // %bb.0: -; CHECK-i64-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill -; CHECK-i64-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: sub sp, sp, #512 -; CHECK-i64-NEXT: addvl sp, sp, #-8 -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill -; CHECK-i64-NEXT: mov x19, x8 -; CHECK-i64-NEXT: stp q0, q7, [sp, #48] // 32-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #864] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q6, [sp, #96] // 16-byte Folded Spill -; CHECK-i64-NEXT: str q0, [sp, #496] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #880] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: stp q5, q4, [sp, #128] // 32-byte Folded Spill -; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #896] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #912] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #800] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #432] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #816] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #832] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, 
#400] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #848] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #736] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #368] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #752] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #768] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #784] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #672] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #688] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #704] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #720] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #608] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #624] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #640] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #656] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #544] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #560] -; CHECK-i64-NEXT: addvl 
x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #576] -; CHECK-i64-NEXT: addvl x9, sp, #8 -; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [x9, #592] -; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-i64-NEXT: mov v0.16b, v3.16b -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ptrue p0.d, vl2 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr z1, [x8, #7, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-i64-NEXT: str z0, [x8, #7, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #128] // 
16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ptrue p0.d, vl2 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr z1, [x8, #6, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-i64-NEXT: str z0, [x8, #6, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #144] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ptrue p0.d, vl2 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr z1, [x8, #5, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-i64-NEXT: str z0, [x8, #5, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; 
CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #224] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ptrue p0.d, vl2 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr z1, [x8, #4, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-i64-NEXT: str z0, [x8, #4, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ptrue p0.d, vl2 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; 
CHECK-i64-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-i64-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #320] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ptrue p0.d, vl2 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-i64-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 
-; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #416] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ptrue p0.d, vl2 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-i64-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #448] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ptrue p0.d, vl2 -; CHECK-i64-NEXT: add x9, sp, #512 -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr z1, [x8] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov x8, #28 // =0x1c -; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-i64-NEXT: ptrue p0.d, vl4 -; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-i64-NEXT: mov x8, #24 // =0x18 -; CHECK-i64-NEXT: ldr z0, [x9, #1, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-i64-NEXT: mov x8, #20 // 
=0x14 -; CHECK-i64-NEXT: ldr z0, [x9, #2, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-i64-NEXT: mov x8, #16 // =0x10 -; CHECK-i64-NEXT: ldr z0, [x9, #3, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-i64-NEXT: mov x8, #12 // =0xc -; CHECK-i64-NEXT: ldr z0, [x9, #4, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-i64-NEXT: mov x8, #8 // =0x8 -; CHECK-i64-NEXT: ldr z0, [x9, #5, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-i64-NEXT: mov x8, #4 // =0x4 -; CHECK-i64-NEXT: ldr z0, [x9, #6, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3] -; CHECK-i64-NEXT: add x8, sp, #512 -; CHECK-i64-NEXT: ldr z0, [x8, #7, mul vl] // 16-byte Folded Reload -; CHECK-i64-NEXT: st1d { z0.d }, p0, [x19] -; CHECK-i64-NEXT: addvl sp, sp, #8 -; CHECK-i64-NEXT: add sp, sp, #512 -; CHECK-i64-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-i64-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload -; CHECK-i64-NEXT: ret - %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16fp128(<32 x fp128> %x) - ret <32 x iXLen> %a -} -declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>) diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll index 8f139cc225a67..3547c6f092b41 100644 --- a/llvm/test/CodeGen/AArch64/vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll @@ -917,261 +917,3 @@ define <16 x i64> @llrint_v16f128(<16 x fp128> %x) nounwind { ret <16 x i64> %a } declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>) - -define <32 x i64> @llrint_v32f128(<32 x fp128> %x) nounwind { -; CHECK-LABEL: llrint_v32f128: -; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #512 -; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #896] -; CHECK-NEXT: mov x19, x8 -; CHECK-NEXT: str q7, [sp, #272] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp, #496] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #912] -; CHECK-NEXT: str q6, [sp, #320] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #864] -; CHECK-NEXT: stp q3, q5, [sp, #16] // 32-byte Folded Spill -; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #880] -; CHECK-NEXT: stp q2, q0, [sp, #416] // 32-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #832] -; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #848] -; CHECK-NEXT: stp q4, q0, [sp, #368] // 32-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #800] -; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #816] -; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #768] -; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #784] -; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #736] -; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #752] -; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #704] -; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #720] -; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #672] -; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #688] -; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #640] -; CHECK-NEXT: str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #656] -; 
CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #608] -; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #624] -; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #576] -; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #592] -; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #544] -; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #560] -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #368] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, 
[sp, #272] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #272] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, 
[sp, #144] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #288] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #304] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #352] // 16-byte Folded 
Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #336] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #432] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #432] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload -; CHECK-NEXT: bl llrintl -; CHECK-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #208] -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload -; CHECK-NEXT: str q1, [x19, #192] -; CHECK-NEXT: ldr q1, [sp, #304] // 16-byte Folded Reload -; CHECK-NEXT: str q1, [x19, #176] -; CHECK-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #240] -; CHECK-NEXT: str q1, [x19, #160] -; CHECK-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #224] -; CHECK-NEXT: ldr q0, [sp, #208] 
// 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #144] -; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #128] -; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #112] -; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #96] -; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #80] -; CHECK-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #64] -; CHECK-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #48] -; CHECK-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #32] -; CHECK-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19, #16] -; CHECK-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [x19] -; CHECK-NEXT: add sp, sp, #512 -; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload -; CHECK-NEXT: ret - %a = call <32 x i64> @llvm.llrint.v32i64.v16f128(<32 x fp128> %x) - ret <32 x i64> %a -} -declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>) diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index b899db839a65a..927c6142138b3 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -1781,476 +1781,5 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>) -define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) nounwind { -; CHECK-i32-LABEL: lrint_v32fp128: -; CHECK-i32: // %bb.0: -; CHECK-i32-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK-i32-NEXT: sub sp, sp, #512 -; CHECK-i32-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #896] -; CHECK-i32-NEXT: stp q2, q3, [sp, #16] // 32-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #368] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #880] -; CHECK-i32-NEXT: stp q7, q4, [sp, #464] // 32-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #352] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #864] -; CHECK-i32-NEXT: str q6, [sp, #112] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #336] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #848] -; CHECK-i32-NEXT: str q5, [sp, #80] // 16-byte Folded Spill -; CHECK-i32-NEXT: str q1, [sp, #384] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #832] -; CHECK-i32-NEXT: str q1, [sp, #320] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #816] -; CHECK-i32-NEXT: str q1, [sp, #304] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #800] -; CHECK-i32-NEXT: str q1, [sp, #288] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #784] -; CHECK-i32-NEXT: str q1, [sp, #400] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #768] -; CHECK-i32-NEXT: str q1, [sp, #272] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #752] -; CHECK-i32-NEXT: str q1, [sp, #256] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #736] -; CHECK-i32-NEXT: str q1, [sp, #240] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #720] -; CHECK-i32-NEXT: str q1, [sp, #416] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #704] -; CHECK-i32-NEXT: str q1, [sp, #224] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #688] -; CHECK-i32-NEXT: str q1, [sp, #208] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #672] -; CHECK-i32-NEXT: str q1, [sp, #192] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #656] -; CHECK-i32-NEXT: str q1, [sp, #432] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, 
[sp, #640] -; CHECK-i32-NEXT: str q1, [sp, #176] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #624] -; CHECK-i32-NEXT: str q1, [sp, #160] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #608] -; CHECK-i32-NEXT: str q1, [sp, #144] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #592] -; CHECK-i32-NEXT: str q1, [sp, #448] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #576] -; CHECK-i32-NEXT: str q1, [sp, #128] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #560] -; CHECK-i32-NEXT: str q1, [sp, #96] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #544] -; CHECK-i32-NEXT: str q1, [sp, #64] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q1, [sp, #528] -; CHECK-i32-NEXT: str q1, [sp, #48] // 16-byte Folded Spill -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #496] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-i32-NEXT: 
mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #160] // 
16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #432] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; 
CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[3], w0 -; CHECK-i32-NEXT: str q0, [sp, #400] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: fmov s0, w0 -; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[1], w0 -; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload -; CHECK-i32-NEXT: mov v0.s[2], w0 -; CHECK-i32-NEXT: str q0, [sp, #384] // 16-byte Folded Spill -; CHECK-i32-NEXT: ldr 
q0, [sp, #368] // 16-byte Folded Reload -; CHECK-i32-NEXT: bl lrintl -; CHECK-i32-NEXT: ldp q7, q6, [sp, #384] // 32-byte Folded Reload -; CHECK-i32-NEXT: ldp q1, q0, [sp, #480] // 32-byte Folded Reload -; CHECK-i32-NEXT: ldp q3, q2, [sp, #448] // 32-byte Folded Reload -; CHECK-i32-NEXT: ldp q5, q4, [sp, #416] // 32-byte Folded Reload -; CHECK-i32-NEXT: mov v7.s[3], w0 -; CHECK-i32-NEXT: add sp, sp, #512 -; CHECK-i32-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-i32-NEXT: ret -; -; CHECK-i64-LABEL: lrint_v32fp128: -; CHECK-i64: // %bb.0: -; CHECK-i64-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill -; CHECK-i64-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: sub sp, sp, #512 -; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #896] -; CHECK-i64-NEXT: mov x19, x8 -; CHECK-i64-NEXT: str q7, [sp, #272] // 16-byte Folded Spill -; CHECK-i64-NEXT: str q0, [sp, #496] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #912] -; CHECK-i64-NEXT: str q6, [sp, #320] // 16-byte Folded Spill -; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #864] -; CHECK-i64-NEXT: stp q3, q5, [sp, #16] // 32-byte Folded Spill -; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #880] -; CHECK-i64-NEXT: stp q2, q0, [sp, #416] // 32-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #832] -; CHECK-i64-NEXT: str q0, [sp, #400] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #848] -; CHECK-i64-NEXT: stp q4, q0, [sp, #368] // 32-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #800] -; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #816] -; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #768] -; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #784] -; CHECK-i64-NEXT: str q0, [sp, #288] // 
16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #736] -; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #752] -; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #704] -; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #720] -; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #672] -; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #688] -; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #640] -; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #656] -; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #608] -; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #624] -; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #576] -; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #592] -; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #544] -; CHECK-i64-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #560] -; CHECK-i64-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-i64-NEXT: mov v0.16b, v1.16b -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #464] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str 
q0, [sp, #16] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #416] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #368] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #272] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #320] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #272] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #272] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; 
CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #224] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #96] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #176] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #112] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #144] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #128] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #160] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #192] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #192] // 16-byte 
Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #208] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #240] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #256] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #256] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #288] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #288] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #304] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #288] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #304] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #336] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #336] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #336] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #352] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #384] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #384] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #384] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #400] // 16-byte 
Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #432] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #432] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #432] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: str q0, [sp, #448] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #480] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: str q0, [sp, #480] // 16-byte Folded Spill -; CHECK-i64-NEXT: ldr q0, [sp, #496] // 16-byte Folded Reload -; CHECK-i64-NEXT: bl lrintl -; CHECK-i64-NEXT: ldr q0, [sp, #400] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #208] -; CHECK-i64-NEXT: fmov d0, x0 -; CHECK-i64-NEXT: ldr q1, [sp, #480] // 16-byte Folded Reload -; CHECK-i64-NEXT: mov v0.d[1], v1.d[0] -; CHECK-i64-NEXT: ldr q1, [sp, #352] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q1, [x19, #192] -; CHECK-i64-NEXT: ldr q1, [sp, #304] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q1, [x19, #176] -; CHECK-i64-NEXT: ldr q1, [sp, #256] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #240] -; CHECK-i64-NEXT: str q1, [x19, #160] -; CHECK-i64-NEXT: ldr q0, [sp, #448] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #224] -; CHECK-i64-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #144] -; CHECK-i64-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #128] -; CHECK-i64-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #112] -; CHECK-i64-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #96] -; CHECK-i64-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #80] -; CHECK-i64-NEXT: ldr q0, [sp, #272] // 
16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #64] -; CHECK-i64-NEXT: ldr q0, [sp, #320] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #48] -; CHECK-i64-NEXT: ldr q0, [sp, #368] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #32] -; CHECK-i64-NEXT: ldr q0, [sp, #416] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19, #16] -; CHECK-i64-NEXT: ldr q0, [sp, #464] // 16-byte Folded Reload -; CHECK-i64-NEXT: str q0, [x19] -; CHECK-i64-NEXT: add sp, sp, #512 -; CHECK-i64-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-i64-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload -; CHECK-i64-NEXT: ret - %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16fp128(<32 x fp128> %x) - ret <32 x iXLen> %a -} -declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>) ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-i32-GI: {{.*}} diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll index 6c824be017e81..f4375362f861c 100644 --- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll @@ -5529,1134 +5529,3 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) nounwind { ret <16 x i64> %a } declare <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128>) - -define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) nounwind { -; BE-LABEL: lrint_v32i64_v32f128: -; BE: # %bb.0: -; BE-NEXT: mflr r0 -; BE-NEXT: stdu r1, -896(r1) -; BE-NEXT: std r0, 912(r1) -; BE-NEXT: std r30, 880(r1) # 8-byte Folded Spill -; BE-NEXT: mr r30, r3 -; BE-NEXT: addi r3, r1, 1440 -; BE-NEXT: li r4, 688 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 256 -; BE-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 704 -; BE-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 720 -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi r3, r1, 1456 -; BE-NEXT: stxvd2x v22, r1, r4 # 
16-byte Folded Spill -; BE-NEXT: li r4, 736 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 240 -; BE-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 752 -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi r3, r1, 1408 -; BE-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 768 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 224 -; BE-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 784 -; BE-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 800 -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi r3, r1, 1424 -; BE-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 816 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 208 -; BE-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 832 -; BE-NEXT: vmr v28, v2 -; BE-NEXT: vmr v2, v3 -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi r3, r1, 1376 -; BE-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 848 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 192 -; BE-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 864 -; BE-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 400 -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi r3, r1, 1392 -; BE-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 416 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 176 -; BE-NEXT: stxvd2x v12, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 368 -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi r3, r1, 1344 -; BE-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 384 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 160 -; BE-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 336 -; BE-NEXT: stxvd2x v9, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 352 -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi 
r3, r1, 1360 -; BE-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 304 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 144 -; BE-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 320 -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi r3, r1, 1312 -; BE-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 272 -; BE-NEXT: lxvw4x vs0, 0, r3 -; BE-NEXT: li r3, 128 -; BE-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill -; BE-NEXT: li r4, 288 -; BE-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill -; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; BE-NEXT: addi r3, r1, 1328 -; BE-NEXT: lxvw4x v23, 0, r3 -; BE-NEXT: addi r3, r1, 1280 -; BE-NEXT: lxvw4x v22, 0, r3 -; BE-NEXT: addi r3, r1, 1296 -; BE-NEXT: lxvw4x v21, 0, r3 -; BE-NEXT: addi r3, r1, 1248 -; BE-NEXT: lxvw4x v20, 0, r3 -; BE-NEXT: addi r3, r1, 1264 -; BE-NEXT: lxvw4x v31, 0, r3 -; BE-NEXT: addi r3, r1, 1216 -; BE-NEXT: lxvw4x v30, 0, r3 -; BE-NEXT: addi r3, r1, 1232 -; BE-NEXT: lxvw4x v29, 0, r3 -; BE-NEXT: addi r3, r1, 1184 -; BE-NEXT: lxvw4x v27, 0, r3 -; BE-NEXT: addi r3, r1, 1200 -; BE-NEXT: lxvw4x v26, 0, r3 -; BE-NEXT: addi r3, r1, 1152 -; BE-NEXT: lxvw4x v25, 0, r3 -; BE-NEXT: addi r3, r1, 1168 -; BE-NEXT: lxvw4x v24, 0, r3 -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v28 -; BE-NEXT: std r3, 440(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v24 -; BE-NEXT: std r3, 432(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v25 -; BE-NEXT: std r3, 536(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v26 -; BE-NEXT: std r3, 528(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v27 -; BE-NEXT: std r3, 552(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v29 -; BE-NEXT: std r3, 544(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v30 -; BE-NEXT: std r3, 568(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v31 -; 
BE-NEXT: std r3, 560(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v20 -; BE-NEXT: std r3, 584(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v21 -; BE-NEXT: std r3, 576(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v22 -; BE-NEXT: std r3, 600(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: vmr v2, v23 -; BE-NEXT: std r3, 592(r1) -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 616(r1) -; BE-NEXT: li r3, 128 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 608(r1) -; BE-NEXT: li r3, 144 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 632(r1) -; BE-NEXT: li r3, 160 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 624(r1) -; BE-NEXT: li r3, 176 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 648(r1) -; BE-NEXT: li r3, 192 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 640(r1) -; BE-NEXT: li r3, 208 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 664(r1) -; BE-NEXT: li r3, 224 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 656(r1) -; BE-NEXT: li r3, 240 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 680(r1) -; BE-NEXT: li r3, 256 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 672(r1) -; BE-NEXT: li r3, 272 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 456(r1) -; BE-NEXT: li r3, 288 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded 
Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 448(r1) -; BE-NEXT: li r3, 304 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 472(r1) -; BE-NEXT: li r3, 320 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 464(r1) -; BE-NEXT: li r3, 336 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 488(r1) -; BE-NEXT: li r3, 352 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 480(r1) -; BE-NEXT: li r3, 368 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 504(r1) -; BE-NEXT: li r3, 384 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 496(r1) -; BE-NEXT: li r3, 400 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 520(r1) -; BE-NEXT: li r3, 416 -; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; BE-NEXT: bl lrintf128 -; BE-NEXT: nop -; BE-NEXT: std r3, 512(r1) -; BE-NEXT: addi r3, r1, 432 -; BE-NEXT: lxvd2x vs0, 0, r3 -; BE-NEXT: addi r3, r1, 528 -; BE-NEXT: lxvd2x vs1, 0, r3 -; BE-NEXT: addi r3, r1, 544 -; BE-NEXT: lxvd2x vs2, 0, r3 -; BE-NEXT: addi r3, r1, 560 -; BE-NEXT: lxvd2x vs3, 0, r3 -; BE-NEXT: addi r3, r1, 576 -; BE-NEXT: lxvd2x vs4, 0, r3 -; BE-NEXT: addi r3, r1, 592 -; BE-NEXT: lxvd2x vs5, 0, r3 -; BE-NEXT: addi r3, r1, 608 -; BE-NEXT: lxvd2x vs6, 0, r3 -; BE-NEXT: addi r3, r1, 624 -; BE-NEXT: lxvd2x vs7, 0, r3 -; BE-NEXT: addi r3, r1, 640 -; BE-NEXT: lxvd2x vs8, 0, r3 -; BE-NEXT: addi r3, r1, 656 -; BE-NEXT: lxvd2x vs9, 0, r3 -; BE-NEXT: addi r3, r1, 672 -; BE-NEXT: lxvd2x vs10, 0, r3 -; BE-NEXT: addi r3, r1, 448 -; BE-NEXT: lxvd2x vs11, 0, r3 -; BE-NEXT: addi r3, r1, 464 -; BE-NEXT: 
lxvd2x vs12, 0, r3 -; BE-NEXT: addi r3, r1, 480 -; BE-NEXT: lxvd2x vs13, 0, r3 -; BE-NEXT: addi r3, r1, 496 -; BE-NEXT: lxvd2x v2, 0, r3 -; BE-NEXT: addi r3, r1, 512 -; BE-NEXT: lxvd2x v3, 0, r3 -; BE-NEXT: li r3, 80 -; BE-NEXT: stxvd2x v3, r30, r3 -; BE-NEXT: li r3, 64 -; BE-NEXT: stxvd2x v2, r30, r3 -; BE-NEXT: li r3, 48 -; BE-NEXT: stxvd2x vs13, r30, r3 -; BE-NEXT: li r3, 32 -; BE-NEXT: stxvd2x vs12, r30, r3 -; BE-NEXT: li r3, 16 -; BE-NEXT: stxvd2x vs11, r30, r3 -; BE-NEXT: li r3, 240 -; BE-NEXT: stxvd2x vs10, r30, r3 -; BE-NEXT: li r3, 224 -; BE-NEXT: stxvd2x vs9, r30, r3 -; BE-NEXT: li r3, 208 -; BE-NEXT: stxvd2x vs8, r30, r3 -; BE-NEXT: li r3, 192 -; BE-NEXT: stxvd2x vs7, r30, r3 -; BE-NEXT: li r3, 176 -; BE-NEXT: stxvd2x vs6, r30, r3 -; BE-NEXT: li r3, 160 -; BE-NEXT: stxvd2x vs5, r30, r3 -; BE-NEXT: li r3, 144 -; BE-NEXT: stxvd2x vs4, r30, r3 -; BE-NEXT: li r3, 128 -; BE-NEXT: stxvd2x vs3, r30, r3 -; BE-NEXT: li r3, 112 -; BE-NEXT: stxvd2x vs2, r30, r3 -; BE-NEXT: li r3, 96 -; BE-NEXT: stxvd2x vs1, r30, r3 -; BE-NEXT: li r3, 864 -; BE-NEXT: stxvd2x vs0, 0, r30 -; BE-NEXT: ld r30, 880(r1) # 8-byte Folded Reload -; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 848 -; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 832 -; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 816 -; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 800 -; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 784 -; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 768 -; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 752 -; BE-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 736 -; BE-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 720 -; BE-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 704 -; BE-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; BE-NEXT: li r3, 688 -; 
BE-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; BE-NEXT: addi r1, r1, 896 -; BE-NEXT: ld r0, 16(r1) -; BE-NEXT: mtlr r0 -; BE-NEXT: blr -; -; CHECK-LABEL: lrint_v32i64_v32f128: -; CHECK: # %bb.0: -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -640(r1) -; CHECK-NEXT: li r4, 432 -; CHECK-NEXT: std r0, 656(r1) -; CHECK-NEXT: std r30, 624(r1) # 8-byte Folded Spill -; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: addi r3, r1, 1184 -; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 448 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 1168 -; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 464 -; CHECK-NEXT: lxvd2x vs1, 0, r3 -; CHECK-NEXT: addi r3, r1, 1152 -; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 480 -; CHECK-NEXT: lxvd2x vs2, 0, r3 -; CHECK-NEXT: addi r3, r1, 1136 -; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 496 -; CHECK-NEXT: lxvd2x vs3, 0, r3 -; CHECK-NEXT: addi r3, r1, 1120 -; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 512 -; CHECK-NEXT: lxvd2x vs4, 0, r3 -; CHECK-NEXT: addi r3, r1, 1104 -; CHECK-NEXT: vmr v24, v3 -; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 528 -; CHECK-NEXT: lxvd2x vs5, 0, r3 -; CHECK-NEXT: addi r3, r1, 1088 -; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 544 -; CHECK-NEXT: xxswapd vs0, vs0 -; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 560 -; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 576 -; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 592 -; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 608 -; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 416 -; CHECK-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 400 -; CHECK-NEXT: stxvd2x 
v12, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 384 -; CHECK-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 352 -; CHECK-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 336 -; CHECK-NEXT: stxvd2x v9, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 304 -; CHECK-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 288 -; CHECK-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 256 -; CHECK-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 240 -; CHECK-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 224 -; CHECK-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 192 -; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 176 -; CHECK-NEXT: xxswapd vs0, vs1 -; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 160 -; CHECK-NEXT: xxswapd vs0, vs2 -; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 144 -; CHECK-NEXT: xxswapd vs0, vs3 -; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 128 -; CHECK-NEXT: xxswapd vs0, vs4 -; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: li r4, 112 -; CHECK-NEXT: xxswapd vs0, vs5 -; CHECK-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: xxswapd vs0, vs0 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: addi r3, r1, 1072 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: xxswapd vs0, vs0 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: addi r3, r1, 1056 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: xxswapd vs0, vs0 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: addi r3, r1, 1040 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: xxswapd vs0, vs0 
-; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: addi r3, r1, 1024 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 1008 -; CHECK-NEXT: xxswapd v22, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 992 -; CHECK-NEXT: xxswapd v21, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 976 -; CHECK-NEXT: xxswapd v20, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 960 -; CHECK-NEXT: xxswapd v31, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 944 -; CHECK-NEXT: xxswapd v30, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 928 -; CHECK-NEXT: xxswapd v29, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 912 -; CHECK-NEXT: xxswapd v28, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 896 -; CHECK-NEXT: xxswapd v27, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: addi r3, r1, 880 -; CHECK-NEXT: xxswapd v26, vs0 -; CHECK-NEXT: lxvd2x vs0, 0, r3 -; CHECK-NEXT: xxswapd v25, vs0 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: vmr v2, v24 -; CHECK-NEXT: mtvsrd v23, r3 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 368 -; CHECK-NEXT: vmr v2, v25 -; CHECK-NEXT: xxmrghd vs0, vs0, v23 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: vmr v2, v26 -; CHECK-NEXT: mtvsrd v25, r3 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 320 -; CHECK-NEXT: vmr v2, v27 -; CHECK-NEXT: xxmrghd vs0, vs0, v25 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: vmr v2, v28 -; CHECK-NEXT: mtvsrd v27, r3 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 272 -; CHECK-NEXT: vmr v2, v29 -; CHECK-NEXT: xxmrghd vs0, vs0, v27 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 
16-byte Folded Spill -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: vmr v2, v30 -; CHECK-NEXT: mtvsrd v29, r3 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 208 -; CHECK-NEXT: vmr v2, v31 -; CHECK-NEXT: xxmrghd vs0, vs0, v29 -; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: vmr v2, v20 -; CHECK-NEXT: mtvsrd v31, r3 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: vmr v2, v21 -; CHECK-NEXT: xxmrghd v31, vs0, v31 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: vmr v2, v22 -; CHECK-NEXT: mtvsrd v29, r3 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v29, vs0, v29 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v27, r3 -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v27, vs0, v27 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v25, r3 -; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v25, vs0, v25 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v23, r3 -; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v23, vs0, v23 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: 
nop -; CHECK-NEXT: mtvsrd v22, r3 -; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 176 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v22, vs0, v22 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v21, r3 -; CHECK-NEXT: li r3, 192 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 224 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v21, vs0, v21 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v20, r3 -; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 256 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v20, vs0, v20 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v24, r3 -; CHECK-NEXT: li r3, 288 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 304 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v24, vs0, v24 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v26, r3 -; CHECK-NEXT: li r3, 336 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 352 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v26, vs0, v26 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v28, r3 -; CHECK-NEXT: li r3, 384 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 400 
-; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: xxmrghd v28, vs0, v28 -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtvsrd v30, r3 -; CHECK-NEXT: li r3, 416 -; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: bl lrintf128 -; CHECK-NEXT: nop -; CHECK-NEXT: mtfprd f0, r3 -; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: xxswapd vs1, v28 -; CHECK-NEXT: li r4, 208 -; CHECK-NEXT: xxswapd vs2, v26 -; CHECK-NEXT: xxswapd vs3, v27 -; CHECK-NEXT: xxmrghd v2, vs0, v30 -; CHECK-NEXT: xxswapd vs0, v2 -; CHECK-NEXT: stxvd2x vs0, r30, r3 -; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stxvd2x vs1, r30, r3 -; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stxvd2x vs2, r30, r3 -; CHECK-NEXT: li r3, 32 -; CHECK-NEXT: xxswapd vs0, v24 -; CHECK-NEXT: stxvd2x vs0, r30, r3 -; CHECK-NEXT: li r3, 16 -; CHECK-NEXT: xxswapd vs1, v20 -; CHECK-NEXT: stxvd2x vs1, r30, r3 -; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: xxswapd vs2, v23 -; CHECK-NEXT: xxswapd vs0, v21 -; CHECK-NEXT: stxvd2x vs0, r30, r3 -; CHECK-NEXT: li r3, 224 -; CHECK-NEXT: xxswapd vs1, v22 -; CHECK-NEXT: stxvd2x vs1, r30, r3 -; CHECK-NEXT: li r3, 208 -; CHECK-NEXT: stxvd2x vs2, r30, r3 -; CHECK-NEXT: li r3, 192 -; CHECK-NEXT: xxswapd vs0, v25 -; CHECK-NEXT: stxvd2x vs0, r30, r3 -; CHECK-NEXT: li r3, 176 -; CHECK-NEXT: stxvd2x vs3, r30, r3 -; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 272 -; CHECK-NEXT: xxswapd vs1, v29 -; CHECK-NEXT: stxvd2x vs1, r30, r3 -; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 320 -; CHECK-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: li r4, 368 -; CHECK-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload -; CHECK-NEXT: xxswapd vs0, v31 -; CHECK-NEXT: stxvd2x vs0, r30, r3 -; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: xxswapd vs2, vs2 -; CHECK-NEXT: stxvd2x vs2, r30, r3 -; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: xxswapd vs1, vs1 -; CHECK-NEXT: stxvd2x 
vs1, r30, r3 -; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: xxswapd vs3, vs3 -; CHECK-NEXT: stxvd2x vs3, r30, r3 -; CHECK-NEXT: li r3, 608 -; CHECK-NEXT: xxswapd vs4, vs4 -; CHECK-NEXT: stxvd2x vs4, 0, r30 -; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 592 -; CHECK-NEXT: ld r30, 624(r1) # 8-byte Folded Reload -; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 576 -; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 560 -; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 544 -; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 528 -; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 512 -; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 496 -; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 480 -; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 464 -; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 448 -; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: li r3, 432 -; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; CHECK-NEXT: addi r1, r1, 640 -; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: mtlr r0 -; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v32i64_v32f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -640(r1) -; FAST-NEXT: li r4, 432 -; FAST-NEXT: std r0, 656(r1) -; FAST-NEXT: std r30, 624(r1) # 8-byte Folded Spill -; FAST-NEXT: mr r30, r3 -; FAST-NEXT: addi r3, r1, 1184 -; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 448 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 1168 -; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 464 -; FAST-NEXT: lxvd2x vs1, 0, r3 -; FAST-NEXT: addi r3, r1, 1152 -; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 480 -; 
FAST-NEXT: lxvd2x vs2, 0, r3 -; FAST-NEXT: addi r3, r1, 1136 -; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 496 -; FAST-NEXT: lxvd2x vs3, 0, r3 -; FAST-NEXT: addi r3, r1, 1120 -; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 512 -; FAST-NEXT: lxvd2x vs4, 0, r3 -; FAST-NEXT: addi r3, r1, 1104 -; FAST-NEXT: vmr v24, v3 -; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 528 -; FAST-NEXT: lxvd2x vs5, 0, r3 -; FAST-NEXT: addi r3, r1, 1088 -; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 544 -; FAST-NEXT: xxswapd vs0, vs0 -; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 560 -; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 576 -; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 592 -; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 608 -; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 416 -; FAST-NEXT: stxvd2x v13, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 400 -; FAST-NEXT: stxvd2x v12, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 384 -; FAST-NEXT: stxvd2x v11, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 352 -; FAST-NEXT: stxvd2x v10, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 336 -; FAST-NEXT: stxvd2x v9, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 304 -; FAST-NEXT: stxvd2x v8, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 288 -; FAST-NEXT: stxvd2x v7, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 256 -; FAST-NEXT: stxvd2x v6, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 240 -; FAST-NEXT: stxvd2x v5, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 224 -; FAST-NEXT: stxvd2x v4, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 192 -; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 176 -; FAST-NEXT: xxswapd vs0, vs1 -; FAST-NEXT: stxvd2x 
vs0, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 160 -; FAST-NEXT: xxswapd vs0, vs2 -; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 144 -; FAST-NEXT: xxswapd vs0, vs3 -; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 128 -; FAST-NEXT: xxswapd vs0, vs4 -; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 112 -; FAST-NEXT: xxswapd vs0, vs5 -; FAST-NEXT: stxvd2x vs0, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: xxswapd vs0, vs0 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: addi r3, r1, 1072 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxswapd vs0, vs0 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: addi r3, r1, 1056 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: li r3, 64 -; FAST-NEXT: xxswapd vs0, vs0 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: addi r3, r1, 1040 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: li r3, 48 -; FAST-NEXT: xxswapd vs0, vs0 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: addi r3, r1, 1024 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 1008 -; FAST-NEXT: xxswapd v22, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 992 -; FAST-NEXT: xxswapd v21, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 976 -; FAST-NEXT: xxswapd v20, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 960 -; FAST-NEXT: xxswapd v31, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 944 -; FAST-NEXT: xxswapd v30, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 928 -; FAST-NEXT: xxswapd v29, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 912 -; FAST-NEXT: xxswapd v28, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 896 -; FAST-NEXT: xxswapd v27, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, 
r1, 880 -; FAST-NEXT: xxswapd v26, vs0 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: xxswapd v25, vs0 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v24 -; FAST-NEXT: mtvsrd v23, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 368 -; FAST-NEXT: vmr v2, v25 -; FAST-NEXT: xxmrghd vs0, vs0, v23 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v26 -; FAST-NEXT: mtvsrd v25, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 320 -; FAST-NEXT: vmr v2, v27 -; FAST-NEXT: xxmrghd vs0, vs0, v25 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v28 -; FAST-NEXT: mtvsrd v27, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 272 -; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: xxmrghd vs0, vs0, v27 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v30 -; FAST-NEXT: mtvsrd v29, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 208 -; FAST-NEXT: vmr v2, v31 -; FAST-NEXT: xxmrghd vs0, vs0, v29 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v20 -; FAST-NEXT: mtvsrd v31, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v21 -; FAST-NEXT: xxmrghd v31, vs0, v31 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v22 -; FAST-NEXT: mtvsrd v29, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 48 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v29, vs0, v29 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v27, r3 -; FAST-NEXT: li r3, 64 -; 
FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 80 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v27, vs0, v27 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v25, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 112 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v25, vs0, v25 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v23, r3 -; FAST-NEXT: li r3, 128 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 144 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v23, vs0, v23 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v22, r3 -; FAST-NEXT: li r3, 160 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 176 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v22, vs0, v22 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v21, r3 -; FAST-NEXT: li r3, 192 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 224 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v21, vs0, v21 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v20, r3 -; FAST-NEXT: li r3, 240 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 256 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v20, vs0, v20 -; FAST-NEXT: bl lrintf128 -; 
FAST-NEXT: nop -; FAST-NEXT: mtvsrd v24, r3 -; FAST-NEXT: li r3, 288 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 304 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v24, vs0, v24 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v26, r3 -; FAST-NEXT: li r3, 336 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 352 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v26, vs0, v26 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v28, r3 -; FAST-NEXT: li r3, 384 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 400 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v28, vs0, v28 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v30, r3 -; FAST-NEXT: li r3, 416 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxswapd vs1, v28 -; FAST-NEXT: li r4, 208 -; FAST-NEXT: xxswapd vs2, v26 -; FAST-NEXT: xxswapd vs3, v27 -; FAST-NEXT: xxmrghd v2, vs0, v30 -; FAST-NEXT: xxswapd vs0, v2 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: li r3, 48 -; FAST-NEXT: stxvd2x vs2, r30, r3 -; FAST-NEXT: li r3, 32 -; FAST-NEXT: xxswapd vs0, v24 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 16 -; FAST-NEXT: xxswapd vs1, v20 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: li r3, 240 -; FAST-NEXT: xxswapd vs2, v23 -; FAST-NEXT: xxswapd vs0, v21 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 224 -; FAST-NEXT: xxswapd vs1, v22 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: 
li r3, 208 -; FAST-NEXT: stxvd2x vs2, r30, r3 -; FAST-NEXT: li r3, 192 -; FAST-NEXT: xxswapd vs0, v25 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 176 -; FAST-NEXT: stxvd2x vs3, r30, r3 -; FAST-NEXT: li r3, 160 -; FAST-NEXT: lxvd2x vs2, r1, r4 # 16-byte Folded Reload -; FAST-NEXT: li r4, 272 -; FAST-NEXT: xxswapd vs1, v29 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: li r3, 144 -; FAST-NEXT: lxvd2x vs1, r1, r4 # 16-byte Folded Reload -; FAST-NEXT: li r4, 320 -; FAST-NEXT: lxvd2x vs3, r1, r4 # 16-byte Folded Reload -; FAST-NEXT: li r4, 368 -; FAST-NEXT: lxvd2x vs4, r1, r4 # 16-byte Folded Reload -; FAST-NEXT: xxswapd vs0, v31 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 128 -; FAST-NEXT: xxswapd vs2, vs2 -; FAST-NEXT: stxvd2x vs2, r30, r3 -; FAST-NEXT: li r3, 112 -; FAST-NEXT: xxswapd vs1, vs1 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: xxswapd vs3, vs3 -; FAST-NEXT: stxvd2x vs3, r30, r3 -; FAST-NEXT: li r3, 608 -; FAST-NEXT: xxswapd vs4, vs4 -; FAST-NEXT: stxvd2x vs4, 0, r30 -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 592 -; FAST-NEXT: ld r30, 624(r1) # 8-byte Folded Reload -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 576 -; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 560 -; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 544 -; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 528 -; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 512 -; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 496 -; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 480 -; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 464 -; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 448 -; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 432 
-; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 640 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr - %a = call <32 x i64> @llvm.lrint.v32i64.v32f128(<32 x fp128> %x) - ret <32 x i64> %a -} -declare <32 x i64> @llvm.lrint.v32i64.v32f128(<32 x fp128>)