diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index ddb27dc6404fa..67ee2fd791bcf 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -129,7 +129,9 @@ include "LoongArchInstrInfo.td" //===----------------------------------------------------------------------===// def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>; -def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>; +def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, + FeatureUAL, + FeatureExtLSX]>; // Generic 64-bit processor with double-precision floating-point support. def : ProcessorModel<"loongarch64", NoSchedModel, [Feature64Bit, diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll index 06dfe00d90847..5c9575b2baab1 100644 --- a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll +++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll @@ -123,13 +123,12 @@ define i64 @caller_large_scalars() nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -80 ; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ; CHECK-NEXT: st.d $zero, $sp, 24 -; CHECK-NEXT: st.d $zero, $sp, 16 -; CHECK-NEXT: st.d $zero, $sp, 8 +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vst $vr0, $sp, 8 ; CHECK-NEXT: ori $a0, $zero, 2 ; CHECK-NEXT: st.d $a0, $sp, 0 ; CHECK-NEXT: st.d $zero, $sp, 56 -; CHECK-NEXT: st.d $zero, $sp, 48 -; CHECK-NEXT: st.d $zero, $sp, 40 +; CHECK-NEXT: vst $vr0, $sp, 40 ; CHECK-NEXT: ori $a2, $zero, 1 ; CHECK-NEXT: addi.d $a0, $sp, 32 ; CHECK-NEXT: addi.d $a1, $sp, 0 @@ -182,14 +181,13 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind { ; CHECK-NEXT: ori $a0, $zero, 9 ; CHECK-NEXT: st.d $a0, $sp, 0 ; CHECK-NEXT: st.d $zero, $sp, 40 -; CHECK-NEXT: st.d $zero, $sp, 32 -; CHECK-NEXT: st.d $zero, $sp, 24 +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vst $vr0, $sp, 24 ; CHECK-NEXT: ori $a0, $zero, 10 ; CHECK-NEXT: st.d $a0, $sp, 16 ; CHECK-NEXT: st.d $zero, $sp, 72 -; CHECK-NEXT: st.d $zero, $sp, 64 -; CHECK-NEXT: st.d $zero, $sp, 56 -; CHECK-NEXT: ori $t0, $zero, 8 +; CHECK-NEXT: ori $a0, $zero, 8 +; CHECK-NEXT: st.d $a0, $sp, 48 ; CHECK-NEXT: ori $a0, $zero, 1 ; CHECK-NEXT: ori $a1, $zero, 2 ; CHECK-NEXT: ori $a2, $zero, 3 @@ -198,7 +196,7 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind { ; CHECK-NEXT: ori $a5, $zero, 6 ; CHECK-NEXT: ori $a6, $zero, 7 ; CHECK-NEXT: addi.d $a7, $sp, 48 -; CHECK-NEXT: st.d $t0, $sp, 48 +; CHECK-NEXT: vst $vr0, $sp, 56 ; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs) ; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload ; CHECK-NEXT: addi.d $sp, $sp, 96 diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll index 34fbec03c535b..35186b660c1e6 100644 --- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll +++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll @@ -63,26 +63,17 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) -; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI3_1) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) -; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI3_2) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) -; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI3_3) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) -; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI3_4) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) -; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI3_5) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) -; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI3_6) -; CHECK-NEXT: addi.d $a0, $zero, 1 -; CHECK-NEXT: movgr2fr.d $fa0, $a0 -; CHECK-NEXT: ffint.d.l $fa0, $fa0 ; CHECK-NEXT: ori $a0, $zero, 0 ; CHECK-NEXT: lu32i.d $a0, 131072 ; CHECK-NEXT: lu52i.d $a0, $a0, 1026 +; CHECK-NEXT: vldi $vr0, -912 +; CHECK-NEXT: vldi $vr1, -1024 +; CHECK-NEXT: vldi $vr2, -1016 +; CHECK-NEXT: vldi $vr3, -1008 +; CHECK-NEXT: vldi $vr4, -1004 +; CHECK-NEXT: vldi $vr5, -1000 +; CHECK-NEXT: vldi $vr6, -996 +; CHECK-NEXT: vldi $vr7, -992 ; CHECK-NEXT: bl %plt(callee_double_in_gpr_exhausted_fprs) ; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -98,9 +89,7 @@ define i64 @caller_double_in_gpr_exhausted_fprs() nounwind { define double @callee_double_ret() nounwind { ; CHECK-LABEL: callee_double_ret: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $a0, $zero, 1 -; CHECK-NEXT: movgr2fr.d $fa0, $a0 -; CHECK-NEXT: ffint.d.l $fa0, $fa0 +; CHECK-NEXT: vldi $vr0, -912 ; CHECK-NEXT: ret ret double 1.0 } diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll index 558b9457239c1..a10d30c372f16 100644 --- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll +++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64s.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --target-abi=lp64s --mattr=-f < %s | FileCheck %s ;; This file contains specific tests for the lp64s ABI. diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll index a26102710cbeb..161ed573c81f0 100644 --- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll @@ -175,16 +175,11 @@ define i8 @test_ctpop_i8(i8 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i8: ; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 1 -; LA64-NEXT: andi $a1, $a1, 85 -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: andi $a1, $a0, 51 -; LA64-NEXT: srli.d $a0, $a0, 2 -; LA64-NEXT: andi $a0, $a0, 51 -; LA64-NEXT: add.d $a0, $a1, $a0 -; LA64-NEXT: srli.d $a1, $a0, 4 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: andi $a0, $a0, 15 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: vldi $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vpcnt.d $vr0, $vr0 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i8 @llvm.ctpop.i8(i8 %a) ret i8 %1 @@ -213,22 +208,11 @@ define i16 @test_ctpop_i16(i16 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i16: ; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 1 -; LA64-NEXT: lu12i.w $a2, 5 -; LA64-NEXT: ori $a2, $a2, 1365 -; LA64-NEXT: and $a1, $a1, $a2 -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 3 -; LA64-NEXT: ori $a1, $a1, 819 -; LA64-NEXT: and $a2, $a0, $a1 -; LA64-NEXT: srli.d $a0, $a0, 2 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: srli.d $a1, $a0, 4 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: bstrpick.d $a1, $a0, 11, 8 -; LA64-NEXT: andi $a0, $a0, 15 -; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vldi $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vpcnt.d $vr0, $vr0 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i16 @llvm.ctpop.i16(i16 %a) ret i16 %1 @@ -261,26 +245,11 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i32: ; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 1 -; LA64-NEXT: lu12i.w $a2, 349525 -; LA64-NEXT: ori $a2, $a2, 1365 -; LA64-NEXT: and $a1, $a1, $a2 -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 209715 -; LA64-NEXT: ori $a1, $a1, 819 -; LA64-NEXT: and $a2, $a0, $a1 -; LA64-NEXT: srli.d $a0, $a0, 2 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: srli.d $a1, $a0, 4 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 61680 -; LA64-NEXT: ori $a1, $a1, 3855 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 4112 -; LA64-NEXT: ori $a1, $a1, 257 -; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: vldi $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vpcnt.d $vr0, $vr0 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 @@ -327,30 +296,10 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; ; LA64-LABEL: test_ctpop_i64: ; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 1 -; LA64-NEXT: lu12i.w $a2, 349525 -; LA64-NEXT: ori $a2, $a2, 1365 -; LA64-NEXT: bstrins.d $a2, $a2, 62, 32 -; LA64-NEXT: and $a1, $a1, $a2 -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 209715 -; LA64-NEXT: ori $a1, $a1, 819 -; LA64-NEXT: bstrins.d $a1, $a1, 61, 32 -; LA64-NEXT: and $a2, $a0, $a1 -; LA64-NEXT: srli.d $a0, $a0, 2 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: srli.d $a1, $a0, 4 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 61680 -; LA64-NEXT: ori $a1, $a1, 3855 -; LA64-NEXT: bstrins.d $a1, $a1, 59, 32 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 4112 -; LA64-NEXT: ori $a1, $a1, 257 -; LA64-NEXT: bstrins.d $a1, $a1, 56, 32 -; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: srli.d $a0, $a0, 56 +; LA64-NEXT: vldi $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vpcnt.d $vr0, $vr0 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %1 diff --git a/llvm/test/CodeGen/LoongArch/double-imm.ll b/llvm/test/CodeGen/LoongArch/double-imm.ll index 8d50b27907d72..fe403ec532d8e 100644 --- a/llvm/test/CodeGen/LoongArch/double-imm.ll +++ b/llvm/test/CodeGen/LoongArch/double-imm.ll @@ -59,9 +59,7 @@ define double @f64_add_fimm1(double %a) nounwind { ; ; LA64-LABEL: f64_add_fimm1: ; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $zero, 1 -; LA64-NEXT: movgr2fr.d $fa1, $a0 -; LA64-NEXT: ffint.d.l $fa1, $fa1 +; LA64-NEXT: vldi $vr1, -912 ; LA64-NEXT: fadd.d $fa0, $fa0, $fa1 ; LA64-NEXT: ret %1 = fadd double %a, 1.0 @@ -79,9 +77,7 @@ define double @f64_positive_fimm1() nounwind { ; ; LA64-LABEL: f64_positive_fimm1: ; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $zero, 1 -; LA64-NEXT: movgr2fr.d $fa0, $a0 -; LA64-NEXT: ffint.d.l $fa0, $fa0 +; LA64-NEXT: vldi $vr0, -912 ; LA64-NEXT: ret ret double 1.0 } diff --git a/llvm/test/CodeGen/LoongArch/fdiv-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/fdiv-reciprocal-estimate.ll index 3f38bbed881a3..50f2d21a9cc84 100644 --- a/llvm/test/CodeGen/LoongArch/fdiv-reciprocal-estimate.ll +++ b/llvm/test/CodeGen/LoongArch/fdiv-reciprocal-estimate.ll @@ -66,14 +66,13 @@ define double @fdiv_d(double %x, double %y) { ; ; LA64D-FRECIPE-LABEL: fdiv_d: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA64D-FRECIPE-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) -; LA64D-FRECIPE-NEXT: frecipe.d $fa3, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa1, $fa3, $fa2 -; LA64D-FRECIPE-NEXT: fnmsub.d $fa2, $fa2, $fa3, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa0, $fa2 -; LA64D-FRECIPE-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0 -; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3 +; LA64D-FRECIPE-NEXT: frecipe.d $fa2, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr3, -784 +; LA64D-FRECIPE-NEXT: fmadd.d $fa3, $fa1, $fa2, $fa3 +; LA64D-FRECIPE-NEXT: fnmsub.d $fa2, $fa3, $fa2, $fa2 +; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa0, $fa2 +; LA64D-FRECIPE-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0 +; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3 ; LA64D-FRECIPE-NEXT: ret %div = fdiv fast double %x, %y ret double %div diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll index ac5cb3c7e7211..cf15fd8bdb437 100644 --- a/llvm/test/CodeGen/LoongArch/frame.ll +++ b/llvm/test/CodeGen/LoongArch/frame.ll @@ -12,8 +12,8 @@ define i32 @test() nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -32 ; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ; CHECK-NEXT: st.w $zero, $sp, 16 -; CHECK-NEXT: st.d $zero, $sp, 8 -; CHECK-NEXT: st.d $zero, $sp, 0 +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 ; CHECK-NEXT: addi.d $a0, $sp, 4 ; CHECK-NEXT: bl %plt(test1) ; CHECK-NEXT: move $a0, $zero diff --git a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll index 388ae6321f664..5f14352fccd60 100644 --- a/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll +++ b/llvm/test/CodeGen/LoongArch/fsqrt-reciprocal-estimate.ll @@ -35,16 +35,14 @@ define float @frsqrt_f32(float %a) nounwind { ; ; LA64D-FRECIPE-LABEL: frsqrt_f32: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 -; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) -; LA64D-FRECIPE-NEXT: fld.s $fa2, $a0, %pc_lo12(.LCPI0_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1) -; LA64D-FRECIPE-NEXT: fld.s $fa3, $a0, %pc_lo12(.LCPI0_1) -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa1, $fa0 +; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr2, -1144 +; LA64D-FRECIPE-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: vldi $vr2, -1056 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa1, $fa0 ; LA64D-FRECIPE-NEXT: ret %1 = call fast float @llvm.sqrt.f32(float %a) @@ -88,20 +86,18 @@ define double @frsqrt_f64(double %a) nounwind { ; ; LA64D-FRECIPE-LABEL: frsqrt_f64: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0 -; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA64D-FRECIPE-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_1) -; LA64D-FRECIPE-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI1_1) -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.d $fa4, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.d $fa4, $fa4, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa1, $fa0 +; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr3, -888 +; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: vldi $vr4, -800 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa1, $fa0 ; LA64D-FRECIPE-NEXT: ret %1 = call fast double @llvm.sqrt.f64(double %a) %2 = fdiv fast double 1.0, %1 @@ -209,26 +205,24 @@ define double @sqrt_simplify_before_recip_3_uses_f64(double %x, ptr %p1, ptr %p2 ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f64: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) -; LA64D-FRECIPE-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI2_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_1) -; LA64D-FRECIPE-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI2_1) -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.d $fa4, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.d $fa4, $fa4, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fmul.d $fa4, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_2) -; LA64D-FRECIPE-NEXT: fld.d $fa5, $a2, %pc_lo12(.LCPI2_2) -; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa4, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa5 -; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0 -; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0 +; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr3, -888 +; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: vldi $vr4, -800 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI2_0) +; LA64D-FRECIPE-NEXT: fld.d $fa5, $a2, %pc_lo12(.LCPI2_0) +; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa5 +; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0 +; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0 ; LA64D-FRECIPE-NEXT: ret %sqrt = tail call fast double @llvm.sqrt.f64(double %x) %rsqrt = fdiv fast double 1.0, %sqrt @@ -342,29 +336,27 @@ define double @sqrt_simplify_before_recip_3_uses_order_f64(double %x, ptr %p1, p ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f64: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) -; LA64D-FRECIPE-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI3_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) -; LA64D-FRECIPE-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI3_1) -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.d $fa4, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.d $fa4, $fa4, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fmul.d $fa4, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa4, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) -; LA64D-FRECIPE-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI3_2) -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_3) -; LA64D-FRECIPE-NEXT: fld.d $fa4, $a2, %pc_lo12(.LCPI3_3) -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fst.d $fa2, $a0, 0 -; LA64D-FRECIPE-NEXT: fst.d $fa1, $a1, 0 +; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr3, -888 +; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: vldi $vr4, -800 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) +; LA64D-FRECIPE-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI3_0) +; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) +; LA64D-FRECIPE-NEXT: fld.d $fa4, $a2, %pc_lo12(.LCPI3_1) +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fst.d $fa2, $a0, 0 +; LA64D-FRECIPE-NEXT: fst.d $fa1, $a1, 0 ; LA64D-FRECIPE-NEXT: ret %sqrt = tail call fast double @llvm.sqrt.f64(double %x) %sqrt_fast = fdiv fast double %x, %sqrt @@ -512,30 +504,28 @@ define double @sqrt_simplify_before_recip_4_uses_f64(double %x, ptr %p1, ptr %p2 ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f64: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0 -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) -; LA64D-FRECIPE-NEXT: fld.d $fa2, $a3, %pc_lo12(.LCPI4_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1) -; LA64D-FRECIPE-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_1) -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.d $fa4, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.d $fa4, $fa4, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fmul.d $fa4, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa4, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_2) -; LA64D-FRECIPE-NEXT: fld.d $fa4, $a3, %pc_lo12(.LCPI4_2) -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_3) -; LA64D-FRECIPE-NEXT: fld.d $fa5, $a3, %pc_lo12(.LCPI4_3) -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa1, $fa5 -; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0 -; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0 -; LA64D-FRECIPE-NEXT: fst.d $fa3, $a2, 0 +; LA64D-FRECIPE-NEXT: frsqrte.d $fa1, $fa0 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr3, -888 +; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: vldi $vr4, -800 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) +; LA64D-FRECIPE-NEXT: fld.d $fa3, $a3, %pc_lo12(.LCPI4_0) +; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_1) +; LA64D-FRECIPE-NEXT: fld.d $fa5, $a3, %pc_lo12(.LCPI4_1) +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.d $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.d $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa1, $fa5 +; LA64D-FRECIPE-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fst.d $fa1, $a0, 0 +; LA64D-FRECIPE-NEXT: fst.d $fa2, $a1, 0 +; LA64D-FRECIPE-NEXT: fst.d $fa3, $a2, 0 ; LA64D-FRECIPE-NEXT: ret %sqrt = tail call fast double @llvm.sqrt.f64(double %x) %rsqrt = fdiv fast double 1.0, %sqrt @@ -595,22 +585,20 @@ define float @sqrt_simplify_before_recip_3_uses_f32(float %x, ptr %p1, ptr %p2) ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_f32: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) -; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI5_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_1) -; LA64D-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI5_1) -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_2) -; LA64D-FRECIPE-NEXT: fld.s $fa5, $a2, %pc_lo12(.LCPI5_2) -; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa5 -; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0 -; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0 +; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr3, -1144 +; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI5_0) +; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI5_0) +; LA64D-FRECIPE-NEXT: vldi $vr4, -1056 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0 +; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0 ; LA64D-FRECIPE-NEXT: ret ; %sqrt = tail call fast float @llvm.sqrt.f32(float %x) @@ -681,26 +669,24 @@ define float @sqrt_simplify_before_recip_4_uses_f32(float %x, ptr %p1, ptr %p2, ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_4_uses_f32: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0) -; LA64D-FRECIPE-NEXT: fld.s $fa1, $a3, %pc_lo12(.LCPI6_0) -; LA64D-FRECIPE-NEXT: frsqrte.s $fa2, $fa0 -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa2 -; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2 -; LA64D-FRECIPE-NEXT: fmadd.s $fa1, $fa3, $fa2, $fa1 -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1) -; LA64D-FRECIPE-NEXT: fld.s $fa3, $a3, %pc_lo12(.LCPI6_1) -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_2) -; LA64D-FRECIPE-NEXT: fld.s $fa4, $a3, %pc_lo12(.LCPI6_2) -; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_3) -; LA64D-FRECIPE-NEXT: fld.s $fa5, $a3, %pc_lo12(.LCPI6_3) -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa2, $fa3 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa2, $fa1 -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa5 -; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0 -; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0 -; LA64D-FRECIPE-NEXT: fst.s $fa3, $a2, 0 +; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr3, -1144 +; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: vldi $vr3, -1056 +; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0) +; LA64D-FRECIPE-NEXT: fld.s $fa4, $a3, %pc_lo12(.LCPI6_0) +; LA64D-FRECIPE-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_1) +; LA64D-FRECIPE-NEXT: fld.s $fa5, $a3, %pc_lo12(.LCPI6_1) +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa1, $fa5 +; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fst.s $fa1, $a0, 0 +; LA64D-FRECIPE-NEXT: fst.s $fa2, $a1, 0 +; LA64D-FRECIPE-NEXT: fst.s $fa3, $a2, 0 ; LA64D-FRECIPE-NEXT: ret ; %sqrt = tail call fast float @llvm.sqrt.f32(float %x) @@ -766,25 +752,23 @@ define float @sqrt_simplify_before_recip_3_uses_order_f32(float %x, ptr %p1, ptr ; ; LA64D-FRECIPE-LABEL: sqrt_simplify_before_recip_3_uses_order_f32: ; LA64D-FRECIPE: # %bb.0: -; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0) -; LA64D-FRECIPE-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI7_0) -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1) -; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_1) -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.s $fa4, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa4, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_2) -; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_2) -; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_3) -; LA64D-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI7_3) -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 -; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3 -; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4 -; LA64D-FRECIPE-NEXT: fst.s $fa2, $a0, 0 -; LA64D-FRECIPE-NEXT: fst.s $fa1, $a1, 0 +; LA64D-FRECIPE-NEXT: frsqrte.s $fa1, $fa0 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: vldi $vr3, -1144 +; LA64D-FRECIPE-NEXT: fmadd.s $fa2, $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: vldi $vr3, -1056 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0) +; LA64D-FRECIPE-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI7_0) +; LA64D-FRECIPE-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_1) +; LA64D-FRECIPE-NEXT: fld.s $fa4, $a2, %pc_lo12(.LCPI7_1) +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa2 +; LA64D-FRECIPE-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64D-FRECIPE-NEXT: fmul.s $fa2, $fa1, $fa3 +; LA64D-FRECIPE-NEXT: fmul.s $fa1, $fa1, $fa4 +; LA64D-FRECIPE-NEXT: fst.s $fa2, $a0, 0 +; LA64D-FRECIPE-NEXT: fst.s $fa1, $a1, 0 ; LA64D-FRECIPE-NEXT: ret ; %sqrt = tail call fast float @llvm.sqrt.f32(float %x) diff --git a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll index 6cf9d7d75b996..3d6e22b5eeb10 100644 --- a/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll +++ b/llvm/test/CodeGen/LoongArch/get-setcc-result-type.ll @@ -5,22 +5,9 @@ define void @getSetCCResultType(ptr %p) { ; CHECK-LABEL: getSetCCResultType: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.w $a1, $a0, 0 -; CHECK-NEXT: ld.w $a2, $a0, 12 -; CHECK-NEXT: ld.w $a3, $a0, 4 -; CHECK-NEXT: ld.w $a4, $a0, 8 -; CHECK-NEXT: sltui $a1, $a1, 1 -; CHECK-NEXT: sub.d $a1, $zero, $a1 -; CHECK-NEXT: sltui $a3, $a3, 1 -; CHECK-NEXT: sub.d $a3, $zero, $a3 -; CHECK-NEXT: sltui $a4, $a4, 1 -; CHECK-NEXT: sub.d $a4, $zero, $a4 -; CHECK-NEXT: sltui $a2, $a2, 1 -; CHECK-NEXT: sub.d $a2, $zero, $a2 -; CHECK-NEXT: st.w $a2, $a0, 12 -; CHECK-NEXT: st.w $a4, $a0, 8 -; CHECK-NEXT: st.w $a3, $a0, 4 -; CHECK-NEXT: st.w $a1, $a0, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vseqi.w $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %0 = load <4 x i32>, ptr %p, align 16 diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll index 570fd438be97b..83f796f73934c 100644 --- a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-error.ll @@ -1,4 +1,4 @@ -; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s +; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LA32 ; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s define void @constraint_l() { @@ -32,9 +32,9 @@ define void @constraint_K() { } define void @constraint_f() nounwind { -; CHECK: error: couldn't allocate input reg for constraint 'f' +; LA32: error: couldn't allocate input reg for constraint 'f' tail call void asm "fadd.s $$fa0, $$fa0, $0", "f"(float 0.0) -; CHECK: error: couldn't allocate input reg for constraint 'f' +; LA32: error: couldn't allocate input reg for constraint 'f' tail call void asm "fadd.s $$fa0, $$fa0, $0", "f"(double 0.0) ret void } diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll index a839ab149c333..176e3f60c5625 100644 --- a/llvm/test/CodeGen/LoongArch/intrinsic-error.ll +++ b/llvm/test/CodeGen/LoongArch/intrinsic-error.ll @@ -1,4 +1,4 @@ -; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s +; RUN: not llc --mtriple=loongarch32 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LA32 ; RUN: not llc --mtriple=loongarch64 < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.dbar(i32) @@ -54,7 +54,7 @@ entry: } define void @movgr2fcsr(i32 %a) nounwind { -; CHECK: llvm.loongarch.movgr2fcsr: requires basic 'f' target feature. +; LA32: llvm.loongarch.movgr2fcsr: requires basic 'f' target feature. entry: call void @llvm.loongarch.movgr2fcsr(i32 1, i32 %a) ret void @@ -75,7 +75,7 @@ entry: } define i32 @movfcsr2gr() nounwind { -; CHECK: llvm.loongarch.movfcsr2gr: requires basic 'f' target feature. +; LA32: llvm.loongarch.movfcsr2gr: requires basic 'f' target feature. entry: %res = call i32 @llvm.loongarch.movfcsr2gr(i32 1) ret i32 %res diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll index 622001db32955..402ddb9ad941b 100644 --- a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll +++ b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll @@ -12,18 +12,12 @@ define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 der ; CHECK-NEXT: alsl.d $a1, $a1, $a2, 4 ; CHECK-NEXT: addi.d $a2, $sp, 0 ; CHECK-NEXT: add.d $a3, $a2, $a1 -; CHECK-NEXT: ldx.d $a1, $a1, $a2 -; CHECK-NEXT: ld.d $a2, $a3, 40 -; CHECK-NEXT: st.d $a1, $a0, 0 -; CHECK-NEXT: st.d $a2, $a0, 40 -; CHECK-NEXT: ld.d $a1, $a3, 32 -; CHECK-NEXT: ld.d $a2, $a3, 24 -; CHECK-NEXT: ld.d $a4, $a3, 16 -; CHECK-NEXT: ld.d $a3, $a3, 8 -; CHECK-NEXT: st.d $a1, $a0, 32 -; CHECK-NEXT: st.d $a2, $a0, 24 -; CHECK-NEXT: st.d $a4, $a0, 16 -; CHECK-NEXT: st.d $a3, $a0, 8 +; CHECK-NEXT: vldx $vr0, $a1, $a2 +; CHECK-NEXT: vld $vr1, $a3, 32 +; CHECK-NEXT: vld $vr2, $a3, 16 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: vst $vr1, $a0, 32 +; CHECK-NEXT: vst $vr2, $a0, 16 ; CHECK-NEXT: addi.d $sp, $sp, 96 ; CHECK-NEXT: ret %1 = alloca [2 x %Box], align 16 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll index 7e320d9245f1c..6ea658acdd717 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll @@ -40,9 +40,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64D-LABEL: float_fadd_acquire: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB0_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -111,8 +109,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64D-LABEL: float_fsub_acquire: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) -; LA64D-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI1_0) +; LA64D-NEXT: vldi $vr1, -1040 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB1_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -183,9 +180,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64D-LABEL: float_fmin_acquire: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB2_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -257,9 +252,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64D-LABEL: float_fmax_acquire: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB3_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -331,35 +324,31 @@ define double @double_fadd_acquire(ptr %p) nounwind { ; ; LA64D-LABEL: double_fadd_acquire: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB4_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -912 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 2 ; LA64D-NEXT: ori $a5, $zero, 2 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB4_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4 ret double %v @@ -404,34 +393,31 @@ define double @double_fsub_acquire(ptr %p) nounwind { ; ; LA64D-LABEL: double_fsub_acquire: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA64D-NEXT: fld.d $fs0, $a0, %pc_lo12(.LCPI5_0) ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB5_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -784 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 2 ; LA64D-NEXT: ori $a5, $zero, 2 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB5_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4 ret double %v @@ -476,36 +462,32 @@ define double @double_fmin_acquire(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmin_acquire: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB6_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmin.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 2 ; LA64D-NEXT: ori $a5, $zero, 2 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB6_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, double 1.0 acquire, align 4 ret double %v @@ -550,36 +532,32 @@ define double @double_fmax_acquire(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmax_acquire: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB7_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmax.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 2 ; LA64D-NEXT: ori $a5, $zero, 2 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB7_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4 ret double %v @@ -623,9 +601,7 @@ define float @float_fadd_release(ptr %p) nounwind { ; LA64D-LABEL: float_fadd_release: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB8_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -694,8 +670,7 @@ define float @float_fsub_release(ptr %p) nounwind { ; LA64D-LABEL: float_fsub_release: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) -; LA64D-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI9_0) +; LA64D-NEXT: vldi $vr1, -1040 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB9_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -766,9 +741,7 @@ define float @float_fmin_release(ptr %p) nounwind { ; LA64D-LABEL: float_fmin_release: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB10_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -840,9 +813,7 @@ define float @float_fmax_release(ptr %p) nounwind { ; LA64D-LABEL: float_fmax_release: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB11_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -914,35 +885,31 @@ define double @double_fadd_release(ptr %p) nounwind { ; ; LA64D-LABEL: double_fadd_release: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB12_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -912 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 3 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: move $a5, $zero ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB12_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, double 1.0 release, align 4 ret double %v @@ -987,34 +954,31 @@ define double @double_fsub_release(ptr %p) nounwind { ; ; LA64D-LABEL: double_fsub_release: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0) -; LA64D-NEXT: fld.d $fs0, $a0, %pc_lo12(.LCPI13_0) ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB13_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -784 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 3 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: move $a5, $zero ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB13_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, double 1.0 release, align 4 ret double %v @@ -1059,36 +1023,32 @@ define double @double_fmin_release(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmin_release: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB14_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmin.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 3 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: move $a5, $zero ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB14_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, double 1.0 release, align 4 ret double %v @@ -1133,36 +1093,32 @@ define double @double_fmax_release(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmax_release: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB15_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmax.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 3 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: move $a5, $zero ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB15_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, double 1.0 release, align 4 ret double %v @@ -1206,9 +1162,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { ; LA64D-LABEL: float_fadd_acq_rel: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB16_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -1277,8 +1231,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { ; LA64D-LABEL: float_fsub_acq_rel: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) -; LA64D-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI17_0) +; LA64D-NEXT: vldi $vr1, -1040 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB17_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -1349,9 +1302,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { ; LA64D-LABEL: float_fmin_acq_rel: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB18_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -1423,9 +1374,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { ; LA64D-LABEL: float_fmax_acq_rel: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB19_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -1497,35 +1446,31 @@ define double @double_fadd_acq_rel(ptr %p) nounwind { ; ; LA64D-LABEL: double_fadd_acq_rel: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB20_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -912 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 4 ; LA64D-NEXT: ori $a5, $zero, 2 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB20_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4 ret double %v @@ -1570,34 +1515,31 @@ define double @double_fsub_acq_rel(ptr %p) nounwind { ; ; LA64D-LABEL: double_fsub_acq_rel: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0) -; LA64D-NEXT: fld.d $fs0, $a0, %pc_lo12(.LCPI21_0) ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB21_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -784 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 4 ; LA64D-NEXT: ori $a5, $zero, 2 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB21_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4 ret double %v @@ -1642,36 +1584,32 @@ define double @double_fmin_acq_rel(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmin_acq_rel: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB22_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmin.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 4 ; LA64D-NEXT: ori $a5, $zero, 2 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB22_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4 ret double %v @@ -1716,36 +1654,32 @@ define double @double_fmax_acq_rel(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmax_acq_rel: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB23_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmax.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 4 ; LA64D-NEXT: ori $a5, $zero, 2 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB23_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4 ret double %v @@ -1789,9 +1723,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { ; LA64D-LABEL: float_fadd_seq_cst: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB24_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -1860,8 +1792,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { ; LA64D-LABEL: float_fsub_seq_cst: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI25_0) -; LA64D-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI25_0) +; LA64D-NEXT: vldi $vr1, -1040 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB25_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -1932,9 +1863,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { ; LA64D-LABEL: float_fmin_seq_cst: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB26_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -2006,9 +1935,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { ; LA64D-LABEL: float_fmax_seq_cst: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB27_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -2080,35 +2007,31 @@ define double @double_fadd_seq_cst(ptr %p) nounwind { ; ; LA64D-LABEL: double_fadd_seq_cst: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB28_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -912 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 5 ; LA64D-NEXT: ori $a5, $zero, 5 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB28_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 ret double %v @@ -2153,34 +2076,31 @@ define double @double_fsub_seq_cst(ptr %p) nounwind { ; ; LA64D-LABEL: double_fsub_seq_cst: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI29_0) -; LA64D-NEXT: fld.d $fs0, $a0, %pc_lo12(.LCPI29_0) ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB29_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -784 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 5 ; LA64D-NEXT: ori $a5, $zero, 5 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB29_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 ret double %v @@ -2225,36 +2145,32 @@ define double @double_fmin_seq_cst(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmin_seq_cst: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB30_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmin.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 5 ; LA64D-NEXT: ori $a5, $zero, 5 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB30_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 ret double %v @@ -2299,36 +2215,32 @@ define double @double_fmax_seq_cst(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmax_seq_cst: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB31_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmax.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: ori $a4, $zero, 5 ; LA64D-NEXT: ori $a5, $zero, 5 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB31_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4 ret double %v @@ -2372,9 +2284,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { ; LA64D-LABEL: float_fadd_monotonic: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB32_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -2443,8 +2353,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { ; LA64D-LABEL: float_fsub_monotonic: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI33_0) -; LA64D-NEXT: fld.s $fa1, $a1, %pc_lo12(.LCPI33_0) +; LA64D-NEXT: vldi $vr1, -1040 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB33_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -2515,9 +2424,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { ; LA64D-LABEL: float_fmin_monotonic: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB34_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -2589,9 +2496,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { ; LA64D-LABEL: float_fmax_monotonic: ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s $fa0, $a0, 0 -; LA64D-NEXT: addi.w $a1, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa1, $a1 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: vldi $vr1, -1168 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB35_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 @@ -2663,35 +2568,31 @@ define double @double_fadd_monotonic(ptr %p) nounwind { ; ; LA64D-LABEL: double_fadd_monotonic: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB36_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -912 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: move $a4, $zero ; LA64D-NEXT: move $a5, $zero ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB36_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4 ret double %v @@ -2736,34 +2637,31 @@ define double @double_fsub_monotonic(ptr %p) nounwind { ; ; LA64D-LABEL: double_fsub_monotonic: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI37_0) -; LA64D-NEXT: fld.d $fs0, $a0, %pc_lo12(.LCPI37_0) ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB37_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr1, -784 +; LA64D-NEXT: fadd.d $fa1, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: move $a4, $zero ; LA64D-NEXT: move $a5, $zero ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB37_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4 ret double %v @@ -2808,36 +2706,32 @@ define double @double_fmin_monotonic(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmin_monotonic: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB38_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmin.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmin.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: move $a4, $zero ; LA64D-NEXT: move $a5, $zero ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB38_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4 ret double %v @@ -2882,36 +2776,32 @@ define double @double_fmax_monotonic(ptr %p) nounwind { ; ; LA64D-LABEL: double_fmax_monotonic: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $sp, $sp, -48 -; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: addi.d $sp, $sp, -32 +; LA64D-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa1, $a0 -; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB39_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fmax.d $fa1, $fa0, $fa0 -; LA64D-NEXT: fmax.d $fa1, $fa1, $fs0 -; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: fst.d $fa1, $sp, 8 +; LA64D-NEXT: vldi $vr2, -912 +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa2 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: fst.d $fa1, $sp, 0 ; LA64D-NEXT: ori $a0, $zero, 8 -; LA64D-NEXT: addi.d $a2, $sp, 16 -; LA64D-NEXT: addi.d $a3, $sp, 8 +; LA64D-NEXT: addi.d $a2, $sp, 8 +; LA64D-NEXT: addi.d $a3, $sp, 0 ; LA64D-NEXT: move $a1, $fp ; LA64D-NEXT: move $a4, $zero ; LA64D-NEXT: move $a5, $zero ; LA64D-NEXT: bl %plt(__atomic_compare_exchange) -; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: fld.d $fa0, $sp, 8 ; LA64D-NEXT: beqz $a0, .LBB39_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end -; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64D-NEXT: addi.d $sp, $sp, 48 +; LA64D-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 32 ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4 ret double %v diff --git a/llvm/test/CodeGen/LoongArch/sextw-removal.ll b/llvm/test/CodeGen/LoongArch/sextw-removal.ll index 7500b5ae09359..96853105049b4 100644 --- a/llvm/test/CodeGen/LoongArch/sextw-removal.ll +++ b/llvm/test/CodeGen/LoongArch/sextw-removal.ll @@ -142,90 +142,44 @@ define signext i32 @test4(ptr %p, i32 signext %b) nounwind { define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: addi.d $sp, $sp, -48 -; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; CHECK-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill -; CHECK-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill -; CHECK-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; CHECK-NEXT: sra.w $a1, $a0, $a1 -; CHECK-NEXT: lu12i.w $a0, 349525 -; CHECK-NEXT: ori $fp, $a0, 1365 -; CHECK-NEXT: lu12i.w $a0, 209715 -; CHECK-NEXT: ori $s0, $a0, 819 -; CHECK-NEXT: lu12i.w $a0, 61680 -; CHECK-NEXT: ori $s1, $a0, 3855 -; CHECK-NEXT: lu12i.w $a0, 4112 -; CHECK-NEXT: ori $s2, $a0, 257 ; CHECK-NEXT: .p2align 4, , 16 ; CHECK-NEXT: .LBB4_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: addi.w $a0, $a1, 0 ; CHECK-NEXT: bl %plt(bar) -; CHECK-NEXT: srli.d $a1, $a0, 1 -; CHECK-NEXT: and $a1, $a1, $fp -; CHECK-NEXT: sub.d $a1, $a0, $a1 -; CHECK-NEXT: and $a2, $a1, $s0 -; CHECK-NEXT: srli.d $a1, $a1, 2 -; CHECK-NEXT: and $a1, $a1, $s0 -; CHECK-NEXT: add.d $a1, $a2, $a1 -; CHECK-NEXT: srli.d $a2, $a1, 4 -; CHECK-NEXT: add.d $a1, $a1, $a2 -; CHECK-NEXT: and $a1, $a1, $s1 -; CHECK-NEXT: mul.d $a1, $a1, $s2 -; CHECK-NEXT: bstrpick.d $a1, $a1, 31, 24 +; CHECK-NEXT: bstrpick.d $a1, $a0, 31, 0 +; CHECK-NEXT: vldi $vr0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; CHECK-NEXT: vpcnt.d $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.d $a1, $vr0, 0 ; CHECK-NEXT: bnez $a0, .LBB4_1 ; CHECK-NEXT: # %bb.2: # %bb7 -; CHECK-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 ; CHECK-NEXT: ret ; ; NORMV-LABEL: test5: ; NORMV: # %bb.0: # %bb -; NORMV-NEXT: addi.d $sp, $sp, -48 -; NORMV-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; NORMV-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; NORMV-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill -; NORMV-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill -; NORMV-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill +; NORMV-NEXT: addi.d $sp, $sp, -16 +; NORMV-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; NORMV-NEXT: sra.w $a1, $a0, $a1 -; NORMV-NEXT: lu12i.w $a0, 349525 -; NORMV-NEXT: ori $fp, $a0, 1365 -; NORMV-NEXT: lu12i.w $a0, 209715 -; NORMV-NEXT: ori $s0, $a0, 819 -; NORMV-NEXT: lu12i.w $a0, 61680 -; NORMV-NEXT: ori $s1, $a0, 3855 -; NORMV-NEXT: lu12i.w $a0, 4112 -; NORMV-NEXT: ori $s2, $a0, 257 ; NORMV-NEXT: .p2align 4, , 16 ; NORMV-NEXT: .LBB4_1: # %bb2 ; NORMV-NEXT: # =>This Inner Loop Header: Depth=1 ; NORMV-NEXT: addi.w $a0, $a1, 0 ; NORMV-NEXT: bl %plt(bar) -; NORMV-NEXT: srli.d $a1, $a0, 1 -; NORMV-NEXT: and $a1, $a1, $fp -; NORMV-NEXT: sub.d $a1, $a0, $a1 -; NORMV-NEXT: and $a2, $a1, $s0 -; NORMV-NEXT: srli.d $a1, $a1, 2 -; NORMV-NEXT: and $a1, $a1, $s0 -; NORMV-NEXT: add.d $a1, $a2, $a1 -; NORMV-NEXT: srli.d $a2, $a1, 4 -; NORMV-NEXT: add.d $a1, $a1, $a2 -; NORMV-NEXT: and $a1, $a1, $s1 -; NORMV-NEXT: mul.d $a1, $a1, $s2 -; NORMV-NEXT: bstrpick.d $a1, $a1, 31, 24 +; NORMV-NEXT: bstrpick.d $a1, $a0, 31, 0 +; NORMV-NEXT: vldi $vr0, 0 +; NORMV-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; NORMV-NEXT: vpcnt.d $vr0, $vr0 +; NORMV-NEXT: vpickve2gr.d $a1, $vr0, 0 ; NORMV-NEXT: bnez $a0, .LBB4_1 ; NORMV-NEXT: # %bb.2: # %bb7 -; NORMV-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; NORMV-NEXT: addi.d $sp, $sp, 48 +; NORMV-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; NORMV-NEXT: addi.d $sp, $sp, 16 ; NORMV-NEXT: ret bb: %i = ashr i32 %arg, %arg1 @@ -247,54 +201,45 @@ declare i32 @llvm.ctpop.i32(i32) define void @test6(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: addi.d $sp, $sp, -32 -; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill -; CHECK-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: sra.w $fp, $a0, $a1 +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; CHECK-NEXT: sra.w $a0, $a0, $a1 +; CHECK-NEXT: movgr2fr.w $fs0, $zero ; CHECK-NEXT: .p2align 4, , 16 ; CHECK-NEXT: .LBB5_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: addi.w $a0, $fp, 0 ; CHECK-NEXT: bl %plt(baz) -; CHECK-NEXT: move $s0, $a0 -; CHECK-NEXT: bl %plt(__fixsfsi) -; CHECK-NEXT: move $fp, $a0 -; CHECK-NEXT: move $a0, $s0 -; CHECK-NEXT: move $a1, $zero -; CHECK-NEXT: bl %plt(__nesf2) -; CHECK-NEXT: bnez $a0, .LBB5_1 +; CHECK-NEXT: ftintrz.w.s $fa1, $fa0 +; CHECK-NEXT: fcmp.cune.s $fcc0, $fa0, $fs0 +; CHECK-NEXT: movfr2gr.s $a0, $fa1 +; CHECK-NEXT: bcnez $fcc0, .LBB5_1 ; CHECK-NEXT: # %bb.2: # %bb7 -; CHECK-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 ; CHECK-NEXT: ret ; ; NORMV-LABEL: test6: ; NORMV: # %bb.0: # %bb -; NORMV-NEXT: addi.d $sp, $sp, -32 -; NORMV-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; NORMV-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill -; NORMV-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill -; NORMV-NEXT: sra.w $fp, $a0, $a1 +; NORMV-NEXT: addi.d $sp, $sp, -16 +; NORMV-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; NORMV-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; NORMV-NEXT: sra.w $a0, $a0, $a1 +; NORMV-NEXT: movgr2fr.w $fs0, $zero ; NORMV-NEXT: .p2align 4, , 16 ; NORMV-NEXT: .LBB5_1: # %bb2 ; NORMV-NEXT: # =>This Inner Loop Header: Depth=1 -; NORMV-NEXT: addi.w $a0, $fp, 0 +; NORMV-NEXT: addi.w $a0, $a0, 0 ; NORMV-NEXT: bl %plt(baz) -; NORMV-NEXT: move $s0, $a0 -; NORMV-NEXT: bl %plt(__fixsfsi) -; NORMV-NEXT: move $fp, $a0 -; NORMV-NEXT: move $a0, $s0 -; NORMV-NEXT: move $a1, $zero -; NORMV-NEXT: bl %plt(__nesf2) -; NORMV-NEXT: bnez $a0, .LBB5_1 +; NORMV-NEXT: ftintrz.w.s $fa1, $fa0 +; NORMV-NEXT: fcmp.cune.s $fcc0, $fa0, $fs0 +; NORMV-NEXT: movfr2gr.s $a0, $fa1 +; NORMV-NEXT: bcnez $fcc0, .LBB5_1 ; NORMV-NEXT: # %bb.2: # %bb7 -; NORMV-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload -; NORMV-NEXT: addi.d $sp, $sp, 32 +; NORMV-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload +; NORMV-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; NORMV-NEXT: addi.d $sp, $sp, 16 ; NORMV-NEXT: ret bb: %i = ashr i32 %arg, %arg1 @@ -315,97 +260,42 @@ declare float @baz(i32 signext %i3) define void @test7(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: addi.d $sp, $sp, -48 -; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; CHECK-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill -; CHECK-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill -; CHECK-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; CHECK-NEXT: sra.w $a0, $a0, $a1 -; CHECK-NEXT: lu12i.w $a1, 349525 -; CHECK-NEXT: ori $fp, $a1, 1365 -; CHECK-NEXT: bstrins.d $fp, $fp, 62, 32 -; CHECK-NEXT: lu12i.w $a1, 209715 -; CHECK-NEXT: ori $s0, $a1, 819 -; CHECK-NEXT: bstrins.d $s0, $s0, 61, 32 -; CHECK-NEXT: lu12i.w $a1, 61680 -; CHECK-NEXT: ori $s1, $a1, 3855 -; CHECK-NEXT: bstrins.d $s1, $s1, 59, 32 -; CHECK-NEXT: lu12i.w $a1, 4112 -; CHECK-NEXT: ori $s2, $a1, 257 -; CHECK-NEXT: bstrins.d $s2, $s2, 56, 32 ; CHECK-NEXT: .p2align 4, , 16 ; CHECK-NEXT: .LBB6_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: addi.w $a0, $a0, 0 ; CHECK-NEXT: bl %plt(foo) -; CHECK-NEXT: srli.d $a1, $a0, 1 -; CHECK-NEXT: and $a1, $a1, $fp -; CHECK-NEXT: sub.d $a0, $a0, $a1 -; CHECK-NEXT: and $a1, $a0, $s0 -; CHECK-NEXT: srli.d $a0, $a0, 2 -; CHECK-NEXT: and $a0, $a0, $s0 -; CHECK-NEXT: add.d $a0, $a1, $a0 -; CHECK-NEXT: srli.d $a1, $a0, 4 -; CHECK-NEXT: add.d $a0, $a0, $a1 -; CHECK-NEXT: and $a0, $a0, $s1 -; CHECK-NEXT: mul.d $a0, $a0, $s2 -; CHECK-NEXT: srli.d $a0, $a0, 56 +; CHECK-NEXT: vldi $vr0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.d $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 ; CHECK-NEXT: bnez $a0, .LBB6_1 ; CHECK-NEXT: # %bb.2: # %bb7 -; CHECK-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 ; CHECK-NEXT: ret ; ; NORMV-LABEL: test7: ; NORMV: # %bb.0: # %bb -; NORMV-NEXT: addi.d $sp, $sp, -48 -; NORMV-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; NORMV-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; NORMV-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill -; NORMV-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill -; NORMV-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill +; NORMV-NEXT: addi.d $sp, $sp, -16 +; NORMV-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; NORMV-NEXT: sra.w $a0, $a0, $a1 -; NORMV-NEXT: lu12i.w $a1, 349525 -; NORMV-NEXT: ori $fp, $a1, 1365 -; NORMV-NEXT: bstrins.d $fp, $fp, 62, 32 -; NORMV-NEXT: lu12i.w $a1, 209715 -; NORMV-NEXT: ori $s0, $a1, 819 -; NORMV-NEXT: bstrins.d $s0, $s0, 61, 32 -; NORMV-NEXT: lu12i.w $a1, 61680 -; NORMV-NEXT: ori $s1, $a1, 3855 -; NORMV-NEXT: bstrins.d $s1, $s1, 59, 32 -; NORMV-NEXT: lu12i.w $a1, 4112 -; NORMV-NEXT: ori $s2, $a1, 257 -; NORMV-NEXT: bstrins.d $s2, $s2, 56, 32 ; NORMV-NEXT: .p2align 4, , 16 ; NORMV-NEXT: .LBB6_1: # %bb2 ; NORMV-NEXT: # =>This Inner Loop Header: Depth=1 ; NORMV-NEXT: addi.w $a0, $a0, 0 ; NORMV-NEXT: bl %plt(foo) -; NORMV-NEXT: srli.d $a1, $a0, 1 -; NORMV-NEXT: and $a1, $a1, $fp -; NORMV-NEXT: sub.d $a0, $a0, $a1 -; NORMV-NEXT: and $a1, $a0, $s0 -; NORMV-NEXT: srli.d $a0, $a0, 2 -; NORMV-NEXT: and $a0, $a0, $s0 -; NORMV-NEXT: add.d $a0, $a1, $a0 -; NORMV-NEXT: srli.d $a1, $a0, 4 -; NORMV-NEXT: add.d $a0, $a0, $a1 -; NORMV-NEXT: and $a0, $a0, $s1 -; NORMV-NEXT: mul.d $a0, $a0, $s2 -; NORMV-NEXT: srli.d $a0, $a0, 56 +; NORMV-NEXT: vldi $vr0, 0 +; NORMV-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; NORMV-NEXT: vpcnt.d $vr0, $vr0 +; NORMV-NEXT: vpickve2gr.d $a0, $vr0, 0 ; NORMV-NEXT: bnez $a0, .LBB6_1 ; NORMV-NEXT: # %bb.2: # %bb7 -; NORMV-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; NORMV-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; NORMV-NEXT: addi.d $sp, $sp, 48 +; NORMV-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; NORMV-NEXT: addi.d $sp, $sp, 16 ; NORMV-NEXT: ret bb: %i = ashr i32 %arg, %arg1 @@ -544,19 +434,18 @@ define void @test10(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill -; CHECK-NEXT: sra.w $fp, $a0, $a1 +; CHECK-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; CHECK-NEXT: sra.w $a0, $a0, $a1 +; CHECK-NEXT: movgr2fr.w $fs0, $zero ; CHECK-NEXT: .p2align 4, , 16 ; CHECK-NEXT: .LBB9_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: addi.w $a0, $fp, 0 ; CHECK-NEXT: bl %plt(baz) -; CHECK-NEXT: move $fp, $a0 -; CHECK-NEXT: move $a1, $zero -; CHECK-NEXT: bl %plt(__nesf2) -; CHECK-NEXT: bnez $a0, .LBB9_1 +; CHECK-NEXT: fcmp.cune.s $fcc0, $fa0, $fs0 +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: bcnez $fcc0, .LBB9_1 ; CHECK-NEXT: # %bb.2: # %bb7 -; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload ; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; CHECK-NEXT: addi.d $sp, $sp, 16 ; CHECK-NEXT: ret @@ -565,19 +454,19 @@ define void @test10(i32 signext %arg, i32 signext %arg1) nounwind { ; NORMV: # %bb.0: # %bb ; NORMV-NEXT: addi.d $sp, $sp, -16 ; NORMV-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; NORMV-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill -; NORMV-NEXT: sra.w $fp, $a0, $a1 +; NORMV-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; NORMV-NEXT: sra.w $a0, $a0, $a1 +; NORMV-NEXT: movgr2fr.w $fs0, $zero ; NORMV-NEXT: .p2align 4, , 16 ; NORMV-NEXT: .LBB9_1: # %bb2 ; NORMV-NEXT: # =>This Inner Loop Header: Depth=1 -; NORMV-NEXT: addi.w $a0, $fp, 0 +; NORMV-NEXT: addi.w $a0, $a0, 0 ; NORMV-NEXT: bl %plt(baz) -; NORMV-NEXT: move $fp, $a0 -; NORMV-NEXT: move $a1, $zero -; NORMV-NEXT: bl %plt(__nesf2) -; NORMV-NEXT: bnez $a0, .LBB9_1 +; NORMV-NEXT: fcmp.cune.s $fcc0, $fa0, $fs0 +; NORMV-NEXT: movfr2gr.s $a0, $fa0 +; NORMV-NEXT: bcnez $fcc0, .LBB9_1 ; NORMV-NEXT: # %bb.2: # %bb7 -; NORMV-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload +; NORMV-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload ; NORMV-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; NORMV-NEXT: addi.d $sp, $sp, 16 ; NORMV-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll b/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll index 4eb34bfa09acb..ae8c0a6a15ed6 100644 --- a/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll +++ b/llvm/test/CodeGen/LoongArch/soft-fp-to-int.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch64 --mattr=-f < %s | FileCheck %s --check-prefix=LA64 define i32 @fptosi_i32_fp128(fp128 %X) nounwind { ; LA32-LABEL: fptosi_i32_fp128: diff --git a/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll index 4a77b2c00f54c..ee55ed337a28c 100644 --- a/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll +++ b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll @@ -5,7 +5,7 @@ define void @test() gc "statepoint-example" { entry: %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" ()] -; CHECK: STATEPOINT 0, 0, 0, target-flags(loongarch-call-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_ilp32s_lp64s, implicit-def $r3, implicit-def dead early-clobber $r1 +; CHECK: STATEPOINT 0, 0, 0, target-flags(loongarch-call-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_ilp32d_lp64d, implicit-def $r3, implicit-def dead early-clobber $r1 ret void } diff --git a/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll index 6956929e721d7..e5febe62ae8a8 100644 --- a/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll @@ -200,10 +200,8 @@ define void @test_attributes(ptr byval(%struct2) %s) nounwind gc "statepoint-exa ; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill ; CHECK-NEXT: ld.d $a1, $a0, 16 ; CHECK-NEXT: st.d $a1, $sp, 16 -; CHECK-NEXT: ld.d $a1, $a0, 8 -; CHECK-NEXT: st.d $a1, $sp, 8 -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 ; CHECK-NEXT: ori $a0, $zero, 42 ; CHECK-NEXT: ori $a2, $zero, 17 ; CHECK-NEXT: addi.d $a3, $sp, 0 diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll index 8298d76d8e3a6..7f315ee897b1c 100644 --- a/llvm/test/CodeGen/LoongArch/tail-calls.ll +++ b/llvm/test/CodeGen/LoongArch/tail-calls.ll @@ -103,8 +103,8 @@ define void @caller_indirect_args() nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -48 ; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill ; CHECK-NEXT: st.d $zero, $sp, 24 -; CHECK-NEXT: st.d $zero, $sp, 16 -; CHECK-NEXT: st.d $zero, $sp, 8 +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vst $vr0, $sp, 8 ; CHECK-NEXT: ori $a1, $zero, 1 ; CHECK-NEXT: addi.d $a0, $sp, 0 ; CHECK-NEXT: st.d $a1, $sp, 0 diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll index 36b78ea2ea02c..147b792361478 100644 --- a/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll +++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple-edge-cases.ll @@ -66,14 +66,28 @@ define float @f(float %a) { ; ; LP64D-LABEL: f: ; LP64D: # %bb.0: -; LP64D-NEXT: addi.w $a0, $zero, 1 -; LP64D-NEXT: movgr2fr.w $fa1, $a0 -; LP64D-NEXT: ffint.s.w $fa1, $fa1 +; LP64D-NEXT: vldi $vr1, -1168 ; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 ; LP64D-NEXT: ret ; -; LP64S-LABEL: f: -; LP64S: bl %plt(__addsf3) +; LP64S-LP64F-NOF-LABEL: f: +; LP64S-LP64F-NOF: bl %plt(__addsf3) +; +; LP64S-LP64D-NOD-LABEL: f: +; LP64S-LP64D-NOD: # %bb.0: +; LP64S-LP64D-NOD-NEXT: movgr2fr.w $fa0, $a0 +; LP64S-LP64D-NOD-NEXT: addi.w $a0, $zero, 1 +; LP64S-LP64D-NOD-NEXT: movgr2fr.w $fa1, $a0 +; LP64S-LP64D-NOD-NEXT: ffint.s.w $fa1, $fa1 +; LP64S-LP64D-NOD-NEXT: fadd.s $fa0, $fa0, $fa1 +; LP64S-LP64D-NOD-NEXT: movfr2gr.s $a0, $fa0 +; LP64S-LP64D-NOD-NEXT: ret +; +; LP64D-LP64F-NOF-LABEL: f: +; LP64D-LP64F-NOF: bl %plt(__addsf3) +; +; LP64D-NONE-NOF-LABEL: f: +; LP64D-NONE-NOF: bl %plt(__addsf3) %1 = fadd float %a, 1.0 ret float %1 } @@ -90,9 +104,7 @@ define double @g(double %a) { ; ; LP64D-LABEL: g: ; LP64D: # %bb.0: -; LP64D-NEXT: addi.d $a0, $zero, 1 -; LP64D-NEXT: movgr2fr.d $fa1, $a0 -; LP64D-NEXT: ffint.d.l $fa1, $fa1 +; LP64D-NEXT: vldi $vr1, -912 ; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 ; LP64D-NEXT: ret ; diff --git a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll index 0aca339038860..c8a33725267a2 100644 --- a/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll +++ b/llvm/test/CodeGen/LoongArch/target-abi-from-triple.ll @@ -18,9 +18,7 @@ define float @f(float %a) { ; ; LP64D-LABEL: f: ; LP64D: # %bb.0: -; LP64D-NEXT: addi.w $a0, $zero, 1 -; LP64D-NEXT: movgr2fr.w $fa1, $a0 -; LP64D-NEXT: ffint.s.w $fa1, $fa1 +; LP64D-NEXT: vldi $vr1, -1168 ; LP64D-NEXT: fadd.s $fa0, $fa0, $fa1 ; LP64D-NEXT: ret %1 = fadd float %a, 1.0 @@ -39,9 +37,7 @@ define double @g(double %a) { ; ; LP64D-LABEL: g: ; LP64D: # %bb.0: -; LP64D-NEXT: addi.d $a0, $zero, 1 -; LP64D-NEXT: movgr2fr.d $fa1, $a0 -; LP64D-NEXT: ffint.d.l $fa1, $fa1 +; LP64D-NEXT: vldi $vr1, -912 ; LP64D-NEXT: fadd.d $fa0, $fa0, $fa1 ; LP64D-NEXT: ret %1 = fadd double %a, 1.0 diff --git a/llvm/test/CodeGen/LoongArch/vararg.ll b/llvm/test/CodeGen/LoongArch/vararg.ll index bbf3b8e333306..f488610868eb3 100644 --- a/llvm/test/CodeGen/LoongArch/vararg.ll +++ b/llvm/test/CodeGen/LoongArch/vararg.ll @@ -289,8 +289,8 @@ define void @va_aligned_stack_caller() nounwind { ; LA64-FPELIM-NEXT: lu52i.d $a0, $a0, -328 ; LA64-FPELIM-NEXT: st.d $a0, $sp, 16 ; LA64-FPELIM-NEXT: st.d $zero, $sp, 88 -; LA64-FPELIM-NEXT: st.d $zero, $sp, 80 -; LA64-FPELIM-NEXT: st.d $zero, $sp, 72 +; LA64-FPELIM-NEXT: vrepli.b $vr0, 0 +; LA64-FPELIM-NEXT: vst $vr0, $sp, 72 ; LA64-FPELIM-NEXT: ori $a5, $zero, 1000 ; LA64-FPELIM-NEXT: ori $a0, $zero, 1 ; LA64-FPELIM-NEXT: ori $a1, $zero, 11 @@ -330,8 +330,8 @@ define void @va_aligned_stack_caller() nounwind { ; LA64-WITHFP-NEXT: lu52i.d $a0, $a0, -328 ; LA64-WITHFP-NEXT: st.d $a0, $sp, 16 ; LA64-WITHFP-NEXT: st.d $zero, $fp, -24 -; LA64-WITHFP-NEXT: st.d $zero, $fp, -32 -; LA64-WITHFP-NEXT: st.d $zero, $fp, -40 +; LA64-WITHFP-NEXT: vrepli.b $vr0, 0 +; LA64-WITHFP-NEXT: vst $vr0, $fp, -40 ; LA64-WITHFP-NEXT: ori $a5, $zero, 1000 ; LA64-WITHFP-NEXT: ori $a0, $zero, 1 ; LA64-WITHFP-NEXT: ori $a1, $zero, 11 diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll index 8dd1ec465c13a..3e1b6d8eaadbc 100644 --- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll +++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll @@ -67,19 +67,10 @@ define void @test_zero(ptr %P, ptr %S) nounwind { ; ; LA64D-LABEL: test_zero: ; LA64D: # %bb.0: -; LA64D-NEXT: fld.s $fa0, $a0, 12 -; LA64D-NEXT: fld.s $fa1, $a0, 0 -; LA64D-NEXT: fld.s $fa2, $a0, 4 -; LA64D-NEXT: fld.s $fa3, $a0, 8 -; LA64D-NEXT: movgr2fr.w $fa4, $zero -; LA64D-NEXT: fadd.s $fa1, $fa1, $fa4 -; LA64D-NEXT: fadd.s $fa2, $fa2, $fa4 -; LA64D-NEXT: fadd.s $fa3, $fa3, $fa4 -; LA64D-NEXT: fadd.s $fa0, $fa0, $fa4 -; LA64D-NEXT: fst.s $fa0, $a1, 12 -; LA64D-NEXT: fst.s $fa3, $a1, 8 -; LA64D-NEXT: fst.s $fa2, $a1, 4 -; LA64D-NEXT: fst.s $fa1, $a1, 0 +; LA64D-NEXT: vld $vr0, $a0, 0 +; LA64D-NEXT: vrepli.b $vr1, 0 +; LA64D-NEXT: vfadd.s $vr0, $vr0, $vr1 +; LA64D-NEXT: vst $vr0, $a1, 0 ; LA64D-NEXT: ret %p = load %f4, ptr %P %R = fadd %f4 %p, zeroinitializer @@ -135,17 +126,17 @@ define void @test_f2(ptr %P, ptr %S) nounwind { ; ; LA64D-LABEL: test_f2: ; LA64D: # %bb.0: -; LA64D-NEXT: fld.s $fa0, $a0, 4 -; LA64D-NEXT: fld.s $fa1, $a0, 0 -; LA64D-NEXT: addi.w $a0, $zero, 1 -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) -; LA64D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI1_0) -; LA64D-NEXT: movgr2fr.w $fa3, $a0 -; LA64D-NEXT: ffint.s.w $fa3, $fa3 -; LA64D-NEXT: fadd.s $fa1, $fa1, $fa3 -; LA64D-NEXT: fadd.s $fa0, $fa0, $fa2 -; LA64D-NEXT: fst.s $fa0, $a1, 4 -; LA64D-NEXT: fst.s $fa1, $a1, 0 +; LA64D-NEXT: addi.d $sp, $sp, -16 +; LA64D-NEXT: ld.d $a0, $a0, 0 +; LA64D-NEXT: st.d $a0, $sp, 0 +; LA64D-NEXT: vld $vr0, $sp, 0 +; LA64D-NEXT: lu12i.w $a0, 260096 +; LA64D-NEXT: lu52i.d $a0, $a0, 1024 +; LA64D-NEXT: vreplgr2vr.d $vr1, $a0 +; LA64D-NEXT: vfadd.s $vr0, $vr0, $vr1 +; LA64D-NEXT: vpickve2gr.d $a0, $vr0, 0 +; LA64D-NEXT: st.d $a0, $a1, 0 +; LA64D-NEXT: addi.d $sp, $sp, 16 ; LA64D-NEXT: ret %p = load %f2, ptr %P %R = fadd %f2 %p, < float 1.000000e+00, float 2.000000e+00 > @@ -231,27 +222,11 @@ define void @test_f4(ptr %P, ptr %S) nounwind { ; ; LA64D-LABEL: test_f4: ; LA64D: # %bb.0: -; LA64D-NEXT: fld.s $fa0, $a0, 12 -; LA64D-NEXT: fld.s $fa1, $a0, 8 -; LA64D-NEXT: fld.s $fa2, $a0, 4 -; LA64D-NEXT: fld.s $fa3, $a0, 0 -; LA64D-NEXT: addi.w $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa4, $a0 -; LA64D-NEXT: ffint.s.w $fa4, $fa4 +; LA64D-NEXT: vld $vr0, $a0, 0 ; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; LA64D-NEXT: fld.s $fa5, $a0, %pc_lo12(.LCPI2_0) -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1) -; LA64D-NEXT: fld.s $fa6, $a0, %pc_lo12(.LCPI2_1) -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_2) -; LA64D-NEXT: fld.s $fa7, $a0, %pc_lo12(.LCPI2_2) -; LA64D-NEXT: fadd.s $fa3, $fa3, $fa4 -; LA64D-NEXT: fadd.s $fa2, $fa2, $fa5 -; LA64D-NEXT: fadd.s $fa1, $fa1, $fa6 -; LA64D-NEXT: fadd.s $fa0, $fa0, $fa7 -; LA64D-NEXT: fst.s $fa0, $a1, 12 -; LA64D-NEXT: fst.s $fa1, $a1, 8 -; LA64D-NEXT: fst.s $fa2, $a1, 4 -; LA64D-NEXT: fst.s $fa3, $a1, 0 +; LA64D-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0) +; LA64D-NEXT: vfadd.s $vr0, $vr0, $vr1 +; LA64D-NEXT: vst $vr0, $a1, 0 ; LA64D-NEXT: ret %p = load %f4, ptr %P %R = fadd %f4 %p, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > @@ -373,39 +348,14 @@ define void @test_f8(ptr %P, ptr %S) nounwind { ; ; LA64D-LABEL: test_f8: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.w $a2, $zero, 1 -; LA64D-NEXT: movgr2fr.w $fa0, $a2 +; LA64D-NEXT: vld $vr0, $a0, 16 ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_0) -; LA64D-NEXT: fld.s $fa1, $a2, %pc_lo12(.LCPI3_0) -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_1) -; LA64D-NEXT: fld.s $fa2, $a2, %pc_lo12(.LCPI3_1) -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI3_2) -; LA64D-NEXT: fld.s $fa3, $a2, %pc_lo12(.LCPI3_2) -; LA64D-NEXT: fld.s $fa4, $a0, 28 -; LA64D-NEXT: fld.s $fa5, $a0, 24 -; LA64D-NEXT: fld.s $fa6, $a0, 12 -; LA64D-NEXT: fld.s $fa7, $a0, 8 -; LA64D-NEXT: fld.s $ft0, $a0, 0 -; LA64D-NEXT: fld.s $ft1, $a0, 16 -; LA64D-NEXT: fld.s $ft2, $a0, 4 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: fadd.s $ft0, $ft0, $fa0 -; LA64D-NEXT: fadd.s $fa0, $ft1, $fa0 -; LA64D-NEXT: fld.s $ft1, $a0, 20 -; LA64D-NEXT: fadd.s $ft2, $ft2, $fa1 -; LA64D-NEXT: fadd.s $fa7, $fa7, $fa2 -; LA64D-NEXT: fadd.s $fa6, $fa6, $fa3 -; LA64D-NEXT: fadd.s $fa1, $ft1, $fa1 -; LA64D-NEXT: fadd.s $fa2, $fa5, $fa2 -; LA64D-NEXT: fadd.s $fa3, $fa4, $fa3 -; LA64D-NEXT: fst.s $fa3, $a1, 28 -; LA64D-NEXT: fst.s $fa2, $a1, 24 -; LA64D-NEXT: fst.s $fa1, $a1, 20 -; LA64D-NEXT: fst.s $fa6, $a1, 12 -; LA64D-NEXT: fst.s $fa7, $a1, 8 -; LA64D-NEXT: fst.s $ft2, $a1, 4 -; LA64D-NEXT: fst.s $fa0, $a1, 16 -; LA64D-NEXT: fst.s $ft0, $a1, 0 +; LA64D-NEXT: vld $vr1, $a2, %pc_lo12(.LCPI3_0) +; LA64D-NEXT: vld $vr2, $a0, 0 +; LA64D-NEXT: vfadd.s $vr0, $vr0, $vr1 +; LA64D-NEXT: vfadd.s $vr1, $vr2, $vr1 +; LA64D-NEXT: vst $vr1, $a1, 0 +; LA64D-NEXT: vst $vr0, $a1, 16 ; LA64D-NEXT: ret %p = load %f8, ptr %P %R = fadd %f8 %p, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > @@ -496,17 +446,11 @@ define void @test_d2(ptr %P, ptr %S) nounwind { ; ; LA64D-LABEL: test_d2: ; LA64D: # %bb.0: -; LA64D-NEXT: fld.d $fa0, $a0, 8 -; LA64D-NEXT: fld.d $fa1, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0) -; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI4_0) -; LA64D-NEXT: movgr2fr.d $fa3, $a0 -; LA64D-NEXT: ffint.d.l $fa3, $fa3 -; LA64D-NEXT: fadd.d $fa1, $fa1, $fa3 -; LA64D-NEXT: fadd.d $fa0, $fa0, $fa2 -; LA64D-NEXT: fst.d $fa0, $a1, 8 -; LA64D-NEXT: fst.d $fa1, $a1, 0 +; LA64D-NEXT: vld $vr0, $a0, 0 +; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; LA64D-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0) +; LA64D-NEXT: vfadd.d $vr0, $vr0, $vr1 +; LA64D-NEXT: vst $vr0, $a1, 0 ; LA64D-NEXT: ret %p = load %d2, ptr %P %R = fadd %d2 %p, < double 1.000000e+00, double 2.000000e+00 > @@ -655,27 +599,16 @@ define void @test_d4(ptr %P, ptr %S) nounwind { ; ; LA64D-LABEL: test_d4: ; LA64D: # %bb.0: -; LA64D-NEXT: fld.d $fa0, $a0, 24 -; LA64D-NEXT: fld.d $fa1, $a0, 16 -; LA64D-NEXT: fld.d $fa2, $a0, 8 -; LA64D-NEXT: fld.d $fa3, $a0, 0 -; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa4, $a0 -; LA64D-NEXT: ffint.d.l $fa4, $fa4 +; LA64D-NEXT: vld $vr0, $a0, 0 +; LA64D-NEXT: vld $vr1, $a0, 16 ; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; LA64D-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_0) +; LA64D-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0) ; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) -; LA64D-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_1) -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) -; LA64D-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI5_2) -; LA64D-NEXT: fadd.d $fa3, $fa3, $fa4 -; LA64D-NEXT: fadd.d $fa2, $fa2, $fa5 -; LA64D-NEXT: fadd.d $fa1, $fa1, $fa6 -; LA64D-NEXT: fadd.d $fa0, $fa0, $fa7 -; LA64D-NEXT: fst.d $fa0, $a1, 24 -; LA64D-NEXT: fst.d $fa1, $a1, 16 -; LA64D-NEXT: fst.d $fa2, $a1, 8 -; LA64D-NEXT: fst.d $fa3, $a1, 0 +; LA64D-NEXT: vld $vr3, $a0, %pc_lo12(.LCPI5_1) +; LA64D-NEXT: vfadd.d $vr1, $vr1, $vr2 +; LA64D-NEXT: vfadd.d $vr0, $vr0, $vr3 +; LA64D-NEXT: vst $vr0, $a1, 0 +; LA64D-NEXT: vst $vr1, $a1, 16 ; LA64D-NEXT: ret %p = load %d4, ptr %P %R = fadd %d4 %p, < double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00 > @@ -934,39 +867,22 @@ define void @test_d8(ptr %P, ptr %S) nounwind { ; ; LA64D-LABEL: test_d8: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.d $a2, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fa0, $a2 ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0) -; LA64D-NEXT: fld.d $fa1, $a2, %pc_lo12(.LCPI6_0) +; LA64D-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI6_0) ; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_1) -; LA64D-NEXT: fld.d $fa2, $a2, %pc_lo12(.LCPI6_1) -; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_2) -; LA64D-NEXT: fld.d $fa3, $a2, %pc_lo12(.LCPI6_2) -; LA64D-NEXT: fld.d $fa4, $a0, 56 -; LA64D-NEXT: fld.d $fa5, $a0, 48 -; LA64D-NEXT: fld.d $fa6, $a0, 24 -; LA64D-NEXT: fld.d $fa7, $a0, 16 -; LA64D-NEXT: fld.d $ft0, $a0, 0 -; LA64D-NEXT: fld.d $ft1, $a0, 32 -; LA64D-NEXT: fld.d $ft2, $a0, 8 -; LA64D-NEXT: ffint.d.l $fa0, $fa0 -; LA64D-NEXT: fadd.d $ft0, $ft0, $fa0 -; LA64D-NEXT: fadd.d $fa0, $ft1, $fa0 -; LA64D-NEXT: fld.d $ft1, $a0, 40 -; LA64D-NEXT: fadd.d $ft2, $ft2, $fa1 -; LA64D-NEXT: fadd.d $fa7, $fa7, $fa2 -; LA64D-NEXT: fadd.d $fa6, $fa6, $fa3 -; LA64D-NEXT: fadd.d $fa1, $ft1, $fa1 -; LA64D-NEXT: fadd.d $fa2, $fa5, $fa2 -; LA64D-NEXT: fadd.d $fa3, $fa4, $fa3 -; LA64D-NEXT: fst.d $fa3, $a1, 56 -; LA64D-NEXT: fst.d $fa2, $a1, 48 -; LA64D-NEXT: fst.d $fa1, $a1, 40 -; LA64D-NEXT: fst.d $fa6, $a1, 24 -; LA64D-NEXT: fst.d $fa7, $a1, 16 -; LA64D-NEXT: fst.d $ft2, $a1, 8 -; LA64D-NEXT: fst.d $fa0, $a1, 32 -; LA64D-NEXT: fst.d $ft0, $a1, 0 +; LA64D-NEXT: vld $vr1, $a2, %pc_lo12(.LCPI6_1) +; LA64D-NEXT: vld $vr2, $a0, 16 +; LA64D-NEXT: vld $vr3, $a0, 0 +; LA64D-NEXT: vld $vr4, $a0, 48 +; LA64D-NEXT: vld $vr5, $a0, 32 +; LA64D-NEXT: vfadd.d $vr2, $vr2, $vr0 +; LA64D-NEXT: vfadd.d $vr3, $vr3, $vr1 +; LA64D-NEXT: vfadd.d $vr0, $vr4, $vr0 +; LA64D-NEXT: vfadd.d $vr1, $vr5, $vr1 +; LA64D-NEXT: vst $vr1, $a1, 32 +; LA64D-NEXT: vst $vr0, $a1, 48 +; LA64D-NEXT: vst $vr3, $a1, 0 +; LA64D-NEXT: vst $vr2, $a1, 16 ; LA64D-NEXT: ret %p = load %d8, ptr %P %R = fadd %d8 %p, < double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00 > diff --git a/llvm/test/Transforms/LoopIdiom/LoongArch/popcnt.ll b/llvm/test/Transforms/LoopIdiom/LoongArch/popcnt.ll index 915a100a54f48..0994a7d9391d3 100644 --- a/llvm/test/Transforms/LoopIdiom/LoongArch/popcnt.ll +++ b/llvm/test/Transforms/LoopIdiom/LoongArch/popcnt.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=loop-idiom -mtriple=loongarch32 -mattr=+lsx -S < %s | FileCheck %s --check-prefix=CPOP -; RUN: opt -passes=loop-idiom -mtriple=loongarch64 -mattr=+lsx -S < %s | FileCheck %s --check-prefix=CPOP ; RUN: opt -passes=loop-idiom -mtriple=loongarch32 -S < %s | FileCheck %s --check-prefix=NOCPOP -; RUN: opt -passes=loop-idiom -mtriple=loongarch64 -S < %s | FileCheck %s --check-prefix=NOCPOP +; RUN: opt -passes=loop-idiom -mtriple=loongarch32 -mattr=+lsx -S < %s | FileCheck %s --check-prefix=CPOP +; RUN: opt -passes=loop-idiom -mtriple=loongarch64 -S < %s | FileCheck %s --check-prefix=CPOP +; RUN: opt -passes=loop-idiom -mtriple=loongarch64 -mattr=-lsx -S < %s | FileCheck %s --check-prefix=NOCPOP ; Mostly copied from RISCV version. diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll index be9b170491b9c..5cfdd635e6883 100644 --- a/llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/loongarch-interleaved.ll @@ -3,7 +3,7 @@ ; RUN: -S < %s 2>&1 | FileCheck %s ; CHECK-LABEL: foo -; CHECK: %{{.*}} = add {{.*}}, 2 +; CHECK: %{{.*}} = add {{.*}}, 8 ; Function Attrs: nofree norecurse nosync nounwind writeonly define dso_local void @foo(i32 signext %n, ptr nocapture %A) local_unnamed_addr #0 {