diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 61615cb0f7b30..8e0cdc6f1a5e7 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3223,6 +3223,9 @@ class TargetLoweringBase { /// not legal, but should return true if those types will eventually legalize /// to types that support FMAs. After legalization, it will only be called on /// types that support FMAs (via Legal or Custom actions) + /// + /// Targets that care about soft float support should return false when soft + /// float code is being generated (i.e. use-soft-float). virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const { return false; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 5d679a1a916dc..a4f01e55f53c1 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -19354,6 +19354,9 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { /// patterns (and we don't have the non-fused floating point instruction). bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { + if (Subtarget->useSoftFloat()) + return false; + if (!VT.isSimple()) return false; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 83417e570dabf..3e05f3b0180a7 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -793,6 +793,9 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( const MachineFunction &MF, EVT VT) const { + if (useSoftFloat()) + return false; + VT = VT.getScalarType(); if (!VT.isSimple()) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 102789a3e9521..0bb68ccdb821c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34838,6 +34838,9 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { + if (Subtarget.useSoftFloat()) + return false; + if (!Subtarget.hasAnyFMA()) return false; diff --git a/llvm/test/CodeGen/ARM/fmuladd-soft-float.ll b/llvm/test/CodeGen/ARM/fmuladd-soft-float.ll new file mode 100644 index 0000000000000..88c31325b64b7 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fmuladd-soft-float.ll @@ -0,0 +1,406 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=arm < %s | FileCheck %s -check-prefix=SOFT-FLOAT +; RUN: llc -mtriple=arm -mattr=+vfp4d16sp < %s | FileCheck %s -check-prefix=SOFT-FLOAT-VFP32 +; RUN: llc -mtriple=arm -mattr=+vfp4d16sp,+fp64 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-VFP64 + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r4, lr} +; SOFT-FLOAT-NEXT: mov r4, r2 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: mov r1, r4 +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: pop {r4, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r4, lr} +; SOFT-FLOAT-VFP32-NEXT: mov r4, r2 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: mov r1, r4 +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: pop {r4, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r4, lr} +; SOFT-FLOAT-VFP64-NEXT: mov r4, r2 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: mov r1, r4 +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: pop {r4, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r11, lr} +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: pop {r11, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r11, lr} +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: pop {r11, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r11, lr} +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: pop {r11, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r4, lr} +; SOFT-FLOAT-NEXT: mov r4, r2 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: mov r1, r4 +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: pop {r4, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r4, lr} +; SOFT-FLOAT-VFP32-NEXT: mov r4, r2 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: mov r1, r4 +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: pop {r4, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r4, lr} +; SOFT-FLOAT-VFP64-NEXT: mov r4, r2 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: mov r1, r4 +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: pop {r4, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r11, lr} +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: pop {r11, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r11, lr} +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: pop {r11, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r11, lr} +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #8] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #12] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: pop {r11, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-NEXT: mov r7, r1 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #24] +; SOFT-FLOAT-NEXT: mov r4, r3 +; SOFT-FLOAT-NEXT: mov r6, r2 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #40] +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-NEXT: mov r5, r0 +; SOFT-FLOAT-NEXT: mov r0, r7 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #44] +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #32] +; SOFT-FLOAT-NEXT: mov r7, r0 +; SOFT-FLOAT-NEXT: mov r0, r6 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #48] +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-NEXT: mov r6, r0 +; SOFT-FLOAT-NEXT: mov r0, r4 +; SOFT-FLOAT-NEXT: bl __mulsf3 +; SOFT-FLOAT-NEXT: ldr r1, [sp, #52] +; SOFT-FLOAT-NEXT: bl __addsf3 +; SOFT-FLOAT-NEXT: mov r3, r0 +; SOFT-FLOAT-NEXT: mov r0, r5 +; SOFT-FLOAT-NEXT: mov r1, r7 +; SOFT-FLOAT-NEXT: mov r2, r6 +; SOFT-FLOAT-NEXT: pop {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-VFP32-NEXT: mov r7, r1 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #24] +; SOFT-FLOAT-VFP32-NEXT: mov r4, r3 +; SOFT-FLOAT-VFP32-NEXT: mov r6, r2 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #40] +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-VFP32-NEXT: mov r5, r0 +; SOFT-FLOAT-VFP32-NEXT: mov r0, r7 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #44] +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #32] +; SOFT-FLOAT-VFP32-NEXT: mov r7, r0 +; SOFT-FLOAT-VFP32-NEXT: mov r0, r6 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #48] +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-VFP32-NEXT: mov r6, r0 +; SOFT-FLOAT-VFP32-NEXT: mov r0, r4 +; SOFT-FLOAT-VFP32-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #52] +; SOFT-FLOAT-VFP32-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP32-NEXT: mov r3, r0 +; SOFT-FLOAT-VFP32-NEXT: mov r0, r5 +; SOFT-FLOAT-VFP32-NEXT: mov r1, r7 +; SOFT-FLOAT-VFP32-NEXT: mov r2, r6 +; SOFT-FLOAT-VFP32-NEXT: pop {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-VFP64-NEXT: mov r7, r1 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #24] +; SOFT-FLOAT-VFP64-NEXT: mov r4, r3 +; SOFT-FLOAT-VFP64-NEXT: mov r6, r2 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #40] +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-VFP64-NEXT: mov r5, r0 +; SOFT-FLOAT-VFP64-NEXT: mov r0, r7 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #44] +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #32] +; SOFT-FLOAT-VFP64-NEXT: mov r7, r0 +; SOFT-FLOAT-VFP64-NEXT: mov r0, r6 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #48] +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-VFP64-NEXT: mov r6, r0 +; SOFT-FLOAT-VFP64-NEXT: mov r0, r4 +; SOFT-FLOAT-VFP64-NEXT: bl __mulsf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #52] +; SOFT-FLOAT-VFP64-NEXT: bl __addsf3 +; SOFT-FLOAT-VFP64-NEXT: mov r3, r0 +; SOFT-FLOAT-VFP64-NEXT: mov r0, r5 +; SOFT-FLOAT-VFP64-NEXT: mov r1, r7 +; SOFT-FLOAT-VFP64-NEXT: mov r2, r6 +; SOFT-FLOAT-VFP64-NEXT: pop {r4, r5, r6, r7, r11, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT: @ %bb.0: +; SOFT-FLOAT-NEXT: push {r4, r5, r6, lr} +; SOFT-FLOAT-NEXT: mov r5, r3 +; SOFT-FLOAT-NEXT: mov r6, r2 +; SOFT-FLOAT-NEXT: mov r4, r0 +; SOFT-FLOAT-NEXT: ldr r0, [sp, #32] +; SOFT-FLOAT-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-NEXT: ldr r2, [sp, #64] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #68] +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #96] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #100] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: str r0, [r4, #24] +; SOFT-FLOAT-NEXT: str r1, [r4, #28] +; SOFT-FLOAT-NEXT: ldr r0, [sp, #24] +; SOFT-FLOAT-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-NEXT: ldr r2, [sp, #56] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #60] +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #88] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #92] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: str r0, [r4, #16] +; SOFT-FLOAT-NEXT: str r1, [r4, #20] +; SOFT-FLOAT-NEXT: ldr r0, [sp, #16] +; SOFT-FLOAT-NEXT: ldr r1, [sp, #20] +; SOFT-FLOAT-NEXT: ldr r2, [sp, #48] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #52] +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #80] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #84] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #40] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #44] +; SOFT-FLOAT-NEXT: str r0, [r4, #8] +; SOFT-FLOAT-NEXT: mov r0, r6 +; SOFT-FLOAT-NEXT: str r1, [r4, #12] +; SOFT-FLOAT-NEXT: mov r1, r5 +; SOFT-FLOAT-NEXT: bl __muldf3 +; SOFT-FLOAT-NEXT: ldr r2, [sp, #72] +; SOFT-FLOAT-NEXT: ldr r3, [sp, #76] +; SOFT-FLOAT-NEXT: bl __adddf3 +; SOFT-FLOAT-NEXT: stm r4, {r0, r1} +; SOFT-FLOAT-NEXT: pop {r4, r5, r6, lr} +; SOFT-FLOAT-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP32-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-VFP32: @ %bb.0: +; SOFT-FLOAT-VFP32-NEXT: push {r4, r5, r6, lr} +; SOFT-FLOAT-VFP32-NEXT: mov r5, r3 +; SOFT-FLOAT-VFP32-NEXT: mov r6, r2 +; SOFT-FLOAT-VFP32-NEXT: mov r4, r0 +; SOFT-FLOAT-VFP32-NEXT: ldr r0, [sp, #32] +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #64] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #68] +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #96] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #100] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: str r0, [r4, #24] +; SOFT-FLOAT-VFP32-NEXT: str r1, [r4, #28] +; SOFT-FLOAT-VFP32-NEXT: ldr r0, [sp, #24] +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #56] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #60] +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #88] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #92] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: str r0, [r4, #16] +; SOFT-FLOAT-VFP32-NEXT: str r1, [r4, #20] +; SOFT-FLOAT-VFP32-NEXT: ldr r0, [sp, #16] +; SOFT-FLOAT-VFP32-NEXT: ldr r1, [sp, #20] +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #48] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #52] +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #80] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #84] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #40] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #44] +; SOFT-FLOAT-VFP32-NEXT: str r0, [r4, #8] +; SOFT-FLOAT-VFP32-NEXT: mov r0, r6 +; SOFT-FLOAT-VFP32-NEXT: str r1, [r4, #12] +; SOFT-FLOAT-VFP32-NEXT: mov r1, r5 +; SOFT-FLOAT-VFP32-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP32-NEXT: ldr r2, [sp, #72] +; SOFT-FLOAT-VFP32-NEXT: ldr r3, [sp, #76] +; SOFT-FLOAT-VFP32-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP32-NEXT: stm r4, {r0, r1} +; SOFT-FLOAT-VFP32-NEXT: pop {r4, r5, r6, lr} +; SOFT-FLOAT-VFP32-NEXT: mov pc, lr +; +; SOFT-FLOAT-VFP64-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-VFP64: @ %bb.0: +; SOFT-FLOAT-VFP64-NEXT: push {r4, r5, r6, lr} +; SOFT-FLOAT-VFP64-NEXT: mov r5, r3 +; SOFT-FLOAT-VFP64-NEXT: mov r6, r2 +; SOFT-FLOAT-VFP64-NEXT: mov r4, r0 +; SOFT-FLOAT-VFP64-NEXT: ldr r0, [sp, #32] +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #36] +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #64] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #68] +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #96] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #100] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: str r0, [r4, #24] +; SOFT-FLOAT-VFP64-NEXT: str r1, [r4, #28] +; SOFT-FLOAT-VFP64-NEXT: ldr r0, [sp, #24] +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #28] +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #56] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #60] +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #88] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #92] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: str r0, [r4, #16] +; SOFT-FLOAT-VFP64-NEXT: str r1, [r4, #20] +; SOFT-FLOAT-VFP64-NEXT: ldr r0, [sp, #16] +; SOFT-FLOAT-VFP64-NEXT: ldr r1, [sp, #20] +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #48] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #52] +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #80] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #84] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #40] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #44] +; SOFT-FLOAT-VFP64-NEXT: str r0, [r4, #8] +; SOFT-FLOAT-VFP64-NEXT: mov r0, r6 +; SOFT-FLOAT-VFP64-NEXT: str r1, [r4, #12] +; SOFT-FLOAT-VFP64-NEXT: mov r1, r5 +; SOFT-FLOAT-VFP64-NEXT: bl __muldf3 +; SOFT-FLOAT-VFP64-NEXT: ldr r2, [sp, #72] +; SOFT-FLOAT-VFP64-NEXT: ldr r3, [sp, #76] +; SOFT-FLOAT-VFP64-NEXT: bl __adddf3 +; SOFT-FLOAT-VFP64-NEXT: stm r4, {r0, r1} +; SOFT-FLOAT-VFP64-NEXT: pop {r4, r5, r6, lr} +; SOFT-FLOAT-VFP64-NEXT: mov pc, lr + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/Mips/fmuladd-soft-float.ll b/llvm/test/CodeGen/Mips/fmuladd-soft-float.ll new file mode 100644 index 0000000000000..bbfb7cf9ca907 --- /dev/null +++ b/llvm/test/CodeGen/Mips/fmuladd-soft-float.ll @@ -0,0 +1,932 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=mips < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32 +; RUN: llc -mtriple=mips -mcpu mips32r2 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32R2 +; RUN: llc -mtriple=mips64 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64 +; RUN: llc -mtriple=mips64 -mcpu mips64r2 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64R2 + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: .cfi_offset 16, -8 +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $16, $6 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $5, $16 +; SOFT-FLOAT-32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 16, -8 +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $16, $6 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $16 +; SOFT-FLOAT-32R2-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64-NEXT: move $16, $6 +; SOFT-FLOAT-64-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $5, 0 +; SOFT-FLOAT-64-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 16 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64R2-NEXT: move $16, $6 +; SOFT-FLOAT-64R2-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $5, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 16 + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: nop +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $6, 40($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 44($sp) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: nop +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $6, 40($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 44($sp) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $16, $6 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $5, $16 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 16 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $16, $6 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $16 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 16 + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: .cfi_offset 16, -8 +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $16, $6 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $5, $16 +; SOFT-FLOAT-32-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $16, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 16, -8 +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $16, $6 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $16 +; SOFT-FLOAT-32R2-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64-NEXT: move $16, $6 +; SOFT-FLOAT-64-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $5, 0 +; SOFT-FLOAT-64-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 16 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64R2-NEXT: move $16, $6 +; SOFT-FLOAT-64R2-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $5, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 16 + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: nop +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $6, 40($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 44($sp) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: nop +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $6, 40($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 44($sp) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 24 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $16, $6 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $5, $16 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 16 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -16 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $16, $6 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $16 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 16 + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -48 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-32-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $21, 40($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $20, 36($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: .cfi_offset 21, -8 +; SOFT-FLOAT-32-NEXT: .cfi_offset 20, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset 19, -16 +; SOFT-FLOAT-32-NEXT: .cfi_offset 18, -20 +; SOFT-FLOAT-32-NEXT: .cfi_offset 17, -24 +; SOFT-FLOAT-32-NEXT: .cfi_offset 16, -28 +; SOFT-FLOAT-32-NEXT: move $17, $7 +; SOFT-FLOAT-32-NEXT: move $16, $4 +; SOFT-FLOAT-32-NEXT: lw $4, 64($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 80($sp) +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $18, $6 +; SOFT-FLOAT-32-NEXT: lw $5, 96($sp) +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $4, 68($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 84($sp) +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $19, $2 +; SOFT-FLOAT-32-NEXT: lw $5, 100($sp) +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: move $20, $2 +; SOFT-FLOAT-32-NEXT: lw $5, 76($sp) +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $4, $17 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $17, 88($sp) +; SOFT-FLOAT-32-NEXT: lw $21, 72($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 92($sp) +; SOFT-FLOAT-32-NEXT: sw $20, 12($16) +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: sw $19, 8($16) +; SOFT-FLOAT-32-NEXT: sw $2, 4($16) +; SOFT-FLOAT-32-NEXT: move $4, $18 +; SOFT-FLOAT-32-NEXT: jal __mulsf3 +; SOFT-FLOAT-32-NEXT: move $5, $21 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: jal __addsf3 +; SOFT-FLOAT-32-NEXT: move $5, $17 +; SOFT-FLOAT-32-NEXT: sw $2, 0($16) +; SOFT-FLOAT-32-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $20, 36($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $21, 40($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 48 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -48 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $21, 40($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $20, 36($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 21, -8 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 20, -12 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 19, -16 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 18, -20 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 17, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 16, -28 +; SOFT-FLOAT-32R2-NEXT: move $17, $7 +; SOFT-FLOAT-32R2-NEXT: move $16, $4 +; SOFT-FLOAT-32R2-NEXT: lw $4, 64($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 80($sp) +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $18, $6 +; SOFT-FLOAT-32R2-NEXT: lw $5, 96($sp) +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $4, 68($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 84($sp) +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $19, $2 +; SOFT-FLOAT-32R2-NEXT: lw $5, 100($sp) +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: move $20, $2 +; SOFT-FLOAT-32R2-NEXT: lw $5, 76($sp) +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $4, $17 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $17, 88($sp) +; SOFT-FLOAT-32R2-NEXT: lw $21, 72($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 92($sp) +; SOFT-FLOAT-32R2-NEXT: sw $20, 12($16) +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: sw $19, 8($16) +; SOFT-FLOAT-32R2-NEXT: sw $2, 4($16) +; SOFT-FLOAT-32R2-NEXT: move $4, $18 +; SOFT-FLOAT-32R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $21 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: jal __addsf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $17 +; SOFT-FLOAT-32R2-NEXT: sw $2, 0($16) +; SOFT-FLOAT-32R2-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $20, 36($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $21, 40($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 48 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -64 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $22, 48($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $21, 40($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $20, 32($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $19, 24($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $18, 16($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $17, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-64-NEXT: .cfi_offset 21, -24 +; SOFT-FLOAT-64-NEXT: .cfi_offset 20, -32 +; SOFT-FLOAT-64-NEXT: .cfi_offset 19, -40 +; SOFT-FLOAT-64-NEXT: .cfi_offset 18, -48 +; SOFT-FLOAT-64-NEXT: .cfi_offset 17, -56 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -64 +; SOFT-FLOAT-64-NEXT: move $16, $9 +; SOFT-FLOAT-64-NEXT: move $17, $8 +; SOFT-FLOAT-64-NEXT: move $18, $7 +; SOFT-FLOAT-64-NEXT: move $19, $6 +; SOFT-FLOAT-64-NEXT: move $20, $5 +; SOFT-FLOAT-64-NEXT: move $21, $4 +; SOFT-FLOAT-64-NEXT: sll $4, $4, 0 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $6, 0 +; SOFT-FLOAT-64-NEXT: move $22, $2 +; SOFT-FLOAT-64-NEXT: dsra $4, $21, 32 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: dsra $5, $19, 32 +; SOFT-FLOAT-64-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: dsra $5, $17, 32 +; SOFT-FLOAT-64-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; SOFT-FLOAT-64-NEXT: sll $4, $22, 0 +; SOFT-FLOAT-64-NEXT: sll $5, $17, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: dsll $17, $2, 32 +; SOFT-FLOAT-64-NEXT: dsll $1, $2, 32 +; SOFT-FLOAT-64-NEXT: dsrl $1, $1, 32 +; SOFT-FLOAT-64-NEXT: sll $4, $20, 0 +; SOFT-FLOAT-64-NEXT: sll $5, $18, 0 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: or $17, $1, $17 +; SOFT-FLOAT-64-NEXT: move $19, $2 +; SOFT-FLOAT-64-NEXT: dsra $4, $20, 32 +; SOFT-FLOAT-64-NEXT: jal __mulsf3 +; SOFT-FLOAT-64-NEXT: dsra $5, $18, 32 +; SOFT-FLOAT-64-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: dsra $5, $16, 32 +; SOFT-FLOAT-64-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; SOFT-FLOAT-64-NEXT: dsll $18, $2, 32 +; SOFT-FLOAT-64-NEXT: sll $4, $19, 0 +; SOFT-FLOAT-64-NEXT: jal __addsf3 +; SOFT-FLOAT-64-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64-NEXT: dsll $1, $2, 32 +; SOFT-FLOAT-64-NEXT: dsrl $1, $1, 32 +; SOFT-FLOAT-64-NEXT: or $3, $1, $18 +; SOFT-FLOAT-64-NEXT: move $2, $17 +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $17, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $18, 16($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $19, 24($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $20, 32($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $21, 40($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $22, 48($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 64 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -64 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $22, 48($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $21, 40($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $20, 32($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $19, 24($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $18, 16($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $17, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 21, -24 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 20, -32 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 19, -40 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 18, -48 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 17, -56 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -64 +; SOFT-FLOAT-64R2-NEXT: move $16, $9 +; SOFT-FLOAT-64R2-NEXT: move $17, $8 +; SOFT-FLOAT-64R2-NEXT: move $18, $7 +; SOFT-FLOAT-64R2-NEXT: move $19, $6 +; SOFT-FLOAT-64R2-NEXT: move $20, $5 +; SOFT-FLOAT-64R2-NEXT: move $21, $4 +; SOFT-FLOAT-64R2-NEXT: dsra $4, $4, 32 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: dsra $5, $6, 32 +; SOFT-FLOAT-64R2-NEXT: move $22, $2 +; SOFT-FLOAT-64R2-NEXT: sll $4, $21, 0 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $19, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $17, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $22, 0 +; SOFT-FLOAT-64R2-NEXT: dsra $5, $17, 32 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: dext $17, $2, 0, 32 +; SOFT-FLOAT-64R2-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; SOFT-FLOAT-64R2-NEXT: dsll $1, $2, 32 +; SOFT-FLOAT-64R2-NEXT: dsra $4, $20, 32 +; SOFT-FLOAT-64R2-NEXT: dsra $5, $18, 32 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: or $17, $17, $1 +; SOFT-FLOAT-64R2-NEXT: move $19, $2 +; SOFT-FLOAT-64R2-NEXT: sll $4, $20, 0 +; SOFT-FLOAT-64R2-NEXT: jal __mulsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $18, 0 +; SOFT-FLOAT-64R2-NEXT: sll $4, $2, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: sll $5, $16, 0 +; SOFT-FLOAT-64R2-NEXT: dext $18, $2, 0, 32 +; SOFT-FLOAT-64R2-NEXT: sll $4, $19, 0 +; SOFT-FLOAT-64R2-NEXT: jal __addsf3 +; SOFT-FLOAT-64R2-NEXT: dsra $5, $16, 32 +; SOFT-FLOAT-64R2-NEXT: # kill: def $v0 killed $v0 def $v0_64 +; SOFT-FLOAT-64R2-NEXT: dsll $1, $2, 32 +; SOFT-FLOAT-64R2-NEXT: or $3, $18, $1 +; SOFT-FLOAT-64R2-NEXT: move $2, $17 +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $17, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $18, 16($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $19, 24($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $20, 32($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $21, 40($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $22, 48($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 64 + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, -64 +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-32-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $23, 52($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $22, 48($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $21, 44($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $20, 40($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $19, 36($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $18, 32($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $17, 28($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $16, 24($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32-NEXT: .cfi_offset 30, -8 +; SOFT-FLOAT-32-NEXT: .cfi_offset 23, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-32-NEXT: .cfi_offset 21, -20 +; SOFT-FLOAT-32-NEXT: .cfi_offset 20, -24 +; SOFT-FLOAT-32-NEXT: .cfi_offset 19, -28 +; SOFT-FLOAT-32-NEXT: .cfi_offset 18, -32 +; SOFT-FLOAT-32-NEXT: .cfi_offset 17, -36 +; SOFT-FLOAT-32-NEXT: .cfi_offset 16, -40 +; SOFT-FLOAT-32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: sw $6, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: move $16, $4 +; SOFT-FLOAT-32-NEXT: lw $4, 88($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 92($sp) +; SOFT-FLOAT-32-NEXT: lw $6, 120($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 124($sp) +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: nop +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $6, 152($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 156($sp) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: move $19, $2 +; SOFT-FLOAT-32-NEXT: lw $4, 96($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 100($sp) +; SOFT-FLOAT-32-NEXT: lw $6, 128($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 132($sp) +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: move $20, $3 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: lw $6, 160($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 164($sp) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: move $21, $2 +; SOFT-FLOAT-32-NEXT: lw $4, 80($sp) +; SOFT-FLOAT-32-NEXT: lw $5, 84($sp) +; SOFT-FLOAT-32-NEXT: lw $6, 112($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 116($sp) +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: move $22, $3 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: lw $23, 140($sp) +; SOFT-FLOAT-32-NEXT: lw $fp, 136($sp) +; SOFT-FLOAT-32-NEXT: lw $17, 108($sp) +; SOFT-FLOAT-32-NEXT: lw $18, 104($sp) +; SOFT-FLOAT-32-NEXT: lw $7, 148($sp) +; SOFT-FLOAT-32-NEXT: lw $6, 144($sp) +; SOFT-FLOAT-32-NEXT: sw $22, 28($16) +; SOFT-FLOAT-32-NEXT: sw $21, 24($16) +; SOFT-FLOAT-32-NEXT: sw $20, 20($16) +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: sw $19, 16($16) +; SOFT-FLOAT-32-NEXT: sw $3, 12($16) +; SOFT-FLOAT-32-NEXT: sw $2, 8($16) +; SOFT-FLOAT-32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $5, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: move $6, $18 +; SOFT-FLOAT-32-NEXT: jal __muldf3 +; SOFT-FLOAT-32-NEXT: move $7, $17 +; SOFT-FLOAT-32-NEXT: move $4, $2 +; SOFT-FLOAT-32-NEXT: move $5, $3 +; SOFT-FLOAT-32-NEXT: move $6, $fp +; SOFT-FLOAT-32-NEXT: jal __adddf3 +; SOFT-FLOAT-32-NEXT: move $7, $23 +; SOFT-FLOAT-32-NEXT: sw $3, 4($16) +; SOFT-FLOAT-32-NEXT: sw $2, 0($16) +; SOFT-FLOAT-32-NEXT: lw $16, 24($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $17, 28($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $18, 32($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $19, 36($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $20, 40($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $21, 44($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $22, 48($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $23, 52($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: jr $ra +; SOFT-FLOAT-32-NEXT: addiu $sp, $sp, 64 +; +; SOFT-FLOAT-32R2-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32R2: # %bb.0: +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, -64 +; SOFT-FLOAT-32R2-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-32R2-NEXT: sw $ra, 60($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $fp, 56($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $23, 52($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $22, 48($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $21, 44($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $20, 40($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $19, 36($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $18, 32($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $17, 28($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $16, 24($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 31, -4 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 30, -8 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 23, -12 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 21, -20 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 20, -24 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 19, -28 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 18, -32 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 17, -36 +; SOFT-FLOAT-32R2-NEXT: .cfi_offset 16, -40 +; SOFT-FLOAT-32R2-NEXT: sw $7, 20($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: sw $6, 16($sp) # 4-byte Folded Spill +; SOFT-FLOAT-32R2-NEXT: move $16, $4 +; SOFT-FLOAT-32R2-NEXT: lw $4, 88($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 92($sp) +; SOFT-FLOAT-32R2-NEXT: lw $6, 120($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 124($sp) +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: nop +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $6, 152($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 156($sp) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: move $19, $2 +; SOFT-FLOAT-32R2-NEXT: lw $4, 96($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 100($sp) +; SOFT-FLOAT-32R2-NEXT: lw $6, 128($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 132($sp) +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: move $20, $3 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: lw $6, 160($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 164($sp) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: move $21, $2 +; SOFT-FLOAT-32R2-NEXT: lw $4, 80($sp) +; SOFT-FLOAT-32R2-NEXT: lw $5, 84($sp) +; SOFT-FLOAT-32R2-NEXT: lw $6, 112($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 116($sp) +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: move $22, $3 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: lw $23, 140($sp) +; SOFT-FLOAT-32R2-NEXT: lw $fp, 136($sp) +; SOFT-FLOAT-32R2-NEXT: lw $17, 108($sp) +; SOFT-FLOAT-32R2-NEXT: lw $18, 104($sp) +; SOFT-FLOAT-32R2-NEXT: lw $7, 148($sp) +; SOFT-FLOAT-32R2-NEXT: lw $6, 144($sp) +; SOFT-FLOAT-32R2-NEXT: sw $22, 28($16) +; SOFT-FLOAT-32R2-NEXT: sw $21, 24($16) +; SOFT-FLOAT-32R2-NEXT: sw $20, 20($16) +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: sw $19, 16($16) +; SOFT-FLOAT-32R2-NEXT: sw $3, 12($16) +; SOFT-FLOAT-32R2-NEXT: sw $2, 8($16) +; SOFT-FLOAT-32R2-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $5, 20($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: move $6, $18 +; SOFT-FLOAT-32R2-NEXT: jal __muldf3 +; SOFT-FLOAT-32R2-NEXT: move $7, $17 +; SOFT-FLOAT-32R2-NEXT: move $4, $2 +; SOFT-FLOAT-32R2-NEXT: move $5, $3 +; SOFT-FLOAT-32R2-NEXT: move $6, $fp +; SOFT-FLOAT-32R2-NEXT: jal __adddf3 +; SOFT-FLOAT-32R2-NEXT: move $7, $23 +; SOFT-FLOAT-32R2-NEXT: sw $3, 4($16) +; SOFT-FLOAT-32R2-NEXT: sw $2, 0($16) +; SOFT-FLOAT-32R2-NEXT: lw $16, 24($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $17, 28($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $18, 32($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $19, 36($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $20, 40($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $21, 44($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $22, 48($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $23, 52($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $fp, 56($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: lw $ra, 60($sp) # 4-byte Folded Reload +; SOFT-FLOAT-32R2-NEXT: jr $ra +; SOFT-FLOAT-32R2-NEXT: addiu $sp, $sp, 64 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, -64 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $22, 48($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $21, 40($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $20, 32($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $19, 24($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $18, 16($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $17, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-64-NEXT: .cfi_offset 21, -24 +; SOFT-FLOAT-64-NEXT: .cfi_offset 20, -32 +; SOFT-FLOAT-64-NEXT: .cfi_offset 19, -40 +; SOFT-FLOAT-64-NEXT: .cfi_offset 18, -48 +; SOFT-FLOAT-64-NEXT: .cfi_offset 17, -56 +; SOFT-FLOAT-64-NEXT: .cfi_offset 16, -64 +; SOFT-FLOAT-64-NEXT: move $17, $10 +; SOFT-FLOAT-64-NEXT: move $18, $9 +; SOFT-FLOAT-64-NEXT: move $19, $8 +; SOFT-FLOAT-64-NEXT: move $20, $6 +; SOFT-FLOAT-64-NEXT: move $21, $5 +; SOFT-FLOAT-64-NEXT: move $16, $4 +; SOFT-FLOAT-64-NEXT: move $4, $7 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $5, $11 +; SOFT-FLOAT-64-NEXT: ld $5, 88($sp) +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: move $22, $2 +; SOFT-FLOAT-64-NEXT: ld $5, 64($sp) +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $4, $19 +; SOFT-FLOAT-64-NEXT: ld $5, 96($sp) +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: move $19, $2 +; SOFT-FLOAT-64-NEXT: move $4, $20 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $5, $17 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: ld $17, 72($sp) +; SOFT-FLOAT-64-NEXT: ld $5, 80($sp) +; SOFT-FLOAT-64-NEXT: sd $19, 24($16) +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: sd $22, 16($16) +; SOFT-FLOAT-64-NEXT: sd $2, 8($16) +; SOFT-FLOAT-64-NEXT: move $4, $21 +; SOFT-FLOAT-64-NEXT: jal __muldf3 +; SOFT-FLOAT-64-NEXT: move $5, $18 +; SOFT-FLOAT-64-NEXT: move $4, $2 +; SOFT-FLOAT-64-NEXT: jal __adddf3 +; SOFT-FLOAT-64-NEXT: move $5, $17 +; SOFT-FLOAT-64-NEXT: sd $2, 0($16) +; SOFT-FLOAT-64-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $17, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $18, 16($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $19, 24($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $20, 32($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $21, 40($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $22, 48($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64-NEXT: jr $ra +; SOFT-FLOAT-64-NEXT: daddiu $sp, $sp, 64 +; +; SOFT-FLOAT-64R2-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64R2: # %bb.0: +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, -64 +; SOFT-FLOAT-64R2-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64R2-NEXT: sd $ra, 56($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $22, 48($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $21, 40($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $20, 32($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $19, 24($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $18, 16($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $17, 8($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: sd $16, 0($sp) # 8-byte Folded Spill +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 31, -8 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 22, -16 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 21, -24 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 20, -32 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 19, -40 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 18, -48 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 17, -56 +; SOFT-FLOAT-64R2-NEXT: .cfi_offset 16, -64 +; SOFT-FLOAT-64R2-NEXT: move $17, $10 +; SOFT-FLOAT-64R2-NEXT: move $18, $9 +; SOFT-FLOAT-64R2-NEXT: move $19, $8 +; SOFT-FLOAT-64R2-NEXT: move $20, $6 +; SOFT-FLOAT-64R2-NEXT: move $21, $5 +; SOFT-FLOAT-64R2-NEXT: move $16, $4 +; SOFT-FLOAT-64R2-NEXT: move $4, $7 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $11 +; SOFT-FLOAT-64R2-NEXT: ld $5, 88($sp) +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: move $22, $2 +; SOFT-FLOAT-64R2-NEXT: ld $5, 64($sp) +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $4, $19 +; SOFT-FLOAT-64R2-NEXT: ld $5, 96($sp) +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: move $19, $2 +; SOFT-FLOAT-64R2-NEXT: move $4, $20 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $17 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: ld $17, 72($sp) +; SOFT-FLOAT-64R2-NEXT: ld $5, 80($sp) +; SOFT-FLOAT-64R2-NEXT: sd $19, 24($16) +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: sd $22, 16($16) +; SOFT-FLOAT-64R2-NEXT: sd $2, 8($16) +; SOFT-FLOAT-64R2-NEXT: move $4, $21 +; SOFT-FLOAT-64R2-NEXT: jal __muldf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $18 +; SOFT-FLOAT-64R2-NEXT: move $4, $2 +; SOFT-FLOAT-64R2-NEXT: jal __adddf3 +; SOFT-FLOAT-64R2-NEXT: move $5, $17 +; SOFT-FLOAT-64R2-NEXT: sd $2, 0($16) +; SOFT-FLOAT-64R2-NEXT: ld $16, 0($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $17, 8($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $18, 16($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $19, 24($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $20, 32($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $21, 40($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $22, 48($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload +; SOFT-FLOAT-64R2-NEXT: jr $ra +; SOFT-FLOAT-64R2-NEXT: daddiu $sp, $sp, 64 + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll b/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll new file mode 100644 index 0000000000000..a9e666e3c9b4d --- /dev/null +++ b/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll @@ -0,0 +1,385 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=sparc < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32 +; RUN: llc -mtriple=sparc64 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64 + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %i2, %o1 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o0, %o0 +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: srl %i0, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i1, 0, %o1 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: srl %i2, 0, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o0 + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: mov %i2, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %i3, %o3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o2 +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: mov %i5, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i0 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o1, %o1 +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: mov %i0, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i1, %o1 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %i2, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o0 + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %i2, %o1 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o0, %o0 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: srl %i0, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i1, 0, %o1 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: srl %i2, 0, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o0 + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: mov %i2, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %i3, %o3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o2 +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: mov %i5, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i0 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o1, %o1 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: mov %i0, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i1, %o1 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %i2, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o0 + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -96, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: ld [%fp+100], %l0 +; SOFT-FLOAT-32-NEXT: ld [%fp+104], %l1 +; SOFT-FLOAT-32-NEXT: ld [%fp+108], %l2 +; SOFT-FLOAT-32-NEXT: ld [%fp+112], %l3 +; SOFT-FLOAT-32-NEXT: ld [%fp+96], %l4 +; SOFT-FLOAT-32-NEXT: ld [%fp+92], %l5 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %l6 +; SOFT-FLOAT-32-NEXT: mov %i1, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %i5, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i1 +; SOFT-FLOAT-32-NEXT: mov %i2, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %l5, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i4 +; SOFT-FLOAT-32-NEXT: mov %i3, %o0 +; SOFT-FLOAT-32-NEXT: call __mulsf3 +; SOFT-FLOAT-32-NEXT: mov %l4, %o1 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %l3, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o0 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %l2, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i2 +; SOFT-FLOAT-32-NEXT: mov %i1, %o0 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %l1, %o1 +; SOFT-FLOAT-32-NEXT: mov %o0, %i1 +; SOFT-FLOAT-32-NEXT: mov %l6, %o0 +; SOFT-FLOAT-32-NEXT: call __addsf3 +; SOFT-FLOAT-32-NEXT: mov %l0, %o1 +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore %g0, %o0, %o0 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: ld [%fp+2267], %l0 +; SOFT-FLOAT-64-NEXT: ld [%fp+2259], %l1 +; SOFT-FLOAT-64-NEXT: ld [%fp+2251], %l2 +; SOFT-FLOAT-64-NEXT: ld [%fp+2243], %l3 +; SOFT-FLOAT-64-NEXT: ld [%fp+2227], %l4 +; SOFT-FLOAT-64-NEXT: ld [%fp+2235], %o1 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i3, 0, %o0 +; SOFT-FLOAT-64-NEXT: mov %o0, %i3 +; SOFT-FLOAT-64-NEXT: srl %i2, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: mov %l4, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i2 +; SOFT-FLOAT-64-NEXT: srl %i1, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i5, 0, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i1 +; SOFT-FLOAT-64-NEXT: srl %i0, 0, %o0 +; SOFT-FLOAT-64-NEXT: call __mulsf3 +; SOFT-FLOAT-64-NEXT: srl %i4, 0, %o1 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: mov %l3, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i0 +; SOFT-FLOAT-64-NEXT: mov %i1, %o0 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: mov %l2, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i1 +; SOFT-FLOAT-64-NEXT: mov %i2, %o0 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: mov %l1, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i2 +; SOFT-FLOAT-64-NEXT: mov %i3, %o0 +; SOFT-FLOAT-64-NEXT: call __addsf3 +; SOFT-FLOAT-64-NEXT: mov %l0, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o3 + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32: .cfi_startproc +; SOFT-FLOAT-32-NEXT: ! %bb.0: +; SOFT-FLOAT-32-NEXT: save %sp, -128, %sp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-32-NEXT: .cfi_window_save +; SOFT-FLOAT-32-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-32-NEXT: ld [%fp+64], %l6 +; SOFT-FLOAT-32-NEXT: ld [%fp+156], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-4] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+160], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-8] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+148], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-12] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+152], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-16] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+140], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-20] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+144], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-24] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+132], %g2 +; SOFT-FLOAT-32-NEXT: st %g2, [%fp+-28] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: ld [%fp+136], %l7 +; SOFT-FLOAT-32-NEXT: ld [%fp+100], %l0 +; SOFT-FLOAT-32-NEXT: ld [%fp+104], %l1 +; SOFT-FLOAT-32-NEXT: ld [%fp+108], %l2 +; SOFT-FLOAT-32-NEXT: ld [%fp+112], %l3 +; SOFT-FLOAT-32-NEXT: ld [%fp+116], %l4 +; SOFT-FLOAT-32-NEXT: ld [%fp+120], %l5 +; SOFT-FLOAT-32-NEXT: ld [%fp+92], %o0 +; SOFT-FLOAT-32-NEXT: ld [%fp+96], %o1 +; SOFT-FLOAT-32-NEXT: ld [%fp+124], %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: ld [%fp+128], %o3 +; SOFT-FLOAT-32-NEXT: st %o0, [%fp+-32] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: st %o1, [%fp+-36] ! 4-byte Folded Spill +; SOFT-FLOAT-32-NEXT: mov %i4, %o0 +; SOFT-FLOAT-32-NEXT: mov %i5, %o1 +; SOFT-FLOAT-32-NEXT: mov %l4, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %l5, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %l4 +; SOFT-FLOAT-32-NEXT: mov %o1, %l5 +; SOFT-FLOAT-32-NEXT: mov %i2, %o0 +; SOFT-FLOAT-32-NEXT: mov %i3, %o1 +; SOFT-FLOAT-32-NEXT: mov %l2, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %l3, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i4 +; SOFT-FLOAT-32-NEXT: mov %o1, %i5 +; SOFT-FLOAT-32-NEXT: mov %i0, %o0 +; SOFT-FLOAT-32-NEXT: mov %i1, %o1 +; SOFT-FLOAT-32-NEXT: mov %l0, %o2 +; SOFT-FLOAT-32-NEXT: call __muldf3 +; SOFT-FLOAT-32-NEXT: mov %l1, %o3 +; SOFT-FLOAT-32-NEXT: ld [%fp+-28], %o2 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: mov %l7, %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i2 +; SOFT-FLOAT-32-NEXT: mov %o1, %i3 +; SOFT-FLOAT-32-NEXT: mov %i4, %o0 +; SOFT-FLOAT-32-NEXT: mov %i5, %o1 +; SOFT-FLOAT-32-NEXT: ld [%fp+-20], %o2 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: ld [%fp+-24], %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i4 +; SOFT-FLOAT-32-NEXT: mov %o1, %i5 +; SOFT-FLOAT-32-NEXT: mov %l4, %o0 +; SOFT-FLOAT-32-NEXT: mov %l5, %o1 +; SOFT-FLOAT-32-NEXT: ld [%fp+-12], %o2 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: ld [%fp+-16], %o3 +; SOFT-FLOAT-32-NEXT: mov %o0, %i0 +; SOFT-FLOAT-32-NEXT: mov %o1, %i1 +; SOFT-FLOAT-32-NEXT: ld [%fp+-32], %o0 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: ld [%fp+-36], %o1 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: ld [%fp+-4], %o2 ! 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: call __adddf3 +; SOFT-FLOAT-32-NEXT: ld [%fp+-8], %o3 +; SOFT-FLOAT-32-NEXT: ! kill: def $o0 killed $o0 killed $o0_o1 def $o0_o1 +; SOFT-FLOAT-32-NEXT: ! kill: def $o1 killed $o1 killed $o0_o1 def $o0_o1 +; SOFT-FLOAT-32-NEXT: std %o0, [%l6+24] +; SOFT-FLOAT-32-NEXT: std %i0, [%l6+16] +; SOFT-FLOAT-32-NEXT: std %i4, [%l6+8] +; SOFT-FLOAT-32-NEXT: std %i2, [%l6] +; SOFT-FLOAT-32-NEXT: ret +; SOFT-FLOAT-32-NEXT: restore +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64: .cfi_startproc +; SOFT-FLOAT-64-NEXT: ! %bb.0: +; SOFT-FLOAT-64-NEXT: save %sp, -176, %sp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_register %fp +; SOFT-FLOAT-64-NEXT: .cfi_window_save +; SOFT-FLOAT-64-NEXT: .cfi_register %o7, %i7 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2263], %l0 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2255], %l1 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2247], %l2 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2239], %l3 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2223], %l4 +; SOFT-FLOAT-64-NEXT: ldx [%fp+2231], %o1 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i3, %o0 +; SOFT-FLOAT-64-NEXT: mov %o0, %i3 +; SOFT-FLOAT-64-NEXT: mov %i2, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %l4, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i2 +; SOFT-FLOAT-64-NEXT: mov %i1, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i5, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i1 +; SOFT-FLOAT-64-NEXT: mov %i0, %o0 +; SOFT-FLOAT-64-NEXT: call __muldf3 +; SOFT-FLOAT-64-NEXT: mov %i4, %o1 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %l3, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i0 +; SOFT-FLOAT-64-NEXT: mov %i1, %o0 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %l2, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i1 +; SOFT-FLOAT-64-NEXT: mov %i2, %o0 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %l1, %o1 +; SOFT-FLOAT-64-NEXT: mov %o0, %i2 +; SOFT-FLOAT-64-NEXT: mov %i3, %o0 +; SOFT-FLOAT-64-NEXT: call __adddf3 +; SOFT-FLOAT-64-NEXT: mov %l0, %o1 +; SOFT-FLOAT-64-NEXT: ret +; SOFT-FLOAT-64-NEXT: restore %g0, %o0, %o3 + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll b/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll new file mode 100644 index 0000000000000..b01c348b631b8 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll @@ -0,0 +1,230 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=s390x < %s | FileCheck %s -check-prefix=SOFT-FLOAT + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r13, %r15, 104(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: llgfr %r2, %r2 +; SOFT-FLOAT-NEXT: llgfr %r3, %r3 +; SOFT-FLOAT-NEXT: lr %r13, %r4 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r3, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: # kill: def $r2l killed $r2l killed $r2d +; SOFT-FLOAT-NEXT: lmg %r13, %r15, 264(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r13, %r15, 104(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: lgr %r13, %r4 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lmg %r13, %r15, 264(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r13, %r15, 104(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: llgfr %r2, %r2 +; SOFT-FLOAT-NEXT: llgfr %r3, %r3 +; SOFT-FLOAT-NEXT: lr %r13, %r4 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r3, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: # kill: def $r2l killed $r2l killed $r2d +; SOFT-FLOAT-NEXT: lmg %r13, %r15, 264(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r13, %r15, 104(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -160 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 320 +; SOFT-FLOAT-NEXT: lgr %r13, %r4 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r13 +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lmg %r13, %r15, 264(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r7, %r15, 56(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r7, -104 +; SOFT-FLOAT-NEXT: .cfi_offset %r8, -96 +; SOFT-FLOAT-NEXT: .cfi_offset %r9, -88 +; SOFT-FLOAT-NEXT: .cfi_offset %r10, -80 +; SOFT-FLOAT-NEXT: .cfi_offset %r11, -72 +; SOFT-FLOAT-NEXT: .cfi_offset %r12, -64 +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -176 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 336 +; SOFT-FLOAT-NEXT: llgf %r0, 388(%r15) +; SOFT-FLOAT-NEXT: stg %r0, 168(%r15) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: llgf %r0, 380(%r15) +; SOFT-FLOAT-NEXT: stg %r0, 160(%r15) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: llgf %r11, 372(%r15) +; SOFT-FLOAT-NEXT: llgf %r10, 364(%r15) +; SOFT-FLOAT-NEXT: llgf %r8, 340(%r15) +; SOFT-FLOAT-NEXT: llgf %r0, 356(%r15) +; SOFT-FLOAT-NEXT: llgf %r7, 348(%r15) +; SOFT-FLOAT-NEXT: llgfr %r1, %r5 +; SOFT-FLOAT-NEXT: lr %r9, %r4 +; SOFT-FLOAT-NEXT: lr %r13, %r3 +; SOFT-FLOAT-NEXT: lr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r1 +; SOFT-FLOAT-NEXT: lgr %r3, %r0 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r0, %r9 +; SOFT-FLOAT-NEXT: lgr %r9, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: lgr %r3, %r7 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r0, %r13 +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: lgr %r3, %r8 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: llgfr %r0, %r12 +; SOFT-FLOAT-NEXT: llgfr %r3, %r6 +; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r0 +; SOFT-FLOAT-NEXT: brasl %r14, __mulsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r10 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r10, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r12 +; SOFT-FLOAT-NEXT: lgr %r3, %r11 +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r13 +; SOFT-FLOAT-NEXT: lg %r3, 160(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r9 +; SOFT-FLOAT-NEXT: lg %r3, 168(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __addsf3@PLT +; SOFT-FLOAT-NEXT: lgr %r5, %r2 +; SOFT-FLOAT-NEXT: lr %r2, %r10 +; SOFT-FLOAT-NEXT: lr %r3, %r12 +; SOFT-FLOAT-NEXT: lr %r4, %r13 +; SOFT-FLOAT-NEXT: # kill: def $r5l killed $r5l killed $r5d +; SOFT-FLOAT-NEXT: lmg %r7, %r15, 232(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT: # %bb.0: +; SOFT-FLOAT-NEXT: stmg %r6, %r15, 48(%r15) +; SOFT-FLOAT-NEXT: .cfi_offset %r6, -112 +; SOFT-FLOAT-NEXT: .cfi_offset %r7, -104 +; SOFT-FLOAT-NEXT: .cfi_offset %r8, -96 +; SOFT-FLOAT-NEXT: .cfi_offset %r9, -88 +; SOFT-FLOAT-NEXT: .cfi_offset %r10, -80 +; SOFT-FLOAT-NEXT: .cfi_offset %r11, -72 +; SOFT-FLOAT-NEXT: .cfi_offset %r12, -64 +; SOFT-FLOAT-NEXT: .cfi_offset %r13, -56 +; SOFT-FLOAT-NEXT: .cfi_offset %r14, -48 +; SOFT-FLOAT-NEXT: .cfi_offset %r15, -40 +; SOFT-FLOAT-NEXT: aghi %r15, -184 +; SOFT-FLOAT-NEXT: .cfi_def_cfa_offset 344 +; SOFT-FLOAT-NEXT: mvc 176(8,%r15), 24(%r4) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: mvc 168(8,%r15), 16(%r4) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: mvc 160(8,%r15), 8(%r4) # 8-byte Folded Spill +; SOFT-FLOAT-NEXT: lg %r10, 0(%r4) +; SOFT-FLOAT-NEXT: lg %r9, 0(%r2) +; SOFT-FLOAT-NEXT: lg %r8, 0(%r3) +; SOFT-FLOAT-NEXT: lg %r7, 8(%r2) +; SOFT-FLOAT-NEXT: lg %r6, 8(%r3) +; SOFT-FLOAT-NEXT: lg %r13, 16(%r2) +; SOFT-FLOAT-NEXT: lg %r2, 24(%r2) +; SOFT-FLOAT-NEXT: lg %r0, 24(%r3) +; SOFT-FLOAT-NEXT: lg %r12, 16(%r3) +; SOFT-FLOAT-NEXT: lgr %r3, %r0 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r11, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r13 +; SOFT-FLOAT-NEXT: lgr %r3, %r12 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r7 +; SOFT-FLOAT-NEXT: lgr %r3, %r6 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r9 +; SOFT-FLOAT-NEXT: lgr %r3, %r8 +; SOFT-FLOAT-NEXT: brasl %r14, __muldf3@PLT +; SOFT-FLOAT-NEXT: lgr %r3, %r10 +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lgr %r10, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r12 +; SOFT-FLOAT-NEXT: lg %r3, 160(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lgr %r12, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r13 +; SOFT-FLOAT-NEXT: lg %r3, 168(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lgr %r13, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r11 +; SOFT-FLOAT-NEXT: lg %r3, 176(%r15) # 8-byte Folded Reload +; SOFT-FLOAT-NEXT: brasl %r14, __adddf3@PLT +; SOFT-FLOAT-NEXT: lgr %r5, %r2 +; SOFT-FLOAT-NEXT: lgr %r2, %r10 +; SOFT-FLOAT-NEXT: lgr %r3, %r12 +; SOFT-FLOAT-NEXT: lgr %r4, %r13 +; SOFT-FLOAT-NEXT: lmg %r6, %r15, 232(%r15) +; SOFT-FLOAT-NEXT: br %r14 + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) diff --git a/llvm/test/CodeGen/X86/fmuladd-soft-float.ll b/llvm/test/CodeGen/X86/fmuladd-soft-float.ll new file mode 100644 index 0000000000000..ccb2f37590b0a --- /dev/null +++ b/llvm/test/CodeGen/X86/fmuladd-soft-float.ll @@ -0,0 +1,1777 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=i386 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32 +; RUN: llc -mtriple=i386 -mattr +fma < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32-FMA +; RUN: llc -mtriple=i386 -mattr +fma4 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-32-FMA4 +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64 +; RUN: llc -mtriple=x86_64 -mattr +fma < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64-FMA +; RUN: llc -mtriple=x86_64 -mattr +fma4 < %s | FileCheck %s -check-prefix=SOFT-FLOAT-64-FMA4 + +define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_intrinsic_f32: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %result = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %result +} + +define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: popl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: popl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl +; +; SOFT-FLOAT-64-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_intrinsic_f64: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %result = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %result +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_contract_f32: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movl %edx, %ebx +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebx, %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %product = fmul contract float %a, %b + %result = fadd contract float %product, %c + ret float %result +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: popl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: popl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %edi, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_contract_f64: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %rbx +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %product = fmul contract double %a, %b + %result = fadd contract double %product, %c + ret double %result +} + +define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __addsf3 +; SOFT-FLOAT-32-NEXT: addl $8, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-NEXT: movl %eax, 12(%esi) +; SOFT-FLOAT-32-NEXT: movl %ebx, 8(%esi) +; SOFT-FLOAT-32-NEXT: movl %edi, 4(%esi) +; SOFT-FLOAT-32-NEXT: movl %ebp, (%esi) +; SOFT-FLOAT-32-NEXT: movl %esi, %eax +; SOFT-FLOAT-32-NEXT: addl $4, %esp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-NEXT: popl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: popl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: popl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl $4 +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, 12(%esi) +; SOFT-FLOAT-32-FMA-NEXT: movl %ebx, 8(%esi) +; SOFT-FLOAT-32-FMA-NEXT: movl %edi, 4(%esi) +; SOFT-FLOAT-32-FMA-NEXT: movl %ebp, (%esi) +; SOFT-FLOAT-32-FMA-NEXT: movl %esi, %eax +; SOFT-FLOAT-32-FMA-NEXT: addl $4, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA-NEXT: popl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: popl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: popl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl $4 +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __mulsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %ebx +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __addsf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $8, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, 12(%esi) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ebx, 8(%esi) +; SOFT-FLOAT-32-FMA4-NEXT: movl %edi, 4(%esi) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ebp, (%esi) +; SOFT-FLOAT-32-FMA4-NEXT: movl %esi, %eax +; SOFT-FLOAT-32-FMA4-NEXT: addl $4, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA4-NEXT: popl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: popl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl $4 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: pushq %r15 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-NEXT: pushq %r14 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-NEXT: pushq %r13 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-NEXT: pushq %r12 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-NEXT: pushq %rax +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-NEXT: movl %r9d, %r13d +; SOFT-FLOAT-64-NEXT: movl %ecx, %ebp +; SOFT-FLOAT-64-NEXT: movl %edx, %r14d +; SOFT-FLOAT-64-NEXT: movl %esi, %r12d +; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: movl %r8d, %edi +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %r15d +; SOFT-FLOAT-64-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-NEXT: movl %r12d, %edi +; SOFT-FLOAT-64-NEXT: movl %r13d, %esi +; SOFT-FLOAT-64-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %r12d +; SOFT-FLOAT-64-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-NEXT: movl %r15d, %edi +; SOFT-FLOAT-64-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-NEXT: movl %eax, 12(%rbx) +; SOFT-FLOAT-64-NEXT: movl %ebp, 8(%rbx) +; SOFT-FLOAT-64-NEXT: movl %r14d, 4(%rbx) +; SOFT-FLOAT-64-NEXT: movl %r12d, (%rbx) +; SOFT-FLOAT-64-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-NEXT: popq %r12 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-NEXT: popq %r13 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-NEXT: popq %r14 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-NEXT: popq %r15 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: popq %rbp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r15 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r14 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r13 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r12 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA-NEXT: pushq %rax +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-FMA-NEXT: movl %r9d, %r13d +; SOFT-FLOAT-64-FMA-NEXT: movl %ecx, %ebp +; SOFT-FLOAT-64-FMA-NEXT: movl %edx, %r14d +; SOFT-FLOAT-64-FMA-NEXT: movl %esi, %r12d +; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: movl %r8d, %edi +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %r15d +; SOFT-FLOAT-64-FMA-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-FMA-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-FMA-NEXT: movl %r12d, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl %r13d, %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %r12d +; SOFT-FLOAT-64-FMA-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-FMA-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-FMA-NEXT: movl %r15d, %edi +; SOFT-FLOAT-64-FMA-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movl %eax, 12(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movl %ebp, 8(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movl %r14d, 4(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movl %r12d, (%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA-NEXT: popq %r12 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA-NEXT: popq %r13 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA-NEXT: popq %r14 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA-NEXT: popq %r15 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: popq %rbp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_contract_v4f32: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r15 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r14 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r13 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r12 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rax +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movl %r9d, %r13d +; SOFT-FLOAT-64-FMA4-NEXT: movl %ecx, %ebp +; SOFT-FLOAT-64-FMA4-NEXT: movl %edx, %r14d +; SOFT-FLOAT-64-FMA4-NEXT: movl %esi, %r12d +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: movl %r8d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %r15d +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-FMA4-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-FMA4-NEXT: movl %r12d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl %r13d, %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __mulsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %r12d +; SOFT-FLOAT-64-FMA4-NEXT: movl %r14d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %r14d +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebp, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, %ebp +; SOFT-FLOAT-64-FMA4-NEXT: movl %r15d, %edi +; SOFT-FLOAT-64-FMA4-NEXT: movl {{[0-9]+}}(%rsp), %esi +; SOFT-FLOAT-64-FMA4-NEXT: callq __addsf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movl %eax, 12(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movl %ebp, 8(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movl %r14d, 4(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movl %r12d, (%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r12 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r13 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r14 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r15 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %product = fmul contract <4 x float> %a, %b + %result = fadd contract <4 x float> %product, %c + ret <4 x float> %result +} + +define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 { +; SOFT-FLOAT-32-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32: # %bb.0: +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-NEXT: subl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 36 +; SOFT-FLOAT-32-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, %esi +; SOFT-FLOAT-32-NEXT: movl %edx, %ebp +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __muldf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %eax +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: movl %edx, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %edi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl %esi +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-NEXT: movl %edx, %esi +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: calll __adddf3 +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SOFT-FLOAT-32-NEXT: movl %edx, 28(%ecx) +; SOFT-FLOAT-32-NEXT: movl %eax, 24(%ecx) +; SOFT-FLOAT-32-NEXT: movl %esi, 20(%ecx) +; SOFT-FLOAT-32-NEXT: movl %ebp, 16(%ecx) +; SOFT-FLOAT-32-NEXT: movl %ebx, 12(%ecx) +; SOFT-FLOAT-32-NEXT: movl %edi, 8(%ecx) +; SOFT-FLOAT-32-NEXT: movl (%esp), %eax # 4-byte Reload +; SOFT-FLOAT-32-NEXT: movl %eax, 4(%ecx) +; SOFT-FLOAT-32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; SOFT-FLOAT-32-NEXT: movl %eax, (%ecx) +; SOFT-FLOAT-32-NEXT: movl %ecx, %eax +; SOFT-FLOAT-32-NEXT: addl $16, %esp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-NEXT: popl %esi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-NEXT: popl %edi +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-NEXT: popl %ebx +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-NEXT: popl %ebp +; SOFT-FLOAT-32-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-NEXT: retl $4 +; +; SOFT-FLOAT-32-FMA-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32-FMA: # %bb.0: +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA-NEXT: subl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 36 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-FMA-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %esi +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, %ebp +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, %esi +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SOFT-FLOAT-32-FMA-NEXT: movl %edx, 28(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, 24(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %esi, 20(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %ebp, 16(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %ebx, 12(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %edi, 8(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl (%esp), %eax # 4-byte Reload +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, 4(%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; SOFT-FLOAT-32-FMA-NEXT: movl %eax, (%ecx) +; SOFT-FLOAT-32-FMA-NEXT: movl %ecx, %eax +; SOFT-FLOAT-32-FMA-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA-NEXT: popl %esi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA-NEXT: popl %edi +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA-NEXT: popl %ebx +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA-NEXT: popl %ebp +; SOFT-FLOAT-32-FMA-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA-NEXT: retl $4 +; +; SOFT-FLOAT-32-FMA4-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-32-FMA4: # %bb.0: +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA4-NEXT: subl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 36 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %esi, -20 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %edi, -16 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %ebx, -12 +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_offset %ebp, -8 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %edi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ebx +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ebp +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %esi +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, %ebp +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __muldf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %eax +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, (%esp) # 4-byte Spill +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %edi +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, %ebx +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, %ebp +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, %esi +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[0-9]+}}(%esp) +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: calll __adddf3 +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_adjust_cfa_offset -16 +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SOFT-FLOAT-32-FMA4-NEXT: movl %edx, 28(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, 24(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %esi, 20(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ebp, 16(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ebx, 12(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %edi, 8(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl (%esp), %eax # 4-byte Reload +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, 4(%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; SOFT-FLOAT-32-FMA4-NEXT: movl %eax, (%ecx) +; SOFT-FLOAT-32-FMA4-NEXT: movl %ecx, %eax +; SOFT-FLOAT-32-FMA4-NEXT: addl $16, %esp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 20 +; SOFT-FLOAT-32-FMA4-NEXT: popl %esi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-32-FMA4-NEXT: popl %edi +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 12 +; SOFT-FLOAT-32-FMA4-NEXT: popl %ebx +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-32-FMA4-NEXT: popl %ebp +; SOFT-FLOAT-32-FMA4-NEXT: .cfi_def_cfa_offset 4 +; SOFT-FLOAT-32-FMA4-NEXT: retl $4 +; +; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64: # %bb.0: +; SOFT-FLOAT-64-NEXT: pushq %rbp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: pushq %r15 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-NEXT: pushq %r14 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-NEXT: pushq %r13 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-NEXT: pushq %r12 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-NEXT: pushq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-NEXT: pushq %rax +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-NEXT: movq %rcx, %r14 +; SOFT-FLOAT-64-NEXT: movq %rdx, %r15 +; SOFT-FLOAT-64-NEXT: movq %rsi, %r12 +; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: movq %r8, %rdi +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r13 +; SOFT-FLOAT-64-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-NEXT: movq %r12, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r12 +; SOFT-FLOAT-64-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-NEXT: movq %r13, %rdi +; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-NEXT: movq %rax, 24(%rbx) +; SOFT-FLOAT-64-NEXT: movq %r14, 16(%rbx) +; SOFT-FLOAT-64-NEXT: movq %r15, 8(%rbx) +; SOFT-FLOAT-64-NEXT: movq %r12, (%rbx) +; SOFT-FLOAT-64-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-NEXT: popq %rbx +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-NEXT: popq %r12 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-NEXT: popq %r13 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-NEXT: popq %r14 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-NEXT: popq %r15 +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-NEXT: popq %rbp +; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-NEXT: retq +; +; SOFT-FLOAT-64-FMA-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64-FMA: # %bb.0: +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r15 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r14 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r13 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA-NEXT: pushq %r12 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA-NEXT: pushq %rax +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-FMA-NEXT: movq %rcx, %r14 +; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %r15 +; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r12 +; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: movq %r8, %rdi +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13 +; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12 +; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi +; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA-NEXT: movq %rax, 24(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %r14, 16(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %r15, 8(%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %r12, (%rbx) +; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA-NEXT: popq %r12 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA-NEXT: popq %r13 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA-NEXT: popq %r14 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA-NEXT: popq %r15 +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA-NEXT: popq %rbp +; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA-NEXT: retq +; +; SOFT-FLOAT-64-FMA4-LABEL: fmuladd_contract_v4f64: +; SOFT-FLOAT-64-FMA4: # %bb.0: +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r15 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r14 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r13 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %r12 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA4-NEXT: pushq %rax +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 64 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbx, -56 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r12, -48 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r13, -40 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rcx, %r14 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %r15 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r12 +; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: movq %r8, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14 +; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi +; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT +; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, 24(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, 16(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, 8(%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, (%rbx) +; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax +; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56 +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbx +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 48 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r12 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 40 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r13 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 32 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r14 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 24 +; SOFT-FLOAT-64-FMA4-NEXT: popq %r15 +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 16 +; SOFT-FLOAT-64-FMA4-NEXT: popq %rbp +; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 8 +; SOFT-FLOAT-64-FMA4-NEXT: retq + %product = fmul contract <4 x double> %a, %b + %result = fadd contract <4 x double> %product, %c + ret <4 x double> %result +} + +attributes #0 = { "use-soft-float"="true" } + +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +declare double @llvm.fmuladd.f64(double %a, double %b, double %c)