Skip to content

Commit cfd1324

Browse files
committed
[MERGE #5664 @nhat-nguyen] Force same src/dst for SQRTSD
Merge pull request #5664 from nhat-nguyen:sqrtsd. Force the source operand to be the same as the destination operand to break the false dependency on the destination register; this fixes the slowdown for `sqrtsd`.
2 parents ba521f0 + fcb1c6c commit cfd1324

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

lib/Backend/LowerMDShared.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7711,12 +7711,25 @@ void LowererMD::GenerateFastInlineBuiltInCall(IR::Instr* instr, IR::JnHelperMeth
77117711
switch (instr->m_opcode)
77127712
{
77137713
case Js::OpCode::InlineMathSqrt:
7714-
// Sqrt maps directly to the SSE2 instruction.
7715-
// src and dst should already be XMM registers, all we need is just change the opcode.
7716-
Assert(helperMethod == (IR::JnHelperMethod)0);
7717-
Assert(instr->GetSrc2() == nullptr);
7718-
instr->m_opcode = instr->GetSrc1()->IsFloat64() ? Js::OpCode::SQRTSD : Js::OpCode::SQRTSS;
7719-
break;
7714+
{
7715+
// Sqrt maps directly to the SSE2 instruction.
7716+
// src and dst should already be XMM registers, all we need is just change the opcode.
7717+
Assert(helperMethod == (IR::JnHelperMethod)0);
7718+
Assert(instr->GetSrc2() == nullptr);
7719+
instr->m_opcode = instr->GetSrc1()->IsFloat64() ? Js::OpCode::SQRTSD : Js::OpCode::SQRTSS;
7720+
7721+
IR::Opnd *src = instr->GetSrc1();
7722+
IR::Opnd *dst = instr->GetDst();
7723+
if (!src->IsEqual(dst))
7724+
{
7725+
Assert(src->IsRegOpnd() && dst->IsRegOpnd());
7726+
// Force source to be the same as destination to break false dependency on the register
7727+
Lowerer::InsertMove(dst, src, instr, false /* generateWriteBarrier */);
7728+
instr->ReplaceSrc1(dst);
7729+
}
7730+
7731+
break;
7732+
}
77207733

77217734
case Js::OpCode::InlineMathAbs:
77227735
Assert(helperMethod == (IR::JnHelperMethod)0);

0 commit comments

Comments
 (0)