Skip to content

Commit 3030d6b

Browse files
sys-igcpszymich
authored and committed
[Autobackout][FunctionalRegression]Revert of change: 8723ba3: Move HtoFp optimization to unsafe.
For the pattern "truncate float to half -> add / multiply-add -> extend half to float", the optimization skips the truncation and extension instructions and performs the calculation on floats directly. This optimization is now unsafe and should only be used with the fast, reassoc, or afn attributes. (cherry picked from commit bf295fd)
1 parent 728fd2a commit 3030d6b

File tree

5 files changed

+159
-294
lines changed

5 files changed

+159
-294
lines changed

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,162 @@ bool CustomSafeOptPass::isEmulatedAdd(BinaryOperator& I)
16881688
return false;
16891689
}
16901690

1691+
// Attempt to create new float instruction if both operands are from FPTruncInst instructions.
1692+
// Example with fadd:
1693+
// %Temp-31.prec.i = fptrunc float %34 to half
1694+
// %Temp-30.prec.i = fptrunc float %33 to half
1695+
// %41 = fadd fast half %Temp-31.prec.i, %Temp-30.prec.i
1696+
// %Temp-32.i = fpext half %41 to float
1697+
//
1698+
// This fadd is used as a float, and doesn't need the operands to be cast to half.
1699+
// We can remove the extra casts in this case.
1700+
// This becomes:
1701+
// %41 = fadd fast float %34, %33
1702+
// Can also match an fmul/fadd pair that will later become a mad instruction.
1703+
// mad example:
1704+
// %.prec70.i = fptrunc float %273 to half
1705+
// %.prec78.i = fptrunc float %276 to half
1706+
// %279 = fmul fast half %233, %.prec70.i
1707+
// %282 = fadd fast half %279, %.prec78.i
1708+
// %.prec84.i = fpext half %282 to float
1709+
// This becomes:
1710+
// %279 = fpext half %233 to float
1711+
// %280 = fmul fast float %273, %279
1712+
// %281 = fadd fast float %280, %276
1713+
// Rewrites narrow floating-point arithmetic whose single user is an fpext to float
// so that the arithmetic is performed on floats directly.
//
// I - candidate instruction. Nothing is changed unless I has exactly one use and
//     that use is an FPExtInst producing float.
//
// Two shapes are handled:
//   * fadd(fmul(a, b), c) where at least two of a, b, c are FPTruncInst results:
//     a float fmul + fadd pair is emitted before I.
//   * a binary operator whose two operands are both fptrunc float -> half:
//     a float binary operator with the same opcode is emitted before I.
//
// In both cases the users of the fpext are redirected to the new float value.
// The original instructions are not erased here.
void CustomSafeOptPass::removeHftoFCast(Instruction& I)
{
    if (!I.getType()->isFloatingPointTy())
        return;

    // The result must have exactly one user ...
    if (!I.hasOneUse())
        return;

    // ... and that user must be an fpext producing float.
    FPExtInst* castInst = dyn_cast<FPExtInst>(*I.user_begin());
    if (!castInst || !castInst->getType()->isFloatTy())
        return;

    // Check for fmad pattern
    if (I.getOpcode() == Instruction::FAdd)
    {
        Value* src0 = nullptr;
        Value* src1 = nullptr;
        Value* src2 = nullptr;

        // CodeGenPatternMatch::MatchMad matches the first fmul.
        Instruction* origFmul = nullptr;
        for (unsigned i = 0; i < 2; i++)
        {
            Instruction* candidate = dyn_cast<Instruction>(I.getOperand(i));
            if (candidate && candidate->getOpcode() == Instruction::FMul)
            {
                origFmul = candidate;
                src0 = candidate->getOperand(0);
                src1 = candidate->getOperand(1);
                src2 = I.getOperand(1 - i);
                break;
            }
        }

        if (origFmul)
        {
            int convertCount = 0;
            if (dyn_cast<FPTruncInst>(src0))
                convertCount++;
            if (dyn_cast<FPTruncInst>(src1))
                convertCount++;
            if (dyn_cast<FPTruncInst>(src2))
                convertCount++;

            if (convertCount >= 2)
            {
                Type* floatTy = castInst->getType();

                // An operand can be widened when it is an fptrunc of a float (reuse the
                // float source) or any other instruction (extend its result).
                auto canWiden = [](Value* operand)
                {
                    if (FPTruncInst* trunc = dyn_cast<FPTruncInst>(operand))
                        return trunc->getOperand(0)->getType()->isFloatTy();
                    return dyn_cast<Instruction>(operand) != nullptr;
                };

                // Validate every operand before emitting anything, so that bailing out
                // never leaves freshly created fpext instructions behind.
                if (!canWiden(src0) || !canWiden(src1) || !canWiden(src2))
                    return;

                // Produces the float version of an operand; a new fpext, when needed,
                // is inserted before insertBefore.
                auto widen = [floatTy](Value* operand, Instruction* insertBefore) -> Value*
                {
                    if (FPTruncInst* trunc = dyn_cast<FPTruncInst>(operand))
                        return trunc->getOperand(0);
                    return new FPExtInst(operand, floatTy, "", insertBefore);
                };

                Value* mulLhs = widen(src0, origFmul);
                Value* mulRhs = widen(src1, origFmul);
                Value* addend = widen(src2, &I);

                // Create new float fmul and fadd instructions
                BinaryOperator* newFmul = BinaryOperator::Create(Instruction::FMul, mulLhs, mulRhs, "", &I);
                BinaryOperator* newFadd = BinaryOperator::Create(Instruction::FAdd, newFmul, addend, "", &I);

                // Copy fast math flags from the instructions being replaced: the new fmul
                // takes them from the original fmul, the new fadd from the original fadd.
                newFmul->copyFastMathFlags(origFmul);
                newFadd->copyFastMathFlags(&I);
                newFadd->setDebugLoc(castInst->getDebugLoc());

                castInst->replaceAllUsesWith(newFadd);
                return;
            }
        }
    }

    // Only binary operators are handled below; checking first also guarantees that
    // operand 1 exists before it is read.
    BinaryOperator* binOp = dyn_cast<BinaryOperator>(&I);
    if (!binOp)
        return;

    // Check if operands come from a Float to HF Cast
    FPTruncInst* lhsTrunc = dyn_cast<FPTruncInst>(I.getOperand(0));
    FPTruncInst* rhsTrunc = dyn_cast<FPTruncInst>(I.getOperand(1));
    if (!lhsTrunc || !rhsTrunc)
        return;
    if (!lhsTrunc->getType()->isHalfTy() || !rhsTrunc->getType()->isHalfTy())
        return;

    Value* S1 = lhsTrunc->getOperand(0);
    Value* S2 = rhsTrunc->getOperand(0);

    // The replacement takes the place of a float fpext, so both truncation sources
    // must be float themselves (an fptrunc from double would otherwise slip through).
    if (!S1->getType()->isFloatTy() || !S2->getType()->isFloatTy())
        return;

    BinaryOperator* newInst = BinaryOperator::Create(binOp->getOpcode(), S1, S2, "", &I);
    newInst->copyFastMathFlags(&I);
    newInst->setDebugLoc(castInst->getDebugLoc());
    castInst->replaceAllUsesWith(newInst);
}
16911847

16921848
void CustomSafeOptPass::visitBinaryOperator(BinaryOperator& I)
16931849
{
@@ -1763,6 +1919,8 @@ void CustomSafeOptPass::visitBinaryOperator(BinaryOperator& I)
17631919
}
17641920
}
17651921
}
1922+
} else if (I.getType()->isFloatingPointTy()) {
1923+
removeHftoFCast(I);
17661924
}
17671925

17681926
if (IGC_IS_FLAG_ENABLED(ForceHoistDp3) || (!pContext->m_retryManager.IsFirstTry() && IGC_IS_FLAG_ENABLED(EnableHoistDp3)))

IGC/Compiler/CustomSafeOptPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ namespace IGC
5555
void visitUDiv(llvm::BinaryOperator& I);
5656
void visitAllocaInst(llvm::AllocaInst& I);
5757
void visitCallInst(llvm::CallInst& C);
58+
void removeHftoFCast(llvm::Instruction& I);
5859
void visitBinaryOperator(llvm::BinaryOperator& I);
5960
bool isEmulatedAdd(llvm::BinaryOperator& I);
6061
void visitBfi(llvm::CallInst* inst);

IGC/Compiler/CustomUnsafeOptPass.cpp

Lines changed: 0 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,12 +1822,6 @@ void CustomUnsafeOptPass::visitBinaryOperator(BinaryOperator& I)
18221822
patternFound = visitBinaryOperatorFmulFaddPropagation(I);
18231823
}
18241824

1825-
// remove casting to half when assigning to float
1826-
if (!patternFound)
1827-
{
1828-
patternFound = visitBinaryOperatorRemoveHftoFCast(I);
1829-
}
1830-
18311825
// A/B +C/D can be changed to (A * D +C * B)/(B * D).
18321826
if (!patternFound && IGC_IS_FLAG_ENABLED(EnableSumFractions))
18331827
{
@@ -2035,167 +2029,6 @@ void CustomUnsafeOptPass::visitBinaryOperator(BinaryOperator& I)
20352029
}
20362030
}
20372031

2038-
// Attempt to create new float instruction if both operands are from FPTruncInst instructions.
2039-
// Example with fadd:
2040-
// %Temp-31.prec.i = fptrunc float %34 to half
2041-
// %Temp-30.prec.i = fptrunc float %33 to half
2042-
// %41 = fadd fast half %Temp-31.prec.i, %Temp-30.prec.i
2043-
// %Temp-32.i = fpext half %41 to float
2044-
//
2045-
// This fadd is used as a float, and doesn't need the operands to be cased to half.
2046-
// We can remove the extra casts in this case.
2047-
// This becomes:
2048-
// %41 = fadd fast float %34, %33
2049-
// Can also do matches with fadd/fmul that will later become an mad instruction.
2050-
// mad example:
2051-
// %.prec70.i = fptrunc float %273 to half
2052-
// %.prec78.i = fptrunc float %276 to half
2053-
// %279 = fmul fast half %233, %.prec70.i
2054-
// %282 = fadd fast half %279, %.prec78.i
2055-
// %.prec84.i = fpext half %282 to float
2056-
// This becomes:
2057-
// %279 = fpext half %233 to float
2058-
// %280 = fmul fast float %273, %279
2059-
// %281 = fadd fast float %280, %276
2060-
bool CustomUnsafeOptPass::visitBinaryOperatorRemoveHftoFCast(BinaryOperator& I)
2061-
{
2062-
// Allow only if the reassoc or afn flags are used
2063-
if (!(I.hasAllowReassoc() || I.hasApproxFunc()))
2064-
return false;
2065-
2066-
// Check if the only user is a FPExtInst
2067-
if (!I.hasOneUse())
2068-
return false;
2069-
2070-
// Check if this instruction is used in a single FPExtInst
2071-
FPExtInst* CastInst = NULL;
2072-
User* U = *I.user_begin();
2073-
if (FPExtInst* inst = dyn_cast<FPExtInst>(U))
2074-
{
2075-
if (inst->getType()->isFloatTy())
2076-
{
2077-
CastInst = inst;
2078-
}
2079-
}
2080-
if (!CastInst || CastInst->use_empty())
2081-
return false;
2082-
2083-
2084-
// Check for fmad pattern
2085-
if (I.getOpcode() == Instruction::FAdd)
2086-
{
2087-
Value* Src0 = nullptr, * Src1 = nullptr, * Src2 = nullptr;
2088-
2089-
// CodeGenPatternMatch::MatchMad matches the first fmul.
2090-
Instruction* FmulInst = nullptr;
2091-
for (uint i = 0; i < 2; i++)
2092-
{
2093-
FmulInst = dyn_cast<Instruction>(I.getOperand(i));
2094-
if (FmulInst && FmulInst->getOpcode() == Instruction::FMul)
2095-
{
2096-
Src0 = FmulInst->getOperand(0);
2097-
Src1 = FmulInst->getOperand(1);
2098-
Src2 = I.getOperand(1 - i);
2099-
break;
2100-
}
2101-
else
2102-
{
2103-
// Prevent other non-fmul instructions from getting used
2104-
FmulInst = nullptr;
2105-
}
2106-
}
2107-
if (FmulInst && (I.hasAllowReassoc() || I.hasApproxFunc()))
2108-
{
2109-
// Used to get the new float operands for the new instructions
2110-
auto getFloatValue = [](Value* operand, Instruction* I, Type* type)
2111-
{
2112-
if (FPTruncInst* Inst = dyn_cast<FPTruncInst>(operand))
2113-
{
2114-
// Use the float input of the FPTrunc
2115-
if (Inst->getOperand(0)->getType()->isFloatTy())
2116-
{
2117-
return Inst->getOperand(0);
2118-
}
2119-
else
2120-
{
2121-
return (Value*)NULL;
2122-
}
2123-
}
2124-
else if (operand->getType()->isHalfTy())
2125-
{
2126-
return dyn_cast<Value>(new FPExtInst(operand, type, "", I));
2127-
}
2128-
return (Value*)NULL;
2129-
};
2130-
2131-
int ConvertCount = 0;
2132-
if (dyn_cast<FPTruncInst>(Src0))
2133-
ConvertCount++;
2134-
if (dyn_cast<FPTruncInst>(Src1))
2135-
ConvertCount++;
2136-
if (dyn_cast<FPTruncInst>(Src2))
2137-
ConvertCount++;
2138-
if (ConvertCount >= 2)
2139-
{
2140-
// Conversion for the hf values
2141-
auto FloatTy = CastInst->getType();
2142-
Src0 = getFloatValue(Src0, FmulInst, FloatTy);
2143-
Src1 = getFloatValue(Src1, FmulInst, FloatTy);
2144-
Src2 = getFloatValue(Src2, &I, FloatTy);
2145-
2146-
if (!Src0 || !Src1 || !Src2)
2147-
return false;
2148-
2149-
// Create new float fmul and fadd instructions
2150-
Value* NewFmul = BinaryOperator::Create(Instruction::FMul, Src0, Src1, "", &I);
2151-
Value* NewFadd = BinaryOperator::Create(Instruction::FAdd, NewFmul, Src2, "", &I);
2152-
2153-
// Copy fast math flags
2154-
Instruction* FmulInst = dyn_cast<Instruction>(NewFmul);
2155-
Instruction* FaddInst = dyn_cast<Instruction>(NewFadd);
2156-
FmulInst->copyFastMathFlags(FmulInst);
2157-
FaddInst->copyFastMathFlags(&I);
2158-
FaddInst->setDebugLoc(CastInst->getDebugLoc());
2159-
CastInst->replaceAllUsesWith(FaddInst);
2160-
collectForErase(*CastInst, 3);
2161-
return true;
2162-
}
2163-
}
2164-
}
2165-
2166-
// Check if operands come from a Float to HF Cast
2167-
Value* S1 = NULL, * S2 = NULL;
2168-
if (FPTruncInst* Inst = dyn_cast<FPTruncInst>(I.getOperand(0)))
2169-
{
2170-
if (!Inst->getType()->isHalfTy())
2171-
return false;
2172-
S1 = Inst->getOperand(0);
2173-
}
2174-
if (FPTruncInst* Inst = dyn_cast<FPTruncInst>(I.getOperand(1)))
2175-
{
2176-
if (!Inst->getType()->isHalfTy())
2177-
return false;
2178-
S2 = Inst->getOperand(0);
2179-
}
2180-
if (!S1 || !S2)
2181-
{
2182-
return false;
2183-
}
2184-
2185-
Value* newInst = NULL;
2186-
if (BinaryOperator* BinOp = dyn_cast<BinaryOperator>(&I))
2187-
{
2188-
newInst = BinaryOperator::Create(BinOp->getOpcode(), S1, S2, "", &I);
2189-
Instruction* Inst = dyn_cast<Instruction>(newInst);
2190-
Inst->copyFastMathFlags(&I);
2191-
Inst->setDebugLoc(CastInst->getDebugLoc());
2192-
CastInst->replaceAllUsesWith(Inst);
2193-
collectForErase(*CastInst, 2);
2194-
return true;
2195-
}
2196-
return false;
2197-
}
2198-
21992032
// Optimize mix operation if detected.
22002033
// Mix is computed as x*(1 - a) + y*a
22012034
// Replace it with a*(y - x) + x to save one instruction ('add' ISA, 'sub' in IR).

IGC/Compiler/CustomUnsafeOptPass.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ namespace IGC
7070
bool visitBinaryOperatorAddSubOp(llvm::BinaryOperator& I);
7171
bool visitBinaryOperatorDivAddDiv(llvm::BinaryOperator& I);
7272
bool visitBinaryOperatorFDivFMulCancellation(llvm::BinaryOperator& I);
73-
bool visitBinaryOperatorRemoveHftoFCast(llvm::BinaryOperator& I);
7473
bool isFDiv(llvm::Value* I, llvm::Value*& numerator, llvm::Value*& denominator);
7574
bool possibleForFmadOpt(llvm::Instruction* inst);
7675
bool visitFCmpInstFCmpFAddOp(llvm::FCmpInst& FC);

0 commit comments

Comments
 (0)