Skip to content

Commit 5e1e6d0

Browse files
lwesierssys_zuul
authored and committed
Add sat and overflow operation for add and sub
Fixes #119 Change-Id: Ib8e3f05419f95b1b84762a29fff64c200f69d982
1 parent ea74223 commit 5e1e6d0

File tree

3 files changed

+118
-34
lines changed

3 files changed

+118
-34
lines changed

IGC/Compiler/IGC_IR_spec.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ IGC_IR_LLVM_INTRINSIC(fshr)
106106
IGC_IR_LLVM_INTRINSIC(usub_sat)
107107
IGC_IR_LLVM_INTRINSIC(ssub_sat)
108108
#endif
109+
#if LLVM_VERSION_MAJOR >= 10
110+
IGC_IR_LLVM_INTRINSIC(uadd_sat)
111+
IGC_IR_LLVM_INTRINSIC(sadd_sat)
112+
#endif
109113

110114
// LLVM instructions allowed in IGC IR
111115
//------------------------------------

IGC/Compiler/LegalizationPass.cpp

Lines changed: 114 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,17 +1619,122 @@ void Legalization::visitAlloca(AllocaInst& I)
16191619
void Legalization::visitIntrinsicInst(llvm::IntrinsicInst& I)
16201620
{
16211621
m_ctx->m_instrTypes.numInsts++;
1622-
switch (I.getIntrinsicID())
1622+
IRBuilder<> Builder(&I);
1623+
1624+
auto intrinsicID = I.getIntrinsicID();
1625+
1626+
switch (intrinsicID)
1627+
{
1628+
#if LLVM_VERSION_MAJOR >= 9
1629+
case Intrinsic::usub_sat:
1630+
case Intrinsic::ssub_sat:
1631+
#if LLVM_VERSION_MAJOR >= 10
1632+
case Intrinsic::uadd_sat:
1633+
case Intrinsic::sadd_sat:
1634+
#endif
16231635
{
1636+
llvm::Intrinsic::ID OverflowIntrinID;
1637+
switch (I.getIntrinsicID()) {
1638+
case Intrinsic::usub_sat: OverflowIntrinID = Intrinsic::usub_with_overflow; break;
1639+
case Intrinsic::ssub_sat: OverflowIntrinID = Intrinsic::ssub_with_overflow; break;
1640+
#if LLVM_VERSION_MAJOR >= 10
1641+
case Intrinsic::uadd_sat: OverflowIntrinID = Intrinsic::uadd_with_overflow; break;
1642+
case Intrinsic::sadd_sat: OverflowIntrinID = Intrinsic::sadd_with_overflow; break;
1643+
#endif
1644+
default: assert(0 && "Incorrect intrinsic"); break;
1645+
}
1646+
1647+
int BitWidth = I.getType()->getIntegerBitWidth();
1648+
auto OverFlowIntrin = Builder.CreateIntrinsic(OverflowIntrinID,
1649+
{ I.getArgOperand(0)->getType(), I.getArgOperand(1)->getType() },
1650+
{ I.getArgOperand(0), I.getArgOperand(1) }
1651+
);
1652+
Value* Result = Builder.CreateExtractValue(OverFlowIntrin, (uint64_t)0);
1653+
Value* Overflow = Builder.CreateExtractValue(OverFlowIntrin, (uint64_t)1);
1654+
1655+
Value* Boundary = nullptr;
1656+
switch (I.getIntrinsicID()) {
1657+
case Intrinsic::usub_sat:
1658+
Boundary = Builder.getInt(APInt::getMinValue(BitWidth));
1659+
break;
1660+
case Intrinsic::ssub_sat: {
1661+
Value* isMaxOrMinOverflow = Builder.CreateICmpSLT(Builder.getIntN(BitWidth, 0), I.getArgOperand(1));
1662+
APInt MinVal = APInt::getSignedMinValue(BitWidth);
1663+
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
1664+
Boundary = Builder.CreateSelect(isMaxOrMinOverflow, Builder.getInt(MinVal), Builder.getInt(MaxVal));
1665+
}
1666+
break;
1667+
#if LLVM_VERSION_MAJOR >= 10
1668+
case Intrinsic::uadd_sat:
1669+
Boundary = Builder.getInt(APInt::getMaxValue(BitWidth));
1670+
break;
1671+
case Intrinsic::sadd_sat: {
1672+
Value* isMaxOrMinOverflow = Builder.CreateICmpSLT(Builder.getIntN(BitWidth, 0), I.getArgOperand(1));
1673+
APInt MinVal = APInt::getSignedMinValue(BitWidth);
1674+
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
1675+
Boundary = Builder.CreateSelect(isMaxOrMinOverflow, Builder.getInt(MaxVal), Builder.getInt(MinVal));
1676+
}
1677+
break;
1678+
#endif
1679+
default: assert(0 && "Incorrect intrinsic"); break;
1680+
}
1681+
1682+
Value* Saturated = Builder.CreateSelect(Overflow, Boundary, Result);
1683+
I.replaceAllUsesWith(Saturated);
1684+
I.eraseFromParent();
1685+
visit(*OverFlowIntrin);
1686+
}
1687+
break;
1688+
#endif
1689+
case Intrinsic::sadd_with_overflow:
1690+
case Intrinsic::usub_with_overflow:
1691+
case Intrinsic::ssub_with_overflow:
16241692
case Intrinsic::uadd_with_overflow:
16251693
{
16261694
Value* src0 = I.getArgOperand(0);
16271695
Value* src1 = I.getArgOperand(1);
1628-
Value* res = BinaryOperator::Create(Instruction::Add, src0, src1, "", &I);
1629-
// Unsigned a + b overflows iff a + b < a (for an unsigned comparison)
1630-
Value* isOverflow = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, res, src0, "", &I);
16311696

1632-
// llvm.uadd.with.overflow returns a struct, where the first element is the add result,
1697+
Value* res = nullptr;
1698+
Value* isOverflow = nullptr;
1699+
1700+
switch (intrinsicID)
1701+
{
1702+
case Intrinsic::uadd_with_overflow:
1703+
res = BinaryOperator::Create(Instruction::Add, src0, src1, "", &I);
1704+
// Unsigned a + b overflows if a + b < a (for an unsigned comparison)
1705+
isOverflow = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, res, src0, "", &I);
1706+
break;
1707+
case Intrinsic::usub_with_overflow:
1708+
res = BinaryOperator::Create(Instruction::Sub, src0, src1, "", &I);
1709+
// Unsigned a - b overflows if a - b > a (for an unsigned comparison)
1710+
isOverflow = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, res, src0, "", &I);
1711+
break;
1712+
case Intrinsic::sadd_with_overflow:
1713+
case Intrinsic::ssub_with_overflow:
1714+
{
1715+
Value* usrc0 = BitCastInst::CreateZExtOrBitCast(src0, src0->getType(), "", &I);
1716+
Value* usrc1 = BitCastInst::CreateZExtOrBitCast(src1, src1->getType(), "", &I);
1717+
res = BinaryOperator::Create(
1718+
intrinsicID == Intrinsic::sadd_with_overflow ? Instruction::Add : Instruction::Sub,
1719+
usrc0, usrc1, "", &I);
1720+
if (intrinsicID == Intrinsic::ssub_with_overflow)
1721+
{
1722+
usrc1 = BinaryOperator::CreateNot(usrc1, "", &I);
1723+
}
1724+
Value* usrc0_xor_usrc1 = BinaryOperator::Create(Instruction::Xor, usrc0, usrc1, "", &I);
1725+
Value* res_xor_usrc0 = BinaryOperator::Create(Instruction::Xor, res, usrc0, "", &I);
1726+
Value* negOpt = BinaryOperator::CreateNot(usrc0_xor_usrc1, "", &I);
1727+
Value* andOpt = BinaryOperator::CreateAnd(negOpt, res_xor_usrc0, "", &I);
1728+
auto zero = ConstantInt::get(src0->getType(), 0, true);
1729+
// Signed a - b overflows if the sign of a and -b are the same, but different from the result
1730+
// Signed a + b overflows if the sign of a and b are the same, but different from the result
1731+
isOverflow = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT, andOpt, zero, "", &I);
1732+
}
1733+
break;
1734+
default: assert(0 && "Incorrect intrinsic"); break;
1735+
}
1736+
1737+
// llvm.x.with.overflow returns a struct, where the first element is the operation result,
16331738
// and the second is the overflow flag.
16341739
// Replace each extract with the correct instruction.
16351740
for (auto U = I.user_begin(), EU = I.user_end(); U != EU; ++U)
@@ -1664,9 +1769,6 @@ void Legalization::visitIntrinsicInst(llvm::IntrinsicInst& I)
16641769
case Intrinsic::assume:
16651770
m_instructionsToRemove.push_back(&I);
16661771
break;
1667-
case Intrinsic::sadd_with_overflow:
1668-
case Intrinsic::usub_with_overflow:
1669-
case Intrinsic::ssub_with_overflow:
16701772
case Intrinsic::umul_with_overflow:
16711773
case Intrinsic::smul_with_overflow:
16721774
TODO("Handle the other with_overflow intrinsics");
@@ -1685,11 +1787,10 @@ void Legalization::visitIntrinsicInst(llvm::IntrinsicInst& I)
16851787
case llvm::Intrinsic::floor:
16861788
case llvm::Intrinsic::ceil:
16871789
case llvm::Intrinsic::trunc: {
1688-
IRBuilder<> IRB(&I);
1689-
Value* Val = IRB.CreateFPExt(I.getOperand(0), IRB.getFloatTy());
1690-
Value* Callee = Intrinsic::getDeclaration(I.getParent()->getParent()->getParent(), IID, IRB.getFloatTy());
1691-
Val = IRB.CreateCall(Callee, Val);
1692-
Val = IRB.CreateFPTrunc(Val, I.getType());
1790+
Value* Val = Builder.CreateFPExt(I.getOperand(0), Builder.getFloatTy());
1791+
Value* Callee = Intrinsic::getDeclaration(I.getParent()->getParent()->getParent(), IID, Builder.getFloatTy());
1792+
Val = Builder.CreateCall(Callee, Val);
1793+
Val = Builder.CreateFPTrunc(Val, I.getType());
16931794
I.replaceAllUsesWith(Val);
16941795
I.eraseFromParent();
16951796
break;

IGC/Compiler/Optimizer/OpenCLPasses/ReplaceUnsupportedIntrinsics/ReplaceUnsupportedIntrinsics.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -808,31 +808,10 @@ namespace {
808808
}
809809
#endif
810810

811-
#if LLVM_VERSION_MAJOR >= 9
812-
void replaceSubtractionSaturation(IntrinsicInst * I) {
813-
assert(I->getIntrinsicID() == Intrinsic::ssub_sat ||
814-
I->getIntrinsicID() == Intrinsic::usub_sat);
815-
IRBuilder<> Builder(I);
816-
817-
auto predicate = I->getIntrinsicID() == Intrinsic::usub_sat ? CmpInst::Predicate::ICMP_UGT : CmpInst::Predicate::ICMP_SGT;
818-
819-
auto cmpInst = Builder.CreateICmp(predicate, I->getArgOperand(0), I->getArgOperand(1));
820-
auto selectInst = Builder.CreateSelect(cmpInst, I->getArgOperand(0), I->getArgOperand(1));
821-
auto result = Builder.CreateSub(selectInst, I->getArgOperand(1));
822-
823-
I->replaceAllUsesWith(result);
824-
I->eraseFromParent();
825-
}
826-
#endif
827-
828811
std::map<Intrinsic::ID, std::function<void(IntrinsicInst*)>> intrinsicToFunc = {
829812
#if LLVM_VERSION_MAJOR >= 8
830813
{ Intrinsic::fshl, replaceFunnelShift },
831814
{ Intrinsic::fshr, replaceFunnelShift },
832-
#endif
833-
#if LLVM_VERSION_MAJOR >= 9
834-
{ Intrinsic::ssub_sat, replaceSubtractionSaturation },
835-
{ Intrinsic::usub_sat, replaceSubtractionSaturation },
836815
#endif
837816
{ Intrinsic::memcpy, replaceMemcpy },
838817
{ Intrinsic::memset, replaceMemset },

0 commit comments

Comments
 (0)