Skip to content

Commit 5bd6ffc

Browse files
committed
[WIP][llvm] Experiment lowering mul/div fx intrinsics to IR
1 parent 99e53cb commit 5bd6ffc

File tree

1 file changed

+182
-0
lines changed

1 file changed

+182
-0
lines changed

llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
15+
#include "llvm/ADT/APSInt.h"
1516
#include "llvm/Analysis/ObjCARCInstKind.h"
1617
#include "llvm/Analysis/ObjCARCUtil.h"
1718
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -122,6 +123,163 @@ static bool lowerLoadRelative(Function &F) {
122123
return Changed;
123124
}
124125

126+
/// Emit IR computing a fixed-point multiplication of \p Op0 and \p Op1.
///
/// \param M          Unused here; kept so the signature mirrors
///                   lowerFixedPointDiv.
/// \param B          Builder positioned where the new instructions go.
/// \param Op0, Op1   Scalar integer operands of the same type.
/// \param Scale      Number of bits the full-width product is shifted right.
/// \param Signed     Treat the operands as signed (sext/ashr/signed compares)
///                   rather than unsigned (zext/lshr/unsigned compares).
/// \param Saturating Clamp an out-of-range result to the min/max value of the
///                   operand type instead of letting it wrap.
/// \returns A value of the operand type holding the (possibly clamped) result.
static Value *lowerFixedPointMul(Module *M, IRBuilder<> &B, Value *Op0,
                                 Value *Op1, unsigned Scale, bool Signed,
                                 bool Saturating) {
  IntegerType *OpTy = cast<IntegerType>(Op0->getType());
  unsigned NumBits = OpTy->getBitWidth();

  if (!Scale) {
    if (!Saturating) {
      // [us]mul.fix(a, b, 0) -> mul(a, b)
      return B.CreateMul(Op0, Op1);
    }

    // With no scaling, saturation only depends on whether the plain multiply
    // overflows, so the *.with.overflow intrinsics are enough and no wider
    // type is needed.
    APSInt MaxVal = APSInt::getMaxValue(NumBits, !Signed);
    if (Signed) {
      APSInt MinVal = APSInt::getMinValue(NumBits, !Signed);
      Constant *Zero = Constant::getNullValue(OpTy);
      Value *Res =
          B.CreateIntrinsic(Intrinsic::smul_with_overflow, {OpTy}, {Op0, Op1});
      Value *Prod = B.CreateExtractValue(Res, {0});
      Value *Overflowed = B.CreateExtractValue(Res, {1});
      // The sign bit of the xor is 1 exactly when the operands have different
      // signs, i.e. when the exact product is negative.
      Value *Xor = B.CreateXor(Op0, Op1);
      Value *ProdNeg = B.CreateICmpSLT(Xor, Zero);
      Value *OverflowRes =
          B.CreateSelect(ProdNeg, ConstantInt::get(OpTy, MinVal),
                         ConstantInt::get(OpTy, MaxVal));
      return B.CreateSelect(Overflowed, OverflowRes, Prod);
    }

    Value *Res =
        B.CreateIntrinsic(Intrinsic::umul_with_overflow, {OpTy}, {Op0, Op1});
    Value *Prod = B.CreateExtractValue(Res, {0});
    Value *Overflowed = B.CreateExtractValue(Res, {1});
    return B.CreateSelect(Overflowed, ConstantInt::get(OpTy, MaxVal), Prod);
  }

  // Multiply in a type twice as wide so the full product is representable.
  IntegerType *WideTy = OpTy->getExtendedType();
  Value *WideOp0 =
      Signed ? B.CreateSExt(Op0, WideTy) : B.CreateZExt(Op0, WideTy);
  Value *WideOp1 =
      Signed ? B.CreateSExt(Op1, WideTy) : B.CreateZExt(Op1, WideTy);
  Value *Prod = B.CreateMul(WideOp0, WideOp1);

  // Drop the extra low bits *before* checking for saturation: the range check
  // applies to the scaled result, not to the raw double-width product. The
  // unsigned product may have its top bit set, so it needs a logical shift.
  Value *Res = Signed ? B.CreateAShr(Prod, Scale) : B.CreateLShr(Prod, Scale);

  if (Saturating) {
    unsigned WideBits = WideTy->getBitWidth();
    Constant *SatMax = ConstantInt::get(
        WideTy, APSInt::getMaxValue(NumBits, !Signed).extend(WideBits));
    if (Signed) {
      Constant *SatMin = ConstantInt::get(
          WideTy, APSInt::getMinValue(NumBits, !Signed).extend(WideBits));
      Value *OverflowedBelow = B.CreateICmpSLT(Res, SatMin);
      Res = B.CreateSelect(OverflowedBelow, SatMin, Res);
      Value *OverflowedAbove = B.CreateICmpSGT(Res, SatMax);
      Res = B.CreateSelect(OverflowedAbove, SatMax, Res);
    } else {
      // An unsigned result can only exceed the upper bound.
      Value *OverflowedAbove = B.CreateICmpUGT(Res, SatMax);
      Res = B.CreateSelect(OverflowedAbove, SatMax, Res);
    }
  }

  // The value now fits the original width (or wraps, when not saturating).
  return B.CreateTrunc(Res, OpTy);
}
193+
194+
/// Replace direct calls to the fixed-point multiply intrinsic \p F with
/// explicit IR produced by the Value-level lowerFixedPointMul helper.
///
/// \param F          The intrinsic declaration whose uses are visited.
/// \param Signed     Forwarded to the helper: signed vs. unsigned arithmetic.
/// \param Saturating Forwarded to the helper: clamp instead of wrapping.
/// \returns true if at least one call was rewritten.
static bool lowerFixedPointMul(Function &F, bool Signed, bool Saturating) {
  if (F.use_empty())
    return false;

  bool Changed = false;
  Module *M = F.getParent();

  // Calls are erased while walking the use list, hence the early-inc range.
  for (Use &U : llvm::make_early_inc_range(F.uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    // Only direct calls are rewritten; other users of F are left alone.
    if (!CI || CI->getCalledOperand() != &F)
      continue;

    // The helper casts the operand type to IntegerType, which would assert on
    // a vector overload of the intrinsic. Leave such calls un-lowered.
    if (!CI->getType()->isIntegerTy())
      continue;

    IRBuilder<> B(CI);
    // Operand 2 is the scale; it is read as a constant integer.
    unsigned Scale = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Value *Result =
        lowerFixedPointMul(M, B, CI->getArgOperand(0), CI->getArgOperand(1),
                           Scale, Signed, Saturating);

    CI->replaceAllUsesWith(Result);
    CI->eraseFromParent();
    Changed = true;
  }

  return Changed;
}
219+
220+
/// Emit IR computing a fixed-point division of \p Op0 by \p Op1.
///
/// The dividend is widened, shifted left by \p Scale and divided by the
/// widened divisor using sdiv/udiv. No check for a zero divisor is emitted.
///
/// \param M          Module providing the LLVMContext for the wide type.
/// \param B          Builder positioned where the new instructions go.
/// \param Op0, Op1   Scalar integer dividend and divisor of the same type.
/// \param Scale      Number of bits the dividend is pre-shifted left.
/// \param Signed     Use sext/sdiv/signed compares instead of
///                   zext/udiv/unsigned compares.
/// \param Saturating Clamp an out-of-range quotient to the min/max value of
///                   the operand type instead of letting it wrap.
/// \returns A value of the operand type holding the (possibly clamped)
///          quotient.
static Value *lowerFixedPointDiv(Module *M, IRBuilder<> &B, Value *Op0,
                                 Value *Op1, unsigned Scale, bool Signed,
                                 bool Saturating) {
  IntegerType *OpTy = cast<IntegerType>(Op0->getType());
  unsigned NumBits = OpTy->getBitWidth();

  // Widen by the scale so the pre-shifted dividend is representable. Signed
  // division needs one more bit: with only NumBits + Scale bits, a dividend
  // of MIN shifts to the wide type's minimum, and dividing that by -1 would
  // overflow the sdiv before the quotient could be saturated.
  unsigned WideBits = NumBits + Scale + (Signed ? 1 : 0);
  IntegerType *WideTy = IntegerType::get(M->getContext(), WideBits);
  Value *WideOp0 =
      Signed ? B.CreateSExt(Op0, WideTy) : B.CreateZExt(Op0, WideTy);
  Value *WideOp1 =
      Signed ? B.CreateSExt(Op1, WideTy) : B.CreateZExt(Op1, WideTy);

  WideOp0 = B.CreateShl(WideOp0, Scale);
  Value *Quot =
      Signed ? B.CreateSDiv(WideOp0, WideOp1) : B.CreateUDiv(WideOp0, WideOp1);

  if (Saturating) {
    Constant *SatMax = ConstantInt::get(
        WideTy, APSInt::getMaxValue(NumBits, !Signed).extend(WideBits));
    if (Signed) {
      Constant *SatMin = ConstantInt::get(
          WideTy, APSInt::getMinValue(NumBits, !Signed).extend(WideBits));
      Value *OverflowedBelow = B.CreateICmpSLT(Quot, SatMin);
      Quot = B.CreateSelect(OverflowedBelow, SatMin, Quot);
      Value *OverflowedAbove = B.CreateICmpSGT(Quot, SatMax);
      Quot = B.CreateSelect(OverflowedAbove, SatMax, Quot);
    } else {
      // An unsigned quotient can only exceed the upper bound.
      Value *OverflowedAbove = B.CreateICmpUGT(Quot, SatMax);
      Quot = B.CreateSelect(OverflowedAbove, SatMax, Quot);
    }
  }

  // The value now fits the original width (or wraps, when not saturating).
  return B.CreateTrunc(Quot, OpTy);
}
256+
257+
/// Replace direct calls to the fixed-point divide intrinsic \p F with
/// explicit IR produced by the Value-level lowerFixedPointDiv helper.
///
/// \param F          The intrinsic declaration whose uses are visited.
/// \param Signed     Forwarded to the helper: signed vs. unsigned arithmetic.
/// \param Saturating Forwarded to the helper: clamp instead of wrapping.
/// \returns true if at least one call was rewritten.
static bool lowerFixedPointDiv(Function &F, bool Signed, bool Saturating) {
  if (F.use_empty())
    return false;

  bool Changed = false;
  Module *M = F.getParent();

  // Calls are erased while walking the use list, hence the early-inc range.
  for (Use &U : llvm::make_early_inc_range(F.uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    // Only direct calls are rewritten; other users of F are left alone.
    if (!CI || CI->getCalledOperand() != &F)
      continue;

    // The helper casts the operand type to IntegerType, which would assert on
    // a vector overload of the intrinsic. Leave such calls un-lowered.
    if (!CI->getType()->isIntegerTy())
      continue;

    IRBuilder<> B(CI);
    // Operand 2 is the scale; it is read as a constant integer.
    unsigned Scale = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Value *Result =
        lowerFixedPointDiv(M, B, CI->getArgOperand(0), CI->getArgOperand(1),
                           Scale, Signed, Saturating);

    CI->replaceAllUsesWith(Result);
    CI->eraseFromParent();
    Changed = true;
  }

  return Changed;
}
282+
125283
// ObjCARC has knowledge about whether an obj-c runtime function needs to be
126284
// always tail-called or never tail-called.
127285
static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) {
@@ -455,6 +613,30 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
455613
case Intrinsic::load_relative:
456614
Changed |= lowerLoadRelative(F);
457615
break;
616+
case Intrinsic::smul_fix:
617+
Changed |= lowerFixedPointMul(F, /*Signed=*/true, /*Saturating=*/false);
618+
break;
619+
case Intrinsic::smul_fix_sat:
620+
Changed |= lowerFixedPointMul(F, /*Signed=*/true, /*Saturating=*/true);
621+
break;
622+
case Intrinsic::umul_fix:
623+
Changed |= lowerFixedPointMul(F, /*Signed=*/false, /*Saturating=*/false);
624+
break;
625+
case Intrinsic::umul_fix_sat:
626+
Changed |= lowerFixedPointMul(F, /*Signed=*/false, /*Saturating=*/true);
627+
break;
628+
case Intrinsic::sdiv_fix:
629+
Changed |= lowerFixedPointDiv(F, /*Signed=*/true, /*Saturating=*/false);
630+
break;
631+
case Intrinsic::sdiv_fix_sat:
632+
Changed |= lowerFixedPointDiv(F, /*Signed=*/true, /*Saturating=*/true);
633+
break;
634+
case Intrinsic::udiv_fix:
635+
Changed |= lowerFixedPointDiv(F, /*Signed=*/false, /*Saturating=*/false);
636+
break;
637+
case Intrinsic::udiv_fix_sat:
638+
Changed |= lowerFixedPointDiv(F, /*Signed=*/false, /*Saturating=*/true);
639+
break;
458640
case Intrinsic::is_constant:
459641
case Intrinsic::objectsize:
460642
Changed |= forEachCall(F, [&](CallInst *CI) {

0 commit comments

Comments
 (0)