Skip to content

Commit 367d5c1

Browse files
bader and maarquitos14
authored
[Inline] Enable constant folding for fpbuiltin sqrt (#19746)
Constant folding should meet any accuracy requirements enforced by the fpbuiltin intrinsic, as it provides a correctly-rounded implementation. --------- Co-authored-by: Marcos Maronas <[email protected]>
1 parent ad63849 commit 367d5c1

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,6 +1702,12 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
17021702
case Intrinsic::amdgcn_fma_legacy:
17031703
case Intrinsic::amdgcn_fract:
17041704
case Intrinsic::amdgcn_sin:
1705+
1706+
// Floating point builtin intrinsics can be folded if accuracy requirements
1707+
// are satisfied in addition to the rules defined for regular floating point
1708+
// operations.
1709+
case Intrinsic::fpbuiltin_sqrt:
1710+
17051711
// The intrinsics below depend on rounding mode in MXCSR.
17061712
case Intrinsic::x86_sse_cvtss2si:
17071713
case Intrinsic::x86_sse_cvtss2si64:
@@ -2549,6 +2555,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
25492555
return ConstantFP::get(Ty->getContext(), U);
25502556
return ConstantFoldFP(atan, APF, Ty);
25512557
case Intrinsic::sqrt:
2558+
case Intrinsic::fpbuiltin_sqrt:
25522559
return ConstantFoldFP(sqrt, APF, Ty);
25532560
case Intrinsic::amdgcn_cos:
25542561
case Intrinsic::amdgcn_sin: {

llvm/test/Transforms/Inline/inline_constprop.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,3 +374,18 @@ bb.true:
374374
bb.false:
375375
ret float %x
376376
}
377+
378+
define float @caller9() {
379+
; Check that we can constant-prop through fp intrinsics.
380+
;
381+
; CHECK-LABEL: @caller9(
382+
; CHECK-NEXT: ret float 2.000000e+00
383+
%x = call float @callee9(float 16.0)
384+
ret float %x
385+
}
386+
387+
define float @callee9(float %x) {
388+
%s = call fast float @llvm.sqrt.f32(float %x)
389+
%fs = call fast float @llvm.fpbuiltin.sqrt.f32(float %s)
390+
ret float %fs
391+
}

0 commit comments

Comments
 (0)