Skip to content

Commit f24da99

Browse files
[llvm] Don't combine repeated fp divisors with subnormals
DAGCombiner performs this rewrite: (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip). However, when D is subnormal, 1.0 / D can overflow to infinity, so the rewritten code computes a*inf and b*inf. With fast-math flags such as ninf enabled, these infinities are poison and break the rewritten consumers. Guard this transformation with a check for subnormal constant divisors.
1 parent 4355356 commit f24da99

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18235,6 +18235,16 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
1823518235
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
1823618236
return SDValue();
1823718237

18238+
// Skip subnormal constant divisors: their reciprocal can overflow to infinity,
18239+
// so multiplying by it would introduce infinities.
18240+
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /* AllowUndefs */ true);
18241+
if (N1CFP) {
18242+
FPClassTest FPClass = N1CFP->getValueAPF().classify();
18243+
if (FPClass == fcPosSubnormal || FPClass == fcNegSubnormal) {
18244+
return SDValue();
18245+
}
18246+
}
18247+
1823818248
// Exit early if the target does not want this transform or if there can't
1823918249
// possibly be enough uses of the divisor to make the transform worthwhile.
1824018250
unsigned MinUses = TLI.combineRepeatedFPDivisors();
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=x86_64 -verify-machineinstrs < %s | FileCheck %s
3+
4+
; Negative test: repeated FP divisor transform should bail out when the rewrite
5+
; would introduce infinities because of subnormal constant divisors.
6+
define void @two_denorm_fdivs(float %a0, float %a1, float %a2, ptr %res) {
7+
; CHECK-LABEL: two_denorm_fdivs:
8+
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.95915678E-39,0.0E+0,0.0E+0,0.0E+0]
10+
; CHECK-NEXT: divss %xmm0, %xmm1
11+
; CHECK-NEXT: movss %xmm1, (%rdi)
12+
; CHECK-NEXT: divss %xmm0, %xmm2
13+
; CHECK-NEXT: movss %xmm2, 4(%rdi)
14+
; CHECK-NEXT: retq
15+
entry:
16+
%div0 = fdiv ninf float %a1, 0x37E5555500000000
17+
store float %div0, ptr %res
18+
%ptr1 = getelementptr inbounds float, ptr %res, i64 1
19+
%div1 = fdiv ninf float %a2, 0x37E5555500000000
20+
store float %div1, ptr %ptr1
21+
ret void
22+
}

0 commit comments

Comments
 (0)