Skip to content

Commit 17f5b59

Browse files
Merge pull request #181 from stephentyrone/document-augmented-arithmetic
Document the Augmented enum in RealModule in advance of 1.0
2 parents f50a1ac + 702c6fc commit 17f5b59

File tree

2 files changed

+75
-8
lines changed

2 files changed

+75
-8
lines changed

Sources/ComplexModule/ElementaryFunctions.swift

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -311,13 +311,13 @@ extension Complex: ElementaryFunctions {
311311
// as exact head-tail products (u is guaranteed to be well scaled,
312312
// v may underflow, but if it does it doesn't matter, the u term is
313313
// all we need).
314-
let (a,b) = Augmented.twoProdFMA(u, u)
315-
let (c,d) = Augmented.twoProdFMA(v, v)
314+
let (a,b) = Augmented.product(u, u)
315+
let (c,d) = Augmented.product(v, v)
316316
// It would be nice if we could simply use a - 1, but unfortunately
317317
// we don't have a tight enough bound to guarantee that that expression
318318
// is exact; a may be as small as 1/4, so we could lose a single bit
319319
// to rounding if we did that.
320-
var (s,e) = Augmented.fastTwoSum(-1, a)
320+
var (s,e) = Augmented.sum(large: -1, small: a)
321321
// Now we are ready to assemble the result. If cancellation happens,
322322
// then |c| > |e| > |b|, |d|, so this assembly order is safe. It's
323323
// also possible that |c| and |d| are small, but if that happens then
@@ -353,9 +353,9 @@ extension Complex: ElementaryFunctions {
353353
// So now we need to evaluate (2+x)x + y² accurately. To do this,
354354
// we employ augmented arithmetic; the key observation here is that
355355
// cancellation is only a problem when y² ≈ -(2+x)x
356-
let xp2 = Augmented.fastTwoSum(2, z.x) // Known that 2 > |x|.
357-
let a = Augmented.twoProdFMA(z.x, xp2.head)
358-
let y² = Augmented.twoProdFMA(z.y, z.y)
356+
let xp2 = Augmented.sum(large: 2, small: z.x) // Known that 2 > |x|.
357+
let a = Augmented.product(z.x, xp2.head)
358+
let y² = Augmented.product(z.y, z.y)
359359
let s = (a.head + y².head + a.tail + y².tail).addingProduct(z.x, xp2.tail)
360360
return Complex(.log(onePlus: s)/2, θ)
361361
}

Sources/RealModule/AugmentedArithmetic.swift

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,85 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12+
/// A namespace for "augmented arithmetic" operations for types conforming to
13+
/// `Real`.
14+
///
15+
/// Augmented arithmetic refers to a family of algorithms that represent
16+
/// the results of floating-point computations using multiple values such that
17+
/// either the error is minimized or the result is exact.
1218
public enum Augmented { }
1319

1420
extension Augmented {
21+
/// The product `a * b` represented as an implicit sum `head + tail`.
22+
///
23+
/// `head` is the correctly rounded value of `a*b`. If no overflow or
24+
/// underflow occurs, `tail` represents the rounding error incurred in
25+
/// computing `head`, such that the exact product is the sum of `head`
26+
/// and `tail` computed without rounding.
27+
///
28+
/// This operation is sometimes called "twoProd" or "twoProduct".
29+
///
30+
/// Edge Cases:
31+
/// -
32+
/// - `head` is always the IEEE 754 product `a * b`.
33+
/// - If `head` is not finite, `tail` is unspecified and should not be
34+
/// interpreted as having any meaning (it may be `NaN` or `infinity`).
35+
/// - When `head` is close to the underflow boundary, the rounding error
36+
/// may not be representable due to underflow, and `tail` will be rounded.
37+
/// If `head` is very small, `tail` may even be zero, even though the
38+
/// product is not exact.
39+
/// - If `head` is zero, `tail` is also a zero with unspecified sign.
40+
///
41+
/// Postconditions:
42+
/// -
43+
/// - If `head` is normal, then `abs(tail) < abs(head.ulp)`.
44+
/// Assuming IEEE 754 default rounding, `abs(tail) <= abs(head.ulp)/2`.
45+
/// - If both `head` and `tail` are normal, then `a * b` is exactly
46+
/// equal to `head + tail` when computed as real numbers.
1547
@_transparent
16-
public static func twoProdFMA<T:Real>(_ a: T, _ b: T) -> (head: T, tail: T) {
48+
public static func product<T:Real>(_ a: T, _ b: T) -> (head: T, tail: T) {
1749
let head = a*b
50+
// TODO: consider providing an FMA-less implementation for use when
51+
// targeting platforms without hardware FMA support. This works everywhere,
52+
// falling back on the C math.h fma funcions, but may be slow on older x86.
1853
let tail = (-head).addingProduct(a, b)
1954
return (head, tail)
2055
}
2156

57+
/// The sum `a + b` represented as an implicit sum `head + tail`.
58+
///
59+
/// `head` is the correctly rounded value of `a + b`. `tail` is the
60+
/// error from that computation rounded to the closest representable
61+
/// value.
62+
///
63+
/// Unlike `Augmented.product(a, b)`, the rounding error of a sum can
64+
/// never underflow. However, it may not be exactly representable when
65+
/// `a` and `b` differ widely in magnitude.
66+
///
67+
/// This operation is sometimes called "fastTwoSum".
68+
///
69+
/// - Parameters:
70+
/// - a: The summand with larger magnitude.
71+
/// - b: The summand with smaller magnitude.
72+
///
73+
/// Preconditions:
74+
/// -
75+
/// - `large.magnitude` must not be smaller than `small.magnitude`.
76+
/// They may be equal, or one or both may be `NaN`.
77+
/// This precondition is only enforced in debug builds.
78+
///
79+
/// Edge Cases:
80+
/// -
81+
/// - `head` is always the IEEE 754 sum `a + b`.
82+
/// - If `head` is not finite, `tail` is unspecified and should not be
83+
/// interpreted as having any meaning (it may be `NaN` or `infinity`).
84+
///
85+
/// Postconditions:
86+
/// -
87+
/// - If `head` is normal, then `abs(tail) < abs(head.ulp)`.
88+
/// Assuming IEEE 754 default rounding, `abs(tail) <= abs(head.ulp)/2`.
2289
@_transparent
23-
public static func fastTwoSum<T:Real>(_ a: T, _ b: T) -> (head: T, tail: T) {
90+
public static func sum<T:Real>(large a: T, small b: T) -> (head: T, tail: T) {
2491
assert(!(b.magnitude > a.magnitude))
2592
let head = a + b
2693
let tail = a - head + b

0 commit comments

Comments
 (0)