Merge pull request #289 from stephentyrone/rework-complex-division

stephentyrone · web-flow · commit 877fd0aa9a61 · 2024-05-16T22:24:22.000-04:00
Replaces the rescaling algorithm for Complex division with one inspired by Doug Priest's "Efficient Scaling for Complex Division," plus some further tweaks to:

- allow it to work for arbitrary FloatingPoint types, including Float16
- get exactly the same rounding behavior as the un-rescaled path, so that z/w = tz/tw when tz and tw are computed exactly.
- allow future optimizations to hoist a rescaled reciprocal for more speedups.

Unlike Priest, we do not try to avoid spurious overflow in the final computation when the result is very near the overflow boundary but cancellation brings us just inside it. We do not believe that this is a good tradeoff, as complex multiplication overflows in exactly the same way. We will investigate providing opt-in API to avoid this overflow case in a future PR.
diff --git a/Sources/ComplexModule/Complex+AlgebraicField.swift b/Sources/ComplexModule/Complex+AlgebraicField.swift
@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift Numerics open source project
 //
-// Copyright (c) 2019-2021 Apple Inc. and the Swift Numerics project authors
+// Copyright (c) 2019-2024 Apple Inc. and the Swift Numerics project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
@@ -27,52 +27,93 @@ extension Complex: AlgebraicField {
   }
   
   @_transparent
-  public static func /(z: Complex, w: Complex) -> Complex {
-    // Try the naive expression z/w = z*conj(w) / |w|^2; if we can compute
-    // this without over/underflow, everything is fine and the result is
-    // correct. If not, we have to rescale and do the computation carefully.
-    let lenSq = w.lengthSquared
-    guard lenSq.isNormal else { return rescaledDivide(z, w) }
-    return z * (w.conjugate.divided(by: lenSq))
+  public static func /=(z: inout Complex, w: Complex) {
+    z = z / w
   }
   
   @_transparent
-  public static func /=(z: inout Complex, w: Complex) {
-    z = z / w
+  public static func /(z: Complex, w: Complex) -> Complex {
+    // Try the naive expression z/w = z * (conj(w) / |w|^2); if we can
+    // compute this without over/underflow, everything is fine and the
+    // result is correct. If not, we have to rescale and do the
+    // computation carefully (see below).
+    let lenSq = w.lengthSquared
+    guard lenSq.isNormal else { return rescaledDivide(z, w) }
+    return z * w.conjugate.divided(by: lenSq)
   }
   
   @usableFromInline @_alwaysEmitIntoClient @inline(never)
   internal static func rescaledDivide(_ z: Complex, _ w: Complex) -> Complex {
     if w.isZero { return .infinity }
-    if z.isZero || !w.isFinite { return .zero }
-    // TODO: detect when RealType is Float and just promote to Double, then
-    // use the naive algorithm.
-    let zScale = z.magnitude
-    let wScale = w.magnitude
-    let zNorm = z.divided(by: zScale)
-    let wNorm = w.divided(by: wScale)
-    let r = (zNorm * wNorm.conjugate).divided(by: wNorm.lengthSquared)
-    // At this point, the result is (r * zScale)/wScale computed without
-    // undue overflow or underflow. We know that r is close to unity, so
-    // the question is simply what order in which to do this computation
-    // to avoid spurious overflow or underflow. There are three options
-    // to choose from:
+    if !w.isFinite { return .zero }
+    //  Scaling algorithm adapted from Doug Priest's "Efficient Scaling for
+    //  Complex Division":
+    if w.magnitude < .leastNormalMagnitude {
+      //  A difference from Priest's algorithm is that he didn't have to worry
+      //  about types like Float16, where the significand width is comparable
+      //  to the exponent range, such that |leastNonzeroMagnitude|^(-¾) isn't
+      //  representable (e.g. for Float16 it would want to be 2¹⁸, but the
+      //  largest allowed exponent is 15). Note that it's critical to use zʹ/wʹ
+      //  after rescaling to avoid this, rather than falling through into the
+      //  normal rescaling, because otherwise we might end up back in the
+      //  situation where |w| ~ 1.
+      let s = 1/(RealType(RealType.radix) * .leastNormalMagnitude)
+      let wʹ = w.multiplied(by: s)
+      let zʹ = z.multiplied(by: s)
+      return zʹ / wʹ
+    }
+    //  Having handled that case, we proceed pretty similarly to Priest:
     //
-    // - r * (zScale / wScale)
-    // - (r * zScale) / wScale
-    // - (r / wScale) * zScale
+    //  1. Choose real scale s ~ |w|^(-¾), an exact power of the radix.
+    //  2. wʹ ← sw
+    //  3. zʹ ← sz
+    //  4. return zʹ * (wʹ.conjugate / wʹ.lengthSquared) (i.e. zʹ/wʹ).
     //
-    // The simplest case is when zScale / wScale is normal:
-    if (zScale / wScale).isNormal {
-      return r.multiplied(by: zScale / wScale)
-    }
-    // Otherwise, we need to compute either rNorm * zScale or rNorm / wScale
-    // first. Choose the first if the first scaling behaves well, otherwise
-    // choose the other one.
-    if (r.magnitude * zScale).isNormal {
-      return r.multiplied(by: zScale).divided(by: wScale)
-    }
-    return r.divided(by: wScale).multiplied(by: zScale)
+    //  Why is this safe and accurate? First, observe that wʹ and zʹ are both
+    //  computed exactly because:
+    //
+    //  - s is an exact power of radix.
+    //  - wʹ ~ |w|^(¼), and hence cannot overflow or underflow.
+    //  - zʹ might overflow or underflow, but only if the final result also
+    //       overflows or underflows. (This is more subtle than I make it
+    //       sound. In particular, most of the fast ways one might try to
+    //       compute s give rise to a situation where when |w| is close to
+    //       one, multiplication by s is a dilation even though the actual
+    //       division is a contraction or vice-versa, and thus intermediate
+    //       computations might incorrectly overflow or underflow. Priest
+    //       had to take some care to avoid this situation, but we do not,
+    //       because we have already ruled out |w| ~ 1 before we call this
+    //       function.)
+    //
+    //  Next observe that |wʹ.lengthSquared| ~ |w|^(½), so again this cannot
+    //  overflow or underflow, and neither can (wʹ.conjugate/wʹ.lengthSquared),
+    //  which has magnitude like |w|^(-¼).
+    //
+    //  Note that because the scale factor is always a power of the radix,
+    //  the rescaling does not affect rounding, and so this algorithm is scale-
+    //  invariant compared to the mainline `/` implementation, up to the
+    //  underflow boundary.
+    //
+    //  Note that our final assembly of the result is different from Priest;
+    //  he applies s to w twice, instead of once to w and once to z, and
+    //  does the product as (zw̅ʺ)*(1/|wʹ|²), while we do zʹ(w̅ʹ/|wʹ|²). We
+    //  prefer our version for three reasons:
+    //
+    //  1. it extracts a little more ILP
+    //  2. it makes it so that we get exactly the same roundings on the
+    //     rescaled divide path as on the fast path, so that z/w = tz/tw
+    //     when tz and tw are computed exactly.
+    //  3. it unlocks a future optimization where we hoist s and
+    //     (w̅ʹ/|wʹ|²) and make divisions all fast-path without perturbing
+    //     rounding.
+    let s = RealType(
+      sign: .plus,
+      exponent: -3*w.magnitude.exponent/4,
+      significand: 1
+    )
+    let wʹ = w.multiplied(by: s)
+    let zʹ = z.multiplied(by: s)
+    return zʹ * wʹ.conjugate.divided(by: wʹ.lengthSquared)
   }
   
   /// A normalized complex number with the same phase as this value.
diff --git a/Tests/ComplexTests/ArithmeticTests.swift b/Tests/ComplexTests/ArithmeticTests.swift
@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift Numerics open source project
 //
-// Copyright (c) 2019 Apple Inc. and the Swift Numerics project authors
+// Copyright (c) 2019-2024 Apple Inc. and the Swift Numerics project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
@@ -13,11 +13,23 @@ import XCTest
 import ComplexModule
 import RealModule
 
+func ulpsFromInfinity<T: Real>(_ a: T) -> T {
+  (.greatestFiniteMagnitude - a) / .greatestFiniteMagnitude.ulp + 1
+}
+
 // TODO: improve this to be a general-purpose complex comparison with tolerance
 func relativeError<T>(_ a: Complex<T>, _ b: Complex<T>) -> T {
   if a == b { return 0 }
-  let scale = max(a.magnitude, b.magnitude, T.leastNormalMagnitude).ulp
-  return (a - b).magnitude / scale
+  if a.isFinite && b.isFinite {
+    let scale = max(a.magnitude, b.magnitude, T.leastNormalMagnitude).ulp
+    return (a - b).magnitude / scale
+  } else {
+    if a.isFinite {
+      return ulpsFromInfinity(a.magnitude)
+    } else {
+      return ulpsFromInfinity(b.magnitude)
+    }
+  }
 }
 
 func closeEnough<T: Real>(_ a: T, _ b: T, ulps allowed: T) -> Bool {
@@ -29,11 +41,15 @@ func checkMultiply<T>(
   _ a: Complex<T>, _ b: Complex<T>, expected: Complex<T>, ulps allowed: T
 ) -> Bool {
   let observed = a*b
+  if observed == expected { return false }
+  // Even if the expected result is finite, we allow overflow if
+  // the two-norm of the expected result overflows.
+  if !observed.isFinite && !expected.length.isFinite { return false }
   let rel = relativeError(observed, expected)
-  if rel > allowed {
+  guard rel <= allowed else {
     print("Over-large error in \(a)*\(b)")
     print("Expected: \(expected)\nObserved: \(observed)")
-    print("Relative error was \(rel) (tolerance: \(allowed).")
+    print("Relative error was \(rel) (tolerance: \(allowed)).")
     return true
   }
   return false
@@ -43,11 +59,15 @@ func checkDivide<T>(
   _ a: Complex<T>, _ b: Complex<T>, expected: Complex<T>, ulps allowed: T
 ) -> Bool {
   let observed = a/b
+  if observed == expected { return false }
+  // Even if the expected result is finite, we allow overflow if
+  // the two-norm of the expected result overflows.
+  if !observed.isFinite && !expected.length.isFinite { return false }
   let rel = relativeError(observed, expected)
-  if rel > allowed {
+  guard rel <= allowed else {
     print("Over-large error in \(a)/\(b)")
     print("Expected: \(expected)\nObserved: \(observed)")
-    print("Relative error was \(rel) (tolerance: \(allowed).")
+    print("Relative error was \(rel) (tolerance: \(allowed)).")
     return true
   }
   return false
@@ -63,7 +83,6 @@ final class ArithmeticTests: XCTestCase {
   func testPolar<T>(_ type: T.Type)
   where T: BinaryFloatingPoint, T: Real,
         T.Exponent: FixedWidthInteger, T.RawSignificand: FixedWidthInteger {
-    
     // In order to support round-tripping from rectangular to polar coordinate
     // systems, as a special case phase can be non-finite when length is
     // either zero or infinity.
@@ -76,10 +95,9 @@ final class ArithmeticTests: XCTestCase {
     XCTAssertEqual(Complex<T>(length:-.infinity, phase: .infinity), .infinity)
     XCTAssertEqual(Complex<T>(length:-.infinity, phase:-.infinity), .infinity)
     XCTAssertEqual(Complex<T>(length:-.infinity, phase: .nan     ), .infinity)
-          
+    
     let exponentRange =
-      (T.leastNormalMagnitude.exponent + T.Exponent(T.significandBitCount)) ...
-        T.greatestFiniteMagnitude.exponent
+    T.leastNormalMagnitude.exponent ... T.greatestFiniteMagnitude.exponent
     let inputs = (0..<100).map { _ in
       Polar(length: T(
         sign: .plus,
@@ -136,20 +154,29 @@ final class ArithmeticTests: XCTestCase {
       // Now test multiplication and division using the polar inputs:
       for q in inputs {
         let w = Complex(length: q.length, phase: q.phase)
-        let product = Complex(length: p.length * q.length, phase: p.phase + q.phase)
+        var product = Complex(length: p.length, phase: p.phase + q.phase)
+        product.real *= q.length
+        product.imaginary *= q.length
         if checkMultiply(z, w, expected: product, ulps: 16) { XCTFail() }
-        let quotient = Complex(length: p.length / q.length, phase: p.phase - q.phase)
+        var quotient = Complex(length: p.length, phase: p.phase - q.phase)
+        quotient.real /= q.length
+        quotient.imaginary /= q.length
         if checkDivide(z, w, expected: quotient, ulps: 16) { XCTFail() }
       }
     }
   }
   
   func testPolar() {
+#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64)) && LONG_TESTS
+    if #available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *) {
+      testPolar(Float16.self)
+    }
+#endif
     testPolar(Float.self)
     testPolar(Double.self)
-    #if (arch(i386) || arch(x86_64)) && !os(Windows) && !os(Android)
+#if (arch(i386) || arch(x86_64)) && !os(Windows) && !os(Android)
     testPolar(Float80.self)
-    #endif
+#endif
   }
   
   func testBaudinSmith() {
@@ -191,16 +218,38 @@ final class ArithmeticTests: XCTestCase {
                       Complex(1.02951151789360578e-84, 6.97145987515076231e-220)),
     ]
     for test in vectors {
-      if checkDivide(test.a, test.b, expected: test.c, ulps: 0.5) { XCTFail() }
+      if checkDivide(test.a, test.b, expected: test.c, ulps: 1.0) { XCTFail() }
       if checkDivide(test.a, test.c, expected: test.b, ulps: 1.0) { XCTFail() }
       if checkMultiply(test.b, test.c, expected: test.a, ulps: 1.0) { XCTFail() }
     }
   }
-
+  
   func testDivisionByZero() {
     XCTAssertFalse((Complex(0, 0) / Complex(0, 0)).isFinite)
     XCTAssertFalse((Complex(1, 1) / Complex(0, 0)).isFinite)
     XCTAssertFalse((Complex.infinity / Complex(0, 0)).isFinite)
     XCTAssertFalse((Complex.i / Complex(0, 0)).isFinite)
+    
+  }
+  
+#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64)) && LONG_TESTS
+  @available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+  func testFloat16DivisionSemiExhaustive() {
+    func complex(bitPattern: UInt32) -> Complex<Float16> {
+      Complex(
+        Float16(bitPattern: UInt16(truncatingIfNeeded: bitPattern)),
+        Float16(bitPattern: UInt16(truncatingIfNeeded: bitPattern >> 16))
+      )
+    }
+    for bits in 0 ... UInt32.max {
+      let a = complex(bitPattern: bits)
+      if bits & 0xfffff == 0 { print(a) }
+      let b = complex(bitPattern: UInt32.random(in: 0 ... .max))
+      var q = Complex<Float>(a)/Complex<Float>(b)
+      if checkDivide(a, b, expected: Complex<Float16>(q), ulps: 4) { XCTFail() }
+      q = Complex<Float>(b)/Complex<Float>(a)
+      if checkDivide(b, a, expected: Complex<Float16>(q), ulps: 4) { XCTFail() }
+    }
   }
+#endif
 }