WIP

stephentyrone · stephentyrone · commit 97bab250eb6b · 2024-05-14T10:24:31.000-04:00
diff --git a/Sources/ComplexModule/Complex+AlgebraicField.swift b/Sources/ComplexModule/Complex+AlgebraicField.swift
@@ -26,53 +26,96 @@ extension Complex: AlgebraicField {
     Complex(x, -y)
   }
   
+  @_transparent
+  public static func /=(z: inout Complex, w: Complex) {
+    z = z / w
+  }
+  
   @_transparent
   public static func /(z: Complex, w: Complex) -> Complex {
-    // Try the naive expression z/w = z*conj(w) / |w|^2; if we can compute
-    // this without over/underflow, everything is fine and the result is
-    // correct. If not, we have to rescale and do the computation carefully.
+    // Try the naive expression z/w = z * (conj(w) / |w|^2); if we can
+    // compute this without over/underflow, everything is fine and the
+    // result is correct. If not, we have to rescale and do the
+    // computation carefully (see below).
     let lenSq = w.lengthSquared
     guard lenSq.isNormal else { return rescaledDivide(z, w) }
     return z * (w.conjugate.divided(by: lenSq))
   }
   
-  @_transparent
-  public static func /=(z: inout Complex, w: Complex) {
-    z = z / w
-  }
-  
-  @usableFromInline @_alwaysEmitIntoClient @inline(never)
+  @inline(never)
+  @_specialize(exported: true, where RealType == Float)
+  @_specialize(exported: true, where RealType == Double)
+  @usableFromInline
   internal static func rescaledDivide(_ z: Complex, _ w: Complex) -> Complex {
     if w.isZero { return .infinity }
-    if z.isZero || !w.isFinite { return .zero }
-    // TODO: detect when RealType is Float and just promote to Double, then
-    // use the naive algorithm.
-    let zScale = z.magnitude
-    let wScale = w.magnitude
-    let zNorm = z.divided(by: zScale)
-    let wNorm = w.divided(by: wScale)
-    let r = (zNorm * wNorm.conjugate).divided(by: wNorm.lengthSquared)
-    // At this point, the result is (r * zScale)/wScale computed without
-    // undue overflow or underflow. We know that r is close to unity, so
-    // the question is simply what order in which to do this computation
-    // to avoid spurious overflow or underflow. There are three options
-    // to choose from:
+    if !w.isFinite { return .zero }
+    // Scaling algorithm adapted from Doug Priest's "Efficient Scaling for
+    // Complex Division":
     //
-    // - r * (zScale / wScale)
-    // - (r * zScale) / wScale
-    // - (r / wScale) * zScale
+    // 1. Choose real scale s ≅ |w|^(-¾), an exact power of the radix.¹
+    // 2. wʹ ← sw
+    // 3. zʹ ← sz
+    // 4. return zʹ * (wʹ.conjugate / wʹ.lengthSquared)
     //
-    // The simplest case is when zScale / wScale is normal:
-    if (zScale / wScale).isNormal {
-      return r.multiplied(by: zScale / wScale)
-    }
-    // Otherwise, we need to compute either rNorm * zScale or rNorm / wScale
-    // first. Choose the first if the first scaling behaves well, otherwise
-    // choose the other one.
-    if (r.magnitude * zScale).isNormal {
-      return r.multiplied(by: zScale).divided(by: wScale)
+    // Why is this safe and accurate? First, observe that wʹ and zʹ are both
+    // computed exactly because:
+    //
+    // - s is an exact power of radix.
+    // - wʹ ~ |w|^(¼), and hence cannot overflow or underflow.
+    // - zʹ can overflow or underflow, but only if the final result also
+    //      overflows or underflows (this is more subtle than it might
+    //      appear at first; Priest has to be very careful about it
+    //      because you get into trouble precisely in the case where
+    //      |w| is very close to 1. However, if we were in that case, we would
+    //      have just handled the division inline and never would have ended
+    //      up here).
+    //
+    // Next observe that |wʹ.lengthSquared| ~ |w|^(½), so again this cannot
+    // overflow or underflow, and neither can
+    // (wʹ.conjugate / wʹ.lengthSquared).
+    //
+    // TODO(WIP): the subject of this sentence was lost in editing (footnote ²
+    // mentions wʺ, which is never defined). [...] are of comparable
+    // magnitude, and in particular the exponents of their magnitudes have the
+    // same sign, so either both are a contraction or both are an expansion,
+    // so any intermediate overflow or underflow is deserved.²
+    //
+    // Note that because the scale factor is always a power of the radix,
+    // the rescaling does not affect rounding, and so this algorithm is scale-
+    // invariant compared to the mainline `/` implementation, up to the
+    // underflow boundary.
+    //
+    // ¹ This falls apart for formats where the number of significand bits is
+    // comparable to the exponent range (in particular Float16), because then
+    // the desired s is not representable. E.g. if w ~ .leastNonzeroMagnitude
+    // in Float16 (0x1p-24), we want to have s = 0x1p18, which is outside the
+    // range of representable values. This does not occur for any other types,
+    // so we just carry a special-case implementation for Float16 to fix it.
+    //
+    // Priest never had to worry about this because Float16 didn't really exist
+    // yet when he published and he was interested in double anyway.
+    //
+    // ² This WOULD NOT BE TRUE if we hadn't already handled well-scaled
+    // divisors in the mainline path for the `/` operator above; it only
+    // holds for sufficiently badly-scaled `w`. If the well-scaled cases
+    // were not already eliminated, it would be possible to have |wʹ| a
+    // little bigger than one and |wʺ| a bit smaller than one (or vice-versa), so
+    // that intermediate undeserved overflow or underflow might occur. Priest
+    // has to worry about this, but we do not.
+    if w.magnitude < RealType.leastNormalMagnitude {
+      let z = z.divided(by: RealType.leastNormalMagnitude)
+      let w = w.divided(by: RealType.leastNormalMagnitude)
+      return rescaledDivide(z, w)
     }
-    return r.divided(by: wScale).multiplied(by: zScale)
+    let exponent = -3 * w.magnitude.exponent / 4 // s ≅ |w|^(-¾)
+    let s = RealType(
+      sign: .plus,
+      exponent: exponent,
+      significand: 1 // significand 1 makes s an exact power of the radix
+    )
+    let wʹ = w.multiplied(by: s)
+    let zʹ = z.multiplied(by: s)
+    return zʹ / wʹ // re-enters `/` with the rescaled operands
   }
   
   /// A normalized complex number with the same phase as this value.
diff --git a/Tests/ComplexTests/ArithmeticTests.swift b/Tests/ComplexTests/ArithmeticTests.swift
@@ -30,10 +30,10 @@ func checkMultiply<T>(
 ) -> Bool {
   let observed = a*b
   let rel = relativeError(observed, expected)
-  if rel > allowed {
+  guard rel <= allowed else {
     print("Over-large error in \(a)*\(b)")
     print("Expected: \(expected)\nObserved: \(observed)")
-    print("Relative error was \(rel) (tolerance: \(allowed).")
+    print("Relative error was \(rel) (tolerance: \(allowed)).")
     return true
   }
   return false
@@ -44,10 +44,10 @@ func checkDivide<T>(
 ) -> Bool {
   let observed = a/b
   let rel = relativeError(observed, expected)
-  if rel > allowed {
+  guard rel <= allowed else {
     print("Over-large error in \(a)/\(b)")
     print("Expected: \(expected)\nObserved: \(observed)")
-    print("Relative error was \(rel) (tolerance: \(allowed).")
+    print("Relative error was \(rel) (tolerance: \(allowed)).")
     return true
   }
   return false
@@ -63,93 +63,99 @@ final class ArithmeticTests: XCTestCase {
   func testPolar<T>(_ type: T.Type)
   where T: BinaryFloatingPoint, T: Real,
         T.Exponent: FixedWidthInteger, T.RawSignificand: FixedWidthInteger {
-    
-    // In order to support round-tripping from rectangular to polar coordinate
-    // systems, as a special case phase can be non-finite when length is
-    // either zero or infinity.
-    XCTAssertEqual(Complex<T>(length: .zero, phase: .infinity), .zero)
-    XCTAssertEqual(Complex<T>(length: .zero, phase:-.infinity), .zero)
-    XCTAssertEqual(Complex<T>(length: .zero, phase: .nan     ), .zero)
-    XCTAssertEqual(Complex<T>(length: .infinity, phase: .infinity), .infinity)
-    XCTAssertEqual(Complex<T>(length: .infinity, phase:-.infinity), .infinity)
-    XCTAssertEqual(Complex<T>(length: .infinity, phase: .nan     ), .infinity)
-    XCTAssertEqual(Complex<T>(length:-.infinity, phase: .infinity), .infinity)
-    XCTAssertEqual(Complex<T>(length:-.infinity, phase:-.infinity), .infinity)
-    XCTAssertEqual(Complex<T>(length:-.infinity, phase: .nan     ), .infinity)
           
-    let exponentRange =
-      (T.leastNormalMagnitude.exponent + T.Exponent(T.significandBitCount)) ...
-        T.greatestFiniteMagnitude.exponent
-    let inputs = (0..<100).map { _ in
-      Polar(length: T(
-        sign: .plus,
-        exponent: T.Exponent.random(in: exponentRange),
-        significand: T.random(in: 1 ..< 2)
-      ), phase: T.random(in: -.pi ... .pi))
-    }
-    for p in inputs {
-      // first test that each value can round-trip between rectangular and
-      // polar coordinates with reasonable accuracy. We'll probably need to
-      // relax this for some platforms (currently we're using the default
-      // RNG, which means we don't get the same sequence of values each time;
-      // this is good--more test coverage!--and bad, because without tight
-      // bounds on every platform's libm, we can't get tight bounds on the
-      // accuracy of these operations, so we need to relax them gradually).
-      let z = Complex(length: p.length, phase: p.phase)
-      if !closeEnough(z.length, p.length, ulps: 16) {
-        print("p = \(p)\nz = \(z)\nz.length = \(z.length)")
-        XCTFail()
-      }
-      if !closeEnough(z.phase, p.phase, ulps: 16) {
-        print("p = \(p)\nz = \(z)\nz.phase = \(z.phase)")
-        XCTFail()
-      }
-      // Complex(length: -r, phase: θ) = -Complex(length: r, phase: θ).
-      let w = Complex(length: -p.length, phase: p.phase)
-      if w != -z {
-        print("p = \(p)\nw = \(w)\nz = \(z)")
-        XCTFail()
-      }
-      XCTAssertEqual(w, -z)
-      // if length*length is normal, it should be lengthSquared, up
-      // to small error.
-      if (p.length*p.length).isNormal {
-        if !closeEnough(z.lengthSquared, p.length*p.length, ulps: 16) {
-          print("p = \(p)\nz = \(z)\nz.lengthSquared = \(z.lengthSquared)")
-          XCTFail()
-        }
-      }
-      // Test reciprocal and normalized:
-      let r = Complex(length: 1/p.length, phase: -p.phase)
-      if r.isNormal {
-        if relativeError(r, z.reciprocal!) > 16 {
-          print("p = \(p)\nz = \(z)\nz.reciprocal = \(r)")
-          XCTFail()
+          // In order to support round-tripping from rectangular to polar coordinate
+          // systems, as a special case phase can be non-finite when length is
+          // either zero or infinity.
+          XCTAssertEqual(Complex<T>(length: .zero, phase: .infinity), .zero)
+          XCTAssertEqual(Complex<T>(length: .zero, phase:-.infinity), .zero)
+          XCTAssertEqual(Complex<T>(length: .zero, phase: .nan     ), .zero)
+          XCTAssertEqual(Complex<T>(length: .infinity, phase: .infinity), .infinity)
+          XCTAssertEqual(Complex<T>(length: .infinity, phase:-.infinity), .infinity)
+          XCTAssertEqual(Complex<T>(length: .infinity, phase: .nan     ), .infinity)
+          XCTAssertEqual(Complex<T>(length:-.infinity, phase: .infinity), .infinity)
+          XCTAssertEqual(Complex<T>(length:-.infinity, phase:-.infinity), .infinity)
+          XCTAssertEqual(Complex<T>(length:-.infinity, phase: .nan     ), .infinity)
+          
+          let exponentRange =
+          T.leastNormalMagnitude.exponent ... T.greatestFiniteMagnitude.exponent
+          let inputs = (0..<100).map { _ in
+            Polar(length: T(
+              sign: .plus,
+              exponent: T.Exponent.random(in: exponentRange),
+              significand: T.random(in: 1 ..< 2)
+            ), phase: T.random(in: -.pi ... .pi))
+          }
+          for p in inputs {
+            // first test that each value can round-trip between rectangular and
+            // polar coordinates with reasonable accuracy. We'll probably need to
+            // relax this for some platforms (currently we're using the default
+            // RNG, which means we don't get the same sequence of values each time;
+            // this is good--more test coverage!--and bad, because without tight
+            // bounds on every platform's libm, we can't get tight bounds on the
+            // accuracy of these operations, so we need to relax them gradually).
+            let z = Complex(length: p.length, phase: p.phase)
+            if !closeEnough(z.length, p.length, ulps: 16) {
+              print("p = \(p)\nz = \(z)\nz.length = \(z.length)")
+              XCTFail()
+            }
+            if !closeEnough(z.phase, p.phase, ulps: 16) {
+              print("p = \(p)\nz = \(z)\nz.phase = \(z.phase)")
+              XCTFail()
+            }
+            // Complex(length: -r, phase: θ) = -Complex(length: r, phase: θ).
+            let w = Complex(length: -p.length, phase: p.phase)
+            if w != -z {
+              print("p = \(p)\nw = \(w)\nz = \(z)")
+              XCTFail()
+            }
+            XCTAssertEqual(w, -z)
+            // if length*length is normal, it should be lengthSquared, up
+            // to small error.
+            if (p.length*p.length).isNormal {
+              if !closeEnough(z.lengthSquared, p.length*p.length, ulps: 16) {
+                print("p = \(p)\nz = \(z)\nz.lengthSquared = \(z.lengthSquared)")
+                XCTFail()
+              }
+            }
+            // Test reciprocal and normalized:
+            let r = Complex(length: 1/p.length, phase: -p.phase)
+            if r.isNormal {
+              if relativeError(r, z.reciprocal!) > 16 {
+                print("p = \(p)\nz = \(z)\nz.reciprocal = \(r)")
+                XCTFail()
+              }
+            } else { XCTAssertNil(z.reciprocal) }
+            let n = Complex(length: 1, phase: p.phase)
+            if relativeError(n, z.normalized!) > 16 {
+              print("p = \(p)\nz = \(z)\nz.normalized = \(n)")
+              XCTFail()
+            }
+            
+            // Now test multiplication and division using the polar inputs:
+            for q in inputs {
+              let w = Complex(length: q.length, phase: q.phase)
+              var product = Complex(length: p.length, phase: p.phase + q.phase)
+              product.real *= q.length
+              product.imaginary *= q.length
+              if checkMultiply(z, w, expected: product, ulps: 16) { XCTFail() }
+              var quotient = Complex(length: p.length, phase: p.phase - q.phase)
+              quotient.real /= q.length
+              quotient.imaginary /= q.length
+              if checkDivide(z, w, expected: quotient, ulps: 16) { XCTFail() }
+            }
+          }
         }
-      } else { XCTAssertNil(z.reciprocal) }
-      let n = Complex(length: 1, phase: p.phase)
-      if relativeError(n, z.normalized!) > 16 {
-        print("p = \(p)\nz = \(z)\nz.normalized = \(n)")
-        XCTFail()
-      }
-      
-      // Now test multiplication and division using the polar inputs:
-      for q in inputs {
-        let w = Complex(length: q.length, phase: q.phase)
-        let product = Complex(length: p.length * q.length, phase: p.phase + q.phase)
-        if checkMultiply(z, w, expected: product, ulps: 16) { XCTFail() }
-        let quotient = Complex(length: p.length / q.length, phase: p.phase - q.phase)
-        if checkDivide(z, w, expected: quotient, ulps: 16) { XCTFail() }
-      }
-    }
-  }
   
   func testPolar() {
+#if (arch(arm64))
+    // testPolar(Float16.self)
+#endif
     testPolar(Float.self)
     testPolar(Double.self)
-    #if (arch(i386) || arch(x86_64)) && !os(Windows) && !os(Android)
+#if (arch(i386) || arch(x86_64)) && !os(Windows) && !os(Android)
     testPolar(Float80.self)
-    #endif
+#endif
   }
   
   func testBaudinSmith() {
@@ -191,16 +197,49 @@ final class ArithmeticTests: XCTestCase {
                       Complex(1.02951151789360578e-84, 6.97145987515076231e-220)),
     ]
     for test in vectors {
-      if checkDivide(test.a, test.b, expected: test.c, ulps: 0.5) { XCTFail() }
+      if checkDivide(test.a, test.b, expected: test.c, ulps: 1.0) { XCTFail() }
       if checkDivide(test.a, test.c, expected: test.b, ulps: 1.0) { XCTFail() }
       if checkMultiply(test.b, test.c, expected: test.a, ulps: 1.0) { XCTFail() }
     }
   }
-
+  
   func testDivisionByZero() {
     XCTAssertFalse((Complex(0, 0) / Complex(0, 0)).isFinite)
     XCTAssertFalse((Complex(1, 1) / Complex(0, 0)).isFinite)
     XCTAssertFalse((Complex.infinity / Complex(0, 0)).isFinite)
     XCTAssertFalse((Complex.i / Complex(0, 0)).isFinite)
+    
+  }
+  
+#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64))
+
+  /*
+  @available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+  func testFloat16DivisionSemiExhaustive() {
+    func complex(bitPattern: UInt32) -> Complex<Float16> {
+      Complex(
+        Float16(bitPattern: UInt16(truncatingIfNeeded: bitPattern)),
+        Float16(bitPattern: UInt16(truncatingIfNeeded: bitPattern >> 16))
+      )
+    }
+    for bits in 0 ... UInt32.max {
+      let a = complex(bitPattern: bits)
+      if bits & 0xfffff == 0 { print(a) }
+      let b = complex(bitPattern: UInt32.random(in: 0 ... .max))
+      var q = Complex<Float>(a)/Complex<Float>(b)
+      if checkDivide(a, b, expected: Complex<Float16>(q), ulps: 32) { XCTFail() }
+      q = Complex<Float>(b)/Complex<Float>(a)
+      if checkDivide(b, a, expected: Complex<Float16>(q), ulps: 32) { XCTFail() }
+    }
+  }
+   */
+  
+  @available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+  func testSpecificFloat16Value() {
+    let a = Complex<Float16>(4.66, 3e-07)
+    let b = Complex<Float16>(-4.32e-05, 4.977e-05)
+    let q = a / b
+    XCTAssertEqual(q, Complex<Float16>(-46368.0, -53376.0))
   }
+#endif
 }