Provide a default implementation of multipliedFullWidth (swiftlang#25346)

stephentyrone · natecook1000 · commit 2df36527d3d7 · 2019-06-11T22:02:48.000-07:00
* Provide a default implementation of multipliedFullWidth

Previously, [U]Int64 fatalErrored on 32b platforms, which is obviously undesirable. This PR provides a default implementation on FixedWidthInteger, which is not ideally efficient for all types, but is correct, and gives the optimizer all the information that it needs to generate good code in the important case of Int64 arithmetic on 32b platforms. There's still some minor room for improvement, but we'll call that an optimizer bug now.

* Clarify comments somewhat, remove `merge` nested function

I was only using `merge` in one place, so making it a function seems unnecessary. Also got rid of some `truncatingIfNeeded` inits where the compiler is able to reason that no checks are needed anyway.

* Add some basic test coverage specifically for multipliedFullWidth

* Fix typo, further clarify bounds comments.

* Make new defaulted implementation @_aEIC so we don't need availability.
diff --git a/stdlib/public/core/IntegerTypes.swift.gyb b/stdlib/public/core/IntegerTypes.swift.gyb
@@ -1485,6 +1485,7 @@ ${assignmentOperatorComment(x.operator, True)}
 % end
 
 %   dbits = bits*2
+%   if bits <= word_bits:
   /// Returns a tuple containing the high and low parts of the result of
   /// multiplying this value by the given value.
   ///
@@ -1517,11 +1518,6 @@ ${assignmentOperatorComment(x.operator, True)}
   public func multipliedFullWidth(by other: ${Self})
     -> (high: ${Self}, low: ${Self}.Magnitude) {
     // FIXME(integers): tests
-%   # 128 bit types are not provided by the 32-bit LLVM
-%   if word_bits == 32 and bits == 64:
-    // FIXME(integers): implement
-    fatalError("Operation is not supported")
-%   else:
     let lhs_ = Builtin.${z}ext_Int${bits}_Int${dbits}(self._value)
     let rhs_ = Builtin.${z}ext_Int${bits}_Int${dbits}(other._value)
 
@@ -1531,8 +1527,8 @@ ${assignmentOperatorComment(x.operator, True)}
     let shifted = Builtin.ashr_Int${dbits}(res, shift)
     let high = ${Self}(Builtin.truncOrBitCast_Int${dbits}_Int${bits}(shifted))
     return (high: high, low: low)
-%   end
   }
+%   end
 
   /// Returns a tuple containing the quotient and remainder of dividing the
   /// given value by this value.
diff --git a/stdlib/public/core/Integers.swift b/stdlib/public/core/Integers.swift
@@ -2317,6 +2317,65 @@ extension FixedWidthInteger {
     return byteSwapped
 #endif
   }
+  
+  // Default implementation of multipliedFullWidth.
+  //
+  // This implementation is mainly intended for [U]Int64 on 32b platforms. It
+  // will not be especially efficient for other types that do not provide their
+  // own implementation, but neither will it be catastrophically bad. It can
+  // surely be improved on even for Int64, but that is mostly an optimization
+  // problem; the basic algorithm here gives the compiler all the information
+  // that it needs to generate efficient code.
+  @_alwaysEmitIntoClient
+  public func multipliedFullWidth(by other: Self) -> (high: Self, low: Magnitude) {
+    // We define a utility function for splitting an integer into high and low
+    // halves. Note that the low part is always unsigned, while the high part
+    // matches the signedness of the input type. Both result types are the
+    // full width of the original number; this may be surprising at first, but
+    // there are two reasons for it:
+    //
+    // - we're going to use these as inputs to a multiplication operation, and
+    //   &* is quite a bit less verbose than `multipliedFullWidth`, so it makes
+    //   the rest of the code in this function somewhat easier to read.
+    //
+    // - there's no "half width type" that we can get at from this generic
+    //   context, so there's not really another option anyway.
+    //
+    // Fortunately, the compiler is pretty good about propagating the necessary
+    // information to optimize away unnecessary arithmetic.
+    func split<T: FixedWidthInteger>(_ x: T) -> (high: T, low: T.Magnitude) {
+      let n = T.bitWidth/2
+      return (x >> n, T.Magnitude(truncatingIfNeeded: x) & ((1 &<< n) &- 1))
+    }
+    // Split `self` and `other` into high and low parts, compute the partial
+    // products carrying high words in as we go. We use the wrapping operators
+    // purely as an optimization hint to the compiler; none of these operations
+    // will ever wrap due to the constraints on the arithmetic, and every value
+    // passed to a plain `Self(_:)` conversion below is a half-word in 0 ... B-1.
+    // The bounds are documented before each line for signed types. For
+    // unsigned types, the bounds are well known and easier to derive, so they
+    // are not documented here; they all boil down to the fact that
+    // a*b + c + d cannot overflow a double-width result with unsigned a, b, c, d.
+    let (x1, x0) = split(self)
+    let (y1, y0) = split(other)
+    // If B is 2^(bitWidth/2), x0 and y0 are in 0 ... B-1, so their product is
+    // in 0 ... B^2-2B+1. For further analysis, we'll need the fact that
+    // the high word is in 0 ... B-2.
+    let p00 = x0 &* y0
+    // x1 is in -B/2 ... B/2-1, so the product x1*y0 is in
+    // -(B^2-B)/2 ... (B^2-3B+2)/2; after adding the high word of p00, the
+    // result is in -(B^2-B)/2 ... (B^2-B-2)/2.
+    let p01 = x1 &* Self(y0) &+ Self(split(p00).high)
+    // The previous analysis holds for this product as well, and the sum is
+    // in -(B^2-B)/2 ... (B^2-B)/2.
+    let p10 = Self(x0) &* y1 &+ Self(split(p01).low)
+    // No analysis is necessary for this term, because we know the product as
+    // a whole cannot overflow, and this term is the final high word of the
+    // product.
+    let p11 = x1 &* y1 &+ split(p01).high &+ split(p10).high
+    // Now we only need to assemble the low word of the product.
+    return (p11, split(p10).low << (bitWidth/2) | split(p00).low)
+  }
 
   /// Returns the result of shifting a value's binary representation the
   /// specified number of digits to the right, masking the shift amount to the
diff --git a/test/stdlib/MultipliedFullWidth.swift b/test/stdlib/MultipliedFullWidth.swift
@@ -0,0 +1,127 @@
+//===--- MultipliedFullWidth.swift ----------------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2019 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %target-run-simple-swift
+// REQUIRES: executable_test
+
+import StdlibUnittest
+
+var tests = TestSuite("MultipliedFullWidth")
+
+/// Expects `x.multipliedFullWidth(by: y)` to produce exactly (`high`, `low`).
+func testCase<T: FixedWidthInteger>(
+  _ x: T, _ y: T, high: T, low: T.Magnitude, line: UInt = #line) {
+  let (actualHigh, actualLow) = x.multipliedFullWidth(by: y)
+  expectEqual(high, actualHigh, line: line)
+  expectEqual(low, actualLow, line: line)
+}
+
+func specialValues<T: FixedWidthInteger & SignedInteger>(_ type: T.Type) {
+  let umin = T.Magnitude(truncatingIfNeeded: T.min) // bit pattern of T.min
+  testCase(T.min, .min, high: -(.min >> 1), low: 0)
+  testCase(T.min, -1,   high: 0, low: umin)
+  testCase(T.min,  0,   high: 0, low: 0)
+  testCase(T.min,  1,   high: -1, low: umin)
+  testCase(T.min, .max, high: .min >> 1, low: umin)
+  // -1 times each special value: the product is the negation of the operand.
+  testCase(T(-1), .min, high: 0, low: umin)
+  testCase(T(-1), -1,   high: 0, low: 1)
+  testCase(T(-1),  0,   high: 0, low: 0)
+  testCase(T(-1),  1,   high: -1, low: .max)
+  testCase(T(-1), .max, high: -1, low: umin + 1)
+  // 0 times each special value: both words of the product are zero.
+  testCase(T(0), .min, high: 0, low: 0)
+  testCase(T(0), -1,   high: 0, low: 0)
+  testCase(T(0),  0,   high: 0, low: 0)
+  testCase(T(0),  1,   high: 0, low: 0)
+  testCase(T(0), .max, high: 0, low: 0)
+  // 1 times each special value: the operand sign-extended into the high word.
+  testCase(T(1), .min, high: -1, low: umin)
+  testCase(T(1), -1,   high: -1, low: .max)
+  testCase(T(1),  0,   high: 0, low: 0)
+  testCase(T(1),  1,   high: 0, low: 1)
+  testCase(T(1), .max, high: 0, low: .max >> 1) // Magnitude.max >> 1: T.max bits
+  // T.max times each special value.
+  testCase(T.max, .min, high: .min >> 1, low: umin)
+  testCase(T.max, -1,   high: -1, low: umin + 1)
+  testCase(T.max,  0,   high: 0, low: 0)
+  testCase(T.max,  1,   high: 0, low: .max >> 1)
+  testCase(T.max, .max, high: (.max >> 1), low: 1)
+}
+
+func specialValues<T: FixedWidthInteger & UnsignedInteger>(_ type: T.Type) {
+  testCase(T(0),  0,   high: 0, low: 0)
+  testCase(T(0),  1,   high: 0, low: 0)
+  testCase(T(0), .max, high: 0, low: 0)
+  // 1 times each special value: the operand itself, with a zero high word.
+  testCase(T(1),  0,   high: 0, low: 0)
+  testCase(T(1),  1,   high: 0, low: 1)
+  testCase(T(1), .max, high: 0, low: .max)
+  // With n = bitWidth: T.max * T.max = (2^n - 1)^2 = (2^n - 2) * 2^n + 1.
+  testCase(T.max,  0,   high: 0, low: 0)
+  testCase(T.max,  1,   high: 0, low: .max)
+  testCase(T.max, .max, high: .max-1, low: 1)
+}
+
+tests.test("Special Values") {
+  specialValues(Int.self)
+  specialValues(Int64.self)
+  specialValues(Int32.self)
+  specialValues(Int16.self)
+  specialValues(Int8.self)
+  // Unsigned types resolve to the UnsignedInteger overload of specialValues.
+  specialValues(UInt.self)
+  specialValues(UInt64.self)
+  specialValues(UInt32.self)
+  specialValues(UInt16.self)
+  specialValues(UInt8.self)
+}
+
+tests.test("Random Values") {
+  // Extra coverage for [U]Int64: on 32b platforms they are the only standard
+  // integer types that fall back to the generic default implementation.
+  testCase(Int64(-5837700935641288840), -1537421853862307457, high: 486536212510185592, low: 3055263144559363208)
+  testCase(Int64(1275671358463268836), 7781435829978284036, high: 538119614841437377, low: 14789118443021950864)
+  testCase(Int64(4911382318934676967), -5753361984332212917, high: -1531812888571062585, low: 1722298197364104621)
+  testCase(Int64(6581943526282064299), -8155192887281934825, high: -2909837032245044682, low: 16706127436327993437)
+  testCase(Int64(4009108071534959395), 7605188192539249328, high: 1652867370329384990, low: 3839516780320392720)
+  testCase(Int64(-1272471934452731280), -7713709059189882656, high: 532098144210826160, low: 4919265615377605120)
+  testCase(Int64(-1290602245486355209), -6877877092931971073, high: 481201646472028302, low: 4015257672509033225)
+  testCase(Int64(1966873427191941886), -7829903732960672311, high: -834858960925259072, low: 12998587081554941806)
+  testCase(Int64(5459471085932887725), 7323207134727813062, high: 2167365549637832126, low: 5826569093894448334)
+  testCase(Int64(-5681348775805725880), -6546051581806832250, high: 2016095739825622823, low: 7531931343377498032)
+  testCase(Int64(3528630718229643203), 6780383198781339163, high: 1297002242834103876, low: 16845851682892995473)
+  testCase(Int64(4386302929483327645), 756139473360675718, high: 179796324698125913, low: 13652654049648998702)
+  testCase(Int64(-2864416372170195291), 5089997120359086926, high: -790376395292167927, low: 8341529919881354566)
+  testCase(Int64(-252886279681789793), 1113918432442210295, high: -15270699648874904, low: 4582052466224525929)
+  testCase(Int64(-7821806154093904666), -678157520322455918, high: 287553003647030877, low: 6476241133902266156)
+  testCase(Int64(-7739162216163589826), 3946867172269483361, high: -1655871907247741938, low: 13863106094322986622)
+  // The same kind of coverage for UInt64.
+  testCase(UInt64(4052776605025255999), 17841868768407320997, high: 3919884617339462744, low: 486827993115916699)
+  testCase(UInt64(6242835766066895539), 14960190906716810460, high: 5062899690398282642, low: 14718350117826688468)
+  testCase(UInt64(17427627038899386484), 13127734187148388607, high: 12402473540330496943, low: 11581729162526677900)
+  testCase(UInt64(14992872905705044844), 12933105414992193421, high: 10511578899141219143, low: 7252341782600986236)
+  testCase(UInt64(12642327504267244590), 10708397907431293358, high: 7338914274020844379, low: 8873679764824466756)
+  testCase(UInt64(18257718462988034339), 17327659287939371125, high: 17150101049683916791, low: 14387647780301477119)
+  testCase(UInt64(5589411463208969260), 14342285504591657788, high: 4345749834640583520, low: 12301233398332628560)
+  testCase(UInt64(14319626538136147986), 2140855187369381019, high: 1661878466620705928, low: 2387587391530298086)
+  testCase(UInt64(12737453267169023056), 10991462038848276938, high: 7589590526017326520, low: 4333382898129426208)
+  testCase(UInt64(13832741301927421318), 7713698894698105596, high: 5784305396386691701, low: 6880744869607156712)
+  testCase(UInt64(5299309970076755930), 12147554660789977612, high: 3489702967025088672, low: 8435073470527345208)
+  testCase(UInt64(3775627568330760013), 12573993794040378591, high: 2573609598696611841, low: 11258650814777796627)
+  testCase(UInt64(10163828939432769169), 11406425048812406036, high: 6284737975620258563, low: 8064735036375816276)
+  testCase(UInt64(10553402338235046132), 16330771020588292162, high: 9342851854245941300, low: 7535126182307876584)
+  testCase(UInt64(17113612905570890777), 11972779332523394977, high: 11107516322766487141, low: 955396679557657273)
+  testCase(UInt64(10933450006210087838), 18204962163032170108, high: 10790145018498752040, low: 692054466472649864)
+}
+
+runAllTests()