@@ -850,14 +850,166 @@ widemul(x::Bool, y::Number) = x * y
 widemul(x::Number, y::Bool) = x * y
 
 
-# Int128 multiply and divide
-*(x::T, y::T) where {T<:Union{Int128,UInt128}} = mul_int(x, y)
+## wide multiplication, Int128 multiply and divide ##
+
+if Core.sizeof(Int) == 4
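+    # On 32-bit targets there is no native 128-bit integer arithmetic,
+    # so build Int128/UInt128 multiply and divide out of 64-bit operations.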
+    function widemul(u::Int64, v::Int64)
+        local u0::UInt64, v0::UInt64, w0::UInt64
+        local u1::Int64, v1::Int64, w1::UInt64, w2::Int64, t::UInt64
+
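+        # split u and v into 32-bit halves and combine the four partial
+        # products (schoolbook multiplication), carrying the middle terms via t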
+        u0 = u & 0xffffffff; u1 = u >> 32
+        v0 = v & 0xffffffff; v1 = v >> 32
+        w0 = u0 * v0
+        t = reinterpret(UInt64, u1) * v0 + (w0 >>> 32)
+        w2 = reinterpret(Int64, t) >> 32
+        w1 = u0 * reinterpret(UInt64, v1) + (t & 0xffffffff)
+        hi = u1 * v1 + w2 + (reinterpret(Int64, w1) >> 32)
+        lo = w0 & 0xffffffff + (w1 << 32)
+        return Int128(hi) << 64 + Int128(lo)
+    end
+
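+    # unsigned variant of the same scheme; with no sign to preserve, the
+    # reinterpret calls and arithmetic shifts above are unnecessary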
+    function widemul(u::UInt64, v::UInt64)
+        local u0::UInt64, v0::UInt64, w0::UInt64
+        local u1::UInt64, v1::UInt64, w1::UInt64, w2::UInt64, t::UInt64
+
+        u0 = u & 0xffffffff; u1 = u >>> 32
+        v0 = v & 0xffffffff; v1 = v >>> 32
+        w0 = u0 * v0
+        t = u1 * v0 + (w0 >>> 32)
+        w2 = t >>> 32
+        w1 = u0 * v1 + (t & 0xffffffff)
+        hi = u1 * v1 + w2 + (w1 >>> 32)
+        lo = w0 & 0xffffffff + (w1 << 32)
+        return UInt128(hi) << 64 + UInt128(lo)
+    end
+
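+    # truncating 128-bit multiply: the u1*v1 partial product lands entirely
+    # above bit 128, so only three 64x64->128 widemuls are needed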
+    function *(u::Int128, v::Int128)
+        u0 = u % UInt64; u1 = Int64(u >> 64)
+        v0 = v % UInt64; v1 = Int64(v >> 64)
+        lolo = widemul(u0, v0)
+        lohi = widemul(reinterpret(Int64, u0), v1)
+        hilo = widemul(u1, reinterpret(Int64, v0))
+        t = reinterpret(UInt128, hilo) + (lolo >>> 64)
+        w1 = reinterpret(UInt128, lohi) + (t & 0xffffffffffffffff)
+        return Int128(lolo & 0xffffffffffffffff) + reinterpret(Int128, w1) << 64
+    end
+
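+    # same layout for the unsigned case, where no reinterpreting is required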
+    function *(u::UInt128, v::UInt128)
+        u0 = u % UInt64; u1 = UInt64(u >>> 64)
+        v0 = v % UInt64; v1 = UInt64(v >>> 64)
+        lolo = widemul(u0, v0)
+        lohi = widemul(u0, v1)
+        hilo = widemul(u1, v0)
+        t = hilo + (lolo >>> 64)
+        w1 = lohi + (t & 0xffffffffffffffff)
+        return (lolo & 0xffffffffffffffff) + UInt128(w1) << 64
+    end
+
+    function _setbit(x::UInt128, i)
+        # faster version of `return x | (UInt128(1) << i)`
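+        # (a variable 128-bit shift is expensive on 32-bit hardware, so shift
+        # within one 32-bit word, then apply a constant 32/64/96-bit shift)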
+        j = i >> 5
+        y = UInt128(one(UInt32) << (i & 0x1f))
+        if j == 0
+            return x | y
+        elseif j == 1
+            return x | (y << 32)
+        elseif j == 2
+            return x | (y << 64)
+        elseif j == 3
+            return x | (y << 96)
+        end
+        return x
+    end
 
-div(x::Int128, y::Int128) = checked_sdiv_int(x, y)
-div(x::UInt128, y::UInt128) = checked_udiv_int(x, y)
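+    # restoring shift-subtract long division: align y with x's leading bit,
+    # then walk the shift back down, subtracting wherever y * 2^n still fits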
+    function divrem(x::UInt128, y::UInt128)
+        iszero(y) && throw(DivideError())
+        if (x >> 64) % UInt64 == 0
+            if (y >> 64) % UInt64 == 0
+                # fast path: upper 64 bits are zero, so we can fallback to UInt64 division
+                q64, x64 = divrem(x % UInt64, y % UInt64)
+                return UInt128(q64), UInt128(x64)
+            else
+                # this implies y>x, so
+                return zero(UInt128), x
+            end
+        end
+        n = leading_zeros(y) - leading_zeros(x)
+        q = zero(UInt128)
+        ys = y << n
+        while n >= 0
+            # ys == y * 2^n
+            if ys <= x
+                x -= ys
+                q = _setbit(q, n)
+                if (x >> 64) % UInt64 == 0
+                    # exit early, similar to above fast path
+                    if (y >> 64) % UInt64 == 0
+                        q64, x64 = divrem(x % UInt64, y % UInt64)
+                        q |= q64
+                        x = UInt128(x64)
+                    end
+                    return q, x
+                end
+            end
+            ys >>>= 1
+            n -= 1
+        end
+        return q, x
+    end
 
-rem(x::Int128, y::Int128) = checked_srem_int(x, y)
-rem(x::UInt128, y::UInt128) = checked_urem_int(x, y)
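+    # the signed cases delegate to BigInt, which keeps the code simple; only
+    # typemin(Int128) / -1 can overflow Int128 and needs an explicit check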
+    function div(x::Int128, y::Int128)
+        (x == typemin(Int128)) & (y == -1) && throw(DivideError())
+        return Int128(div(BigInt(x), BigInt(y)))::Int128
+    end
+    div(x::UInt128, y::UInt128) = divrem(x, y)[1]
+
+    function rem(x::Int128, y::Int128)
+        return Int128(rem(BigInt(x), BigInt(y)))::Int128
+    end
+
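+    # remainder-only copy of the divrem loop above, minus the quotient bookkeeping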
+    function rem(x::UInt128, y::UInt128)
+        iszero(y) && throw(DivideError())
+        if (x >> 64) % UInt64 == 0
+            if (y >> 64) % UInt64 == 0
+                # fast path: upper 64 bits are zero, so we can fallback to UInt64 division
+                return UInt128(rem(x % UInt64, y % UInt64))
+            else
+                # this implies y>x, so
+                return x
+            end
+        end
+        n = leading_zeros(y) - leading_zeros(x)
+        ys = y << n
+        while n >= 0
+            # ys == y * 2^n
+            if ys <= x
+                x -= ys
+                if (x >> 64) % UInt64 == 0
+                    # exit early, similar to above fast path
+                    if (y >> 64) % UInt64 == 0
+                        x = UInt128(rem(x % UInt64, y % UInt64))
+                    end
+                    return x
+                end
+            end
+            ys >>>= 1
+            n -= 1
+        end
+        return x
+    end
+
+    function mod(x::Int128, y::Int128)
+        return Int128(mod(BigInt(x), BigInt(y)))::Int128
+    end
+else
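+    # on 64-bit platforms, LLVM handles 128-bit integer arithmetic well,
+    # so the compiler intrinsics are used directly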
+    *(x::T, y::T) where {T<:Union{Int128,UInt128}} = mul_int(x, y)
+
+    div(x::Int128, y::Int128) = checked_sdiv_int(x, y)
+    div(x::UInt128, y::UInt128) = checked_udiv_int(x, y)
+
+    rem(x::Int128, y::Int128) = checked_srem_int(x, y)
+    rem(x::UInt128, y::UInt128) = checked_urem_int(x, y)
+end
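+# both branches implement identical semantics, e.g.:
+#   divrem(UInt128(10), UInt128(3)) == (UInt128(3), UInt128(1))
+#   div(typemin(Int128), Int128(-1)) throws DivideError()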
 
 # issue #15489: since integer ops are unchecked, they shouldn't check promotion
 for op in (:+, :-, :*, :&, :|, :xor)