@@ -122,31 +122,89 @@ fpinttype(::Type{Float128}) = UInt128
122
122
# conversion
123
123
# Converting a Float128 to Float128 is the identity.
function Float128(x::Float128)
    return x
end
124
124
125
- # # Float64
125
+ # Float64
126
126
# Float64 -> Float128 through the `__extenddftf2` routine of `quadoplib`.
function Float128(x::Float64)
    widened = @ccall(quadoplib.__extenddftf2(x::Cdouble)::Cfloat128)
    return Float128(widened)
end

# Float128 -> Float64 through the `__trunctfdf2` routine of `quadoplib`.
function Float64(x::Float128)
    return @ccall(quadoplib.__trunctfdf2(x::Cfloat128)::Cdouble)
end
130
130
131
- # Int32
131
+ # Float32
132
# Float32 -> Float128 through the `__extendsftf2` routine of `quadoplib`.
function Float128(x::Float32)
    widened = @ccall(quadoplib.__extendsftf2(x::Cfloat)::Cfloat128)
    return Float128(widened)
end

# Float128 -> Float32 through the `__trunctfsf2` routine of `quadoplib`.
function Float32(x::Float128)
    return @ccall(quadoplib.__trunctfsf2(x::Cfloat128)::Cfloat)
end
136
+
137
+ # integer -> Float128
132
138
# Int32 -> Float128 through the `__floatsitf` routine of `quadoplib`.
function Float128(x::Int32)
    converted = @ccall(quadoplib.__floatsitf(x::Int32)::Cfloat128)
    return Float128(converted)
end
134
- Int32 (x:: Float128 ) =
135
- @ccall (quadoplib. __fixtfsi (x:: Cfloat128 ):: Int32 )
136
140
137
- # UInt32
138
141
# UInt32 -> Float128 through the `__floatunsitf` routine of `quadoplib`.
function Float128(x::UInt32)
    converted = @ccall(quadoplib.__floatunsitf(x::UInt32)::Cfloat128)
    return Float128(converted)
end
140
143
141
- # Int64
142
144
# Int64 -> Float128 through the `__floatditf` routine of `quadoplib`.
function Float128(x::Int64)
    converted = @ccall(quadoplib.__floatditf(x::Int64)::Cfloat128)
    return Float128(converted)
end
144
- Int64 (x:: Float128 ) =
145
- @ccall (quadoplib. __fixtfdi (x:: Cfloat128 ):: Int64 )
146
+
147
# UInt64 -> Float128 through the `__floatunditf` routine of `quadoplib`.
function Float128(x::UInt64)
    converted = @ccall(quadoplib.__floatunditf(x::UInt64)::Cfloat128)
    return Float128(converted)
end
149
+
150
# Narrow integers are first widened to the 32-bit type of the same signedness,
# then converted by the corresponding 32-bit method.
function Float128(x::Int16)
    return Float128(Int32(x))
end

function Float128(x::Int8)
    return Float128(Int32(x))
end

function Float128(x::UInt16)
    return Float128(UInt32(x))
end

function Float128(x::UInt8)
    return Float128(UInt32(x))
end
154
+
155
# Convert a UInt128 to Float128 by assembling the bit pattern directly.
# Values needing more than 113 significant bits are rounded to nearest, ties to even.
function Float128(x::UInt128)
    x == 0 && return Float128(0.0)
    n = 128 - leading_zeros(x) # number of significant bits in x
    if n <= 113
        # x is exactly representable: move its top bit to position 112 and keep
        # only the bits selected by the significand mask
        y = (x << (113 - n)) & significand_mask(Float128)
    else
        # drop the low bits but keep 1 extra bit below the significand for rounding
        y = (x >> (n - 114)) & 0x0001_ffff_ffff_ffff_ffff_ffff_ffff_ffff
        y = (y + 1) >> 1 # round, ties up (extra leading bit in case of next exponent)
        # Clear the last bit on an exact tie so that ties round to even. The mask
        # must be 128 bits wide: a UInt64 mask would be zero-extended when combined
        # with `y` and would wipe out the upper half of the significand.
        y &= ~UInt128(trailing_zeros(x) == (n - 114))
    end
    d = ((n + 16382) % UInt128) << 112 # exponent field
    # add (rather than or) so that a carry out of the rounded significand
    # increments the exponent
    d += y
    if Sys.iswindows()
        return reinterpret(Float128, d)
    else
        # build the value from its two 64-bit halves: low half first, then high half
        hi = reinterpret(Float64, UInt64(d >> 64))
        lo = reinterpret(Float64, d % UInt64)
        return Float128((lo, hi))
    end
end
176
+
177
# Convert an Int128 to Float128 by assembling the bit pattern directly.
# Values needing more than 113 significant bits are rounded to nearest, ties to even.
function Float128(x::Int128)
    # return a Float128 zero (not a Float64) so the result type is always Float128
    x == 0 && return Float128(0.0)
    s = reinterpret(UInt128, x) & sign_mask(Float128) # sign bit
    # magnitude as an unsigned value; `abs(typemin(Int128))` wraps around, but its
    # bit pattern reinterpreted as UInt128 is still the correct magnitude 2^127
    u = abs(x) % UInt128
    n = 128 - leading_zeros(u) # number of significant bits in the magnitude
    if n <= 113
        # the magnitude is exactly representable: move its top bit to position 112
        # and keep only the bits selected by the significand mask
        y = (u << (113 - n)) & significand_mask(Float128)
    else
        # drop the low bits but keep 1 extra bit below the significand for rounding
        y = (u >> (n - 114)) & 0x0001_ffff_ffff_ffff_ffff_ffff_ffff_ffff
        y = (y + 1) >> 1 # round, ties up (extra leading bit in case of next exponent)
        # Clear the last bit on an exact tie so that ties round to even. The mask
        # must be 128 bits wide: a UInt64 mask would be zero-extended when combined
        # with `y` and would wipe out the upper half of the significand.
        y &= ~UInt128(trailing_zeros(u) == (n - 114))
    end
    d = ((n + 16382) % UInt128) << 112 # exponent field
    # add the significand (rather than or) so that a carry out of the rounded
    # significand increments the exponent
    d = (s | d) + y
    if Sys.iswindows()
        return reinterpret(Float128, d)
    else
        # build the value from its two 64-bit halves: low half first, then high half
        hi = reinterpret(Float64, UInt64(d >> 64))
        lo = reinterpret(Float64, d % UInt64)
        return Float128((lo, hi))
    end
end
200
+
201
+ # Float128 -> integer requires arithmetic, so is below
146
202
147
203
# Rational
148
204
# Convert a rational by converting numerator and denominator separately and dividing.
function Float128(x::Rational)
    num = Float128(numerator(x))
    den = Float128(denominator(x))
    return num / den
end
149
205
206
# `true` maps to one and `false` to zero.
function Float128(x::Bool)
    if x
        return Float128(1)
    else
        return Float128(0)
    end
end
207
+
150
208
# Comparison
151
209
# Equality test: the `__eqtf2` routine of `quadoplib` returns zero for equal operands.
function (==)(x::Float128, y::Float128)
    status = @ccall(quadoplib.__eqtf2(x::Cfloat128, y::Cfloat128)::Cint)
    return status == 0
end
@@ -168,6 +226,85 @@ Float128(x::Rational{T}) where T = Float128(numerator(x))/Float128(denominator(x
168
226
# Unary minus through the `__negtf2` routine of `quadoplib`.
function (-)(x::Float128)
    negated = @ccall(quadoplib.__negtf2(x::Cfloat128)::Cfloat128)
    return Float128(negated)
end
170
228
229
+ # Float128 -> Integer
230
# Unchecked Float128 -> integer conversions, each forwarding to the matching
# `quadoplib` routine. No range check is performed here.
function unsafe_trunc(::Type{Int32}, x::Float128)
    return @ccall(quadoplib.__fixtfsi(x::Cfloat128)::Int32)
end

function unsafe_trunc(::Type{Int64}, x::Float128)
    return @ccall(quadoplib.__fixtfdi(x::Cfloat128)::Int64)
end

function unsafe_trunc(::Type{UInt32}, x::Float128)
    return @ccall(quadoplib.__fixunstfsi(x::Cfloat128)::UInt32)
end

function unsafe_trunc(::Type{UInt64}, x::Float128)
    return @ccall(quadoplib.__fixunstfdi(x::Cfloat128)::UInt64)
end
241
+
242
# Unchecked Float128 -> UInt128 truncation done on the raw bit pattern.
# The sign bit is ignored, so the result is derived from the magnitude only.
function unsafe_trunc(::Type{UInt128}, x::Float128)
    bits = reinterpret(UInt128, x)
    # 15-bit exponent field, turned into the shift to apply to the significand
    expfield = Int64(bits >> 112) & 0x07fff
    shift = expfield - 16382 - 113
    # stored significand bits with the leading bit (bit 112) set explicitly
    mantissa = (bits & significand_mask(Float128)) | 0x0001_0000_0000_0000_0000_0000_0000_0000
    if shift <= 0
        return UInt128(mantissa >> -shift)
    end
    return UInt128(mantissa) << shift
end
252
# Unchecked Float128 -> Int128 truncation: take the unsigned truncation,
# reinterpret it as signed, then apply the sign of `x`.
function unsafe_trunc(::Type{Int128}, x::Float128)
    magnitude = unsafe_trunc(UInt128, x) % Int128
    return copysign(magnitude, x)
end
255
# Truncation to an abstract integer type uses the default concrete type of the
# requested signedness: `Int` for `Signed` and `Integer`, `UInt` for `Unsigned`
# (so the result of `trunc(Unsigned, x)` really is unsigned).
trunc(::Type{Signed}, x::Float128) = trunc(Int, x)
trunc(::Type{Unsigned}, x::Float128) = trunc(UInt, x)
trunc(::Type{Integer}, x::Float128) = trunc(Int, x)
258
+
259
# Generate the checked Float128 -> integer conversions for each integer type:
#   * `trunc(Ti, x)` discards the fractional part of `x`;
#   * `Ti(x)` additionally requires `x` to be integer-valued.
# Both throw `InexactError` when the result does not fit in `Ti`.
for Ti in (Int32, Int64, Int128, UInt32, UInt64, UInt128)
    let Tf = Float128
        if Ti <: Unsigned || sizeof(Ti) < sizeof(Tf)
            # Here `Tf(typemin(Ti))-1` is exact, so we can compare the lower-bound
            # directly. `Tf(typemax(Ti))+1` is either exactly representable, or
            # `Tf(typemax(Ti))` was already rounded up to it (`Ti == UInt128`).
            @eval begin
                function trunc(::Type{$Ti}, x::$Tf)
                    if $(Tf(typemin(Ti)) - one(Tf)) < x < $(Tf(typemax(Ti)) + one(Tf))
                        return unsafe_trunc($Ti, x)
                    else
                        throw(InexactError(:trunc, $Ti, x))
                    end
                end
                function (::Type{$Ti})(x::$Tf)
                    # Compare strictly against `typemax + 1` instead of `<= typemax`:
                    # when `Tf(typemax(Ti))` is rounded up, `<=` would accept a value
                    # one past the representable range and the result would wrap.
                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)) + one(Tf))) &&
                       (round(x, RoundToZero) == x)
                        return unsafe_trunc($Ti, x)
                    else
                        throw(InexactError($(Expr(:quote, Ti.name.name)), $Ti, x))
                    end
                end
            end
        else
            # Here `eps(Tf(typemin(Ti))) > 1`, so the only value which can be truncated to
            # `Tf(typemin(Ti))` is itself. Similarly, `Tf(typemax(Ti))` is inexact and will
            # be rounded up. This assumes that `Tf(typemin(Ti)) > -Inf`, which is true for
            # these types, but not for `Float16` or larger integer types.
            @eval begin
                function trunc(::Type{$Ti}, x::$Tf)
                    if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))
                        return unsafe_trunc($Ti, x)
                    else
                        throw(InexactError(:trunc, $Ti, x))
                    end
                end
                function (::Type{$Ti})(x::$Tf)
                    if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) &&
                       (round(x, RoundToZero) == x)
                        return unsafe_trunc($Ti, x)
                    else
                        throw(InexactError($(Expr(:quote, Ti.name.name)), $Ti, x))
                    end
                end
            end
        end
    end
end
305
+
306
+ # # math
307
+
171
308
# # one argument
172
309
for f in (:acos , :acosh , :asin , :asinh , :atan , :atanh , :cosh , :cos ,
173
310
:exp , :expm1 , :log , :log2 , :log10 , :log1p ,
180
317
181
318
# Absolute value through the `fabsq` routine of `libquadmath`.
function abs(x::Float128)
    magnitude = @ccall(libquadmath.fabsq(x::Cfloat128)::Cfloat128)
    return Float128(magnitude)
end
182
319
# Round to an integral value through the `rintq` routine of `libquadmath`.
round(x::Float128) = Float128(@ccall(libquadmath.rintq(x::Cfloat128)::Cfloat128))
# Directed rounding modes.
round(x::Float128, r::RoundingMode{:Down}) = floor(x)
round(x::Float128, r::RoundingMode{:Up}) = ceil(x)
# Rounding toward zero must discard the fractional part; calling `round(x)` here
# would round e.g. 1.7 away from zero to 2. Use the `truncq` routine instead.
round(x::Float128, r::RoundingMode{:ToZero}) =
    Float128(@ccall(libquadmath.truncq(x::Cfloat128)::Cfloat128))
183
323
184
324
# # two argument
185
325
(^ )(x:: Float128 , y:: Float128 ) =
0 commit comments