|
3 | 3 |
|
4 | 4 | import numpy as np
|
5 | 5 |
|
6 |
| -from .floating import floor_exact |
| 6 | + |
| 7 | +class CastingError(Exception): |
| 8 | + pass |
| 9 | + |
| 10 | + |
| 11 | +def float_to_int(arr, int_type, nan2zero=True, infmax=False): |
| 12 | + """ Convert floating point array `arr` to type `int_type` |
| 13 | +
|
| 14 | + * Rounds numbers to nearest integer |
| 15 | + * Clips values to prevent overflows when casting |
| 16 | + * Converts NaN to 0 (for `nan2zero`==True |
| 17 | +
|
| 18 | + Casting floats to integers is delicate because the result is undefined |
| 19 | + and platform specific for float values outside the range of `int_type`. |
| 20 | + Define ``shared_min`` to be the minimum value that can be exactly |
| 21 | + represented in both the float type of `arr` and `int_type`. Define |
| 22 | + `shared_max` to be the equivalent maximum value. To avoid undefined results |
| 23 | + we threshold `arr` at ``shared_min`` and ``shared_max``. |
| 24 | +
|
| 25 | + Parameters |
| 26 | + ---------- |
| 27 | + arr : array-like |
| 28 | + Array of floating point type |
| 29 | + int_type : object |
| 30 | + Numpy integer type |
| 31 | + nan2zero : {True, False} |
| 32 | + Whether to convert NaN value to zero. Default is True. If False, and |
| 33 | + NaNs are present, raise CastingError |
| 34 | + infmax : {False, True} |
| 35 | + If True, set np.inf values in `arr` to be `int_type` integer maximum |
| 36 | + value, -np.inf as `int_type` integer minimum. If False, set +/- infs to |
| 37 | + be ``shared_min``, ``shared_max`` as defined above. Therefore False |
| 38 | + gives faster conversion at the expense of infs that are further from |
| 39 | + infinity. |
| 40 | +
|
| 41 | + Returns |
| 42 | + ------- |
| 43 | + iarr : ndarray |
| 44 | + of type `int_type` |
| 45 | +
|
| 46 | + Examples |
| 47 | + -------- |
| 48 | + >>> float_to_int([np.nan, np.inf, -np.inf, 1.1, 6.6], np.int16) |
| 49 | + array([ 0, 32767, -32768, 1, 7], dtype=int16) |
| 50 | +
|
| 51 | + Notes |
| 52 | + ----- |
| 53 | + Numpy relies on the C library to cast from float to int using the standard |
| 54 | + ``astype`` method of the array. |
| 55 | +
|
| 56 | + Quoting from section F4 of the C99 standard: |
| 57 | +
|
| 58 | + If the floating value is infinite or NaN or if the integral part of the |
| 59 | + floating value exceeds the range of the integer type, then the |
| 60 | + "invalid" floating-point exception is raised and the resulting value |
| 61 | + is unspecified. |
| 62 | +
|
| 63 | + Hence we threshold at ``shared_min`` and ``shared_max`` to avoid casting to |
| 64 | + values that are undefined. |
| 65 | +
|
| 66 | + See: http://en.wikipedia.org/wiki/C99 . There are links to the C99 standard |
| 67 | + from that page. |
| 68 | + """ |
| 69 | + arr = np.asarray(arr) |
| 70 | + flt_type = arr.dtype.type |
| 71 | + int_type = np.dtype(int_type).type |
| 72 | + # Deal with scalar as input; fancy indexing needs 1D |
| 73 | + shape = arr.shape |
| 74 | + arr = np.atleast_1d(arr) |
| 75 | + mn, mx = _cached_int_clippers(flt_type, int_type) |
| 76 | + nans = np.isnan(arr) |
| 77 | + have_nans = np.any(nans) |
| 78 | + if not nan2zero and have_nans: |
| 79 | + raise CastingError('NaNs in array, nan2zero not True') |
| 80 | + iarr = np.clip(np.rint(arr), mn, mx).astype(int_type) |
| 81 | + if have_nans: |
| 82 | + iarr[nans] = 0 |
| 83 | + if not infmax: |
| 84 | + return iarr.reshape(shape) |
| 85 | + ii = np.iinfo(int_type) |
| 86 | + iarr[arr == np.inf] = ii.max |
| 87 | + if ii.min != int(mn): |
| 88 | + iarr[arr == -np.inf] = ii.min |
| 89 | + return iarr.reshape(shape) |
| 90 | + |
7 | 91 |
|
8 | 92 | def int_clippers(flt_type, int_type):
|
9 | 93 | """ Min and max in float type that are >=min, <=max in integer type
|
@@ -40,58 +124,229 @@ def _cached_int_clippers(flt_type, int_type):
|
40 | 124 | FLT_INT_CLIPS[flt_type, int_type] = int_clippers(flt_type, int_type)
|
41 | 125 | return FLT_INT_CLIPS[(flt_type, int_type)]
|
42 | 126 |
|
| 127 | +# --------------------------------------------------------------------------- |
| 128 | +# Routines to work out the next lowest representable intger in floating point |
| 129 | +# types. |
| 130 | +# --------------------------------------------------------------------------- |
43 | 131 |
|
44 |
| -class CastingError(Exception): |
| 132 | +try: |
| 133 | + _float16 = np.float16 |
| 134 | +except AttributeError: # float16 not present in np < 1.6 |
| 135 | + _float16 = None |
| 136 | + |
| 137 | +# The number of significand digits in IEEE floating point formats, not including |
| 138 | +# the implicit leading 0. See http://en.wikipedia.org/wiki/IEEE_754-2008 |
| 139 | +_flt_nmant = { |
| 140 | + _float16: 10, |
| 141 | + np.float32: 23, |
| 142 | + np.float64: 52, |
| 143 | + } |
| 144 | + |
| 145 | + |
| 146 | +class FloatingError(Exception): |
45 | 147 | pass
|
46 | 148 |
|
47 | 149 |
|
48 |
| -def nice_round(arr, int_type, nan2zero=True, infmax=False): |
49 |
| - """ Round floating point array `arr` to type `int_type` |
| 150 | +def flt2nmant(flt_type): |
| 151 | + """ Number of significand bits in float type `flt_type` |
50 | 152 |
|
51 | 153 | Parameters
|
52 | 154 | ----------
|
53 |
| - arr : array-like |
54 |
| - Array of floating point type |
55 |
| - int_type : object |
56 |
| - Numpy integer type |
57 |
| - nan2zero : {True, False} |
58 |
| - Whether to convert NaN value to zero. Default is True. If False, and |
59 |
| - NaNs are present, raise CastingError |
60 |
| - infmax : {False, True} |
61 |
| - If True, set np.inf values in `arr` to be `int_type` integer maximum |
62 |
| - value, -np.inf as `int_type` integer minimum. If False, merely set infs |
63 |
| - to be numbers at or near the maximum / minumum number in `arr` that can be |
64 |
| - contained in `int_type`. Therefore False gives faster conversion at the |
65 |
| - expense of infs that are further from infinity. |
| 155 | + flt_type : object |
| 156 | + Numpy floating point type, such as np.float32 |
66 | 157 |
|
67 | 158 | Returns
|
68 | 159 | -------
|
69 |
| - iarr : ndarray |
70 |
| - of type `int_type` |
| 160 | + nmant : int |
| 161 | + Number of digits in the signficand |
| 162 | + """ |
| 163 | + try: |
| 164 | + return _flt_nmant[flt_type] |
| 165 | + except KeyError: |
| 166 | + pass |
| 167 | + nmant = np.finfo(flt_type).nmant |
| 168 | + # Assuming the np.float type is always IEEE 64 bit |
| 169 | + if flt_type is np.float and nmant == 52: |
| 170 | + return 52 |
| 171 | + # Now we should be testing long doubles |
| 172 | + assert flt_type is np.longdouble |
| 173 | + if nmant == 63: # 80-bit intel type |
| 174 | + return 63 # Not including explicit first digit |
| 175 | + raise FloatingError('Cannot be confident of nmant value for %s' % flt_type) |
| 176 | + |
| 177 | + |
| 178 | +def as_int(x, check=True): |
| 179 | + """ Return python integer representation of number |
| 180 | +
|
| 181 | + This is useful because the numpy int(val) mechanism is broken for large |
| 182 | + values in np.longdouble. |
| 183 | +
|
| 184 | + This routine will still break for values that are outside the range of |
| 185 | + float64. |
| 186 | +
|
| 187 | + Parameters |
| 188 | + ---------- |
| 189 | + x : object |
| 190 | + Floating point value |
| 191 | + check : {True, False} |
| 192 | + If True, raise error for values that are not integers |
| 193 | +
|
| 194 | + Returns |
| 195 | + ------- |
| 196 | + i : int |
| 197 | + Python integer |
71 | 198 |
|
72 | 199 | Examples
|
73 | 200 | --------
|
74 |
| - >>> nice_round([np.nan, np.inf, -np.inf, 1.1, 6.6], np.int16) |
75 |
| - array([ 0, 32767, -32768, 1, 7], dtype=int16) |
| 201 | + >>> as_int(2.0) |
| 202 | + 2 |
| 203 | + >>> as_int(-2.0) |
| 204 | + -2 |
| 205 | + >>> as_int(2.1) |
| 206 | + Traceback (most recent call last): |
| 207 | + ... |
| 208 | + FloatingError: Not an integer: 2.1 |
| 209 | + >>> as_int(2.1, check=False) |
| 210 | + 2 |
76 | 211 | """
|
77 |
| - arr = np.asarray(arr) |
78 |
| - flt_type = arr.dtype.type |
79 |
| - int_type = np.dtype(int_type).type |
80 |
| - # Deal with scalar as input; fancy indexing needs 1D |
81 |
| - shape = arr.shape |
82 |
| - arr = np.atleast_1d(arr) |
83 |
| - mn, mx = _cached_int_clippers(flt_type, int_type) |
84 |
| - nans = np.isnan(arr) |
85 |
| - have_nans = np.any(nans) |
86 |
| - if not nan2zero and have_nans: |
87 |
| - raise CastingError('NaNs in array, nan2zero not True') |
88 |
| - iarr = np.clip(np.rint(arr), mn, mx).astype(int_type) |
89 |
| - if have_nans: |
90 |
| - iarr[nans] = 0 |
91 |
| - if not infmax: |
92 |
| - return iarr.reshape(shape) |
93 |
| - ii = np.iinfo(int_type) |
94 |
| - iarr[arr == np.inf] = ii.max |
95 |
| - if ii.min != int(mn): |
96 |
| - iarr[arr == -np.inf] = ii.min |
97 |
| - return iarr.reshape(shape) |
| 212 | + ix = int(x) |
| 213 | + if ix == x: |
| 214 | + return ix |
| 215 | + fx = np.floor(x) |
| 216 | + if check and fx != x: |
| 217 | + raise FloatingError('Not an integer: %s' % x) |
| 218 | + f64 = np.float64(fx) |
| 219 | + i64 = int(f64) |
| 220 | + assert f64 == i64 |
| 221 | + res = fx - f64 |
| 222 | + return ix + int(res) |
| 223 | + |
| 224 | + |
| 225 | +def int_to_float(val, flt_type): |
| 226 | + """ Convert integer `val` to floating point type `flt_type` |
| 227 | +
|
| 228 | + Useful because casting to ``np.longdouble`` loses precision as it appears to |
| 229 | + go through casting to np.float64. |
| 230 | +
|
| 231 | + Parameters |
| 232 | + ---------- |
| 233 | + val : int |
| 234 | + Integer value |
| 235 | + flt_type : object |
| 236 | + numpy floating point type |
| 237 | +
|
| 238 | + Returns |
| 239 | + ------- |
| 240 | + f : numpy scalar |
| 241 | + of type `flt_type` |
| 242 | + """ |
| 243 | + if not flt_type is np.longdouble: |
| 244 | + return flt_type(val) |
| 245 | + f64 = np.float64(val) |
| 246 | + res = val - int(f64) |
| 247 | + return np.longdouble(f64) + np.longdouble(res) |
| 248 | + |
| 249 | + |
| 250 | +def floor_exact(val, flt_type): |
| 251 | + """ Get nearest exact integer to `val`, towards 0, in float type `flt_type` |
| 252 | +
|
| 253 | + Parameters |
| 254 | + ---------- |
| 255 | + val : int |
| 256 | + We have to pass val as an int rather than the floating point type |
| 257 | + because large integers cast as floating point may be rounded by the |
| 258 | + casting process. |
| 259 | + flt_type : numpy type |
| 260 | + numpy float type. Only IEEE types supported (np.float16, np.float32, |
| 261 | + np.float64) |
| 262 | +
|
| 263 | + Returns |
| 264 | + ------- |
| 265 | + floor_val : object |
| 266 | + value of same floating point type as `val`, that is the next excat |
| 267 | + integer in this type, towards zero, or == `val` if val is exactly |
| 268 | + representable. |
| 269 | +
|
| 270 | + Examples |
| 271 | + -------- |
| 272 | + Obviously 2 is within the range of representable integers for float32 |
| 273 | +
|
| 274 | + >>> floor_exact(2, np.float32) |
| 275 | + 2.0 |
| 276 | +
|
| 277 | + As is 2**24-1 (the number of significand digits is 23 + 1 implicit) |
| 278 | +
|
| 279 | + >>> floor_exact(2**24-1, np.float32) == 2**24-1 |
| 280 | + True |
| 281 | +
|
| 282 | + But 2**24+1 gives a number that float32 can't represent exactly |
| 283 | +
|
| 284 | + >>> floor_exact(2**24+1, np.float32) == 2**24 |
| 285 | + True |
| 286 | + """ |
| 287 | + val = int(val) |
| 288 | + flt_type = np.dtype(flt_type).type |
| 289 | + sign = val > 0 and 1 or -1 |
| 290 | + aval = abs(val) |
| 291 | + if flt_type is np.longdouble: |
| 292 | + # longdouble seems to go through casting to float64, so getting the |
| 293 | + # value into float128 with the given precision needs to go through two |
| 294 | + # steps, first float64, then adding the remainder. |
| 295 | + f64 = floor_exact(aval, np.float64) |
| 296 | + i64 = int(f64) |
| 297 | + assert f64 == i64 |
| 298 | + res = aval - i64 |
| 299 | + try: |
| 300 | + faval = flt_type(i64) + flt_type(res) |
| 301 | + except OverflowError: |
| 302 | + faval = np.inf |
| 303 | + if faval == np.inf: |
| 304 | + return sign * np.finfo(flt_type).max |
| 305 | + if (faval - f64) <= res: |
| 306 | + # Float casting has made the value go down or stay the same |
| 307 | + return sign * faval |
| 308 | + else: # Normal case |
| 309 | + try: |
| 310 | + faval = flt_type(aval) |
| 311 | + except OverflowError: |
| 312 | + faval = np.inf |
| 313 | + if faval == np.inf: |
| 314 | + return sign * np.finfo(flt_type).max |
| 315 | + if int(faval) <= aval: |
| 316 | + # Float casting has made the value go down or stay the same |
| 317 | + return sign * faval |
| 318 | + # Float casting made the value go up |
| 319 | + nmant = flt2nmant(flt_type) |
| 320 | + biggest_gap = 2**(floor_log2(aval) - nmant) |
| 321 | + assert biggest_gap > 1 |
| 322 | + faval -= flt_type(biggest_gap) |
| 323 | + return sign * faval |
| 324 | + |
| 325 | + |
| 326 | +def floor_log2(x): |
| 327 | + """ floor of log2 of abs(`x`) |
| 328 | +
|
| 329 | + Embarrassingly, from http://en.wikipedia.org/wiki/Binary_logarithm |
| 330 | +
|
| 331 | + Parameters |
| 332 | + ---------- |
| 333 | + x : int |
| 334 | +
|
| 335 | + Returns |
| 336 | + ------- |
| 337 | + L : int |
| 338 | + floor of base 2 log of `x` |
| 339 | +
|
| 340 | + Examples |
| 341 | + -------- |
| 342 | + >>> floor_log2(2**9+1) |
| 343 | + 9 |
| 344 | + >>> floor_log2(-2**9+1) |
| 345 | + 8 |
| 346 | + """ |
| 347 | + ip = 0 |
| 348 | + rem = abs(x) |
| 349 | + while rem>=2: |
| 350 | + ip += 1 |
| 351 | + rem /= 2 |
| 352 | + return ip |
0 commit comments