|
| 1 | +""" Utilties for casting floats to integers |
| 2 | +""" |
| 3 | + |
| 4 | +import numpy as np |
| 5 | + |
| 6 | + |
| 7 | +class CastingError(Exception): |
| 8 | + pass |
| 9 | + |
| 10 | + |
| 11 | +def float_to_int(arr, int_type, nan2zero=True, infmax=False): |
| 12 | + """ Convert floating point array `arr` to type `int_type` |
| 13 | +
|
| 14 | + * Rounds numbers to nearest integer |
| 15 | + * Clips values to prevent overflows when casting |
| 16 | + * Converts NaN to 0 (for `nan2zero`==True |
| 17 | +
|
| 18 | + Casting floats to integers is delicate because the result is undefined |
| 19 | + and platform specific for float values outside the range of `int_type`. |
| 20 | + Define ``shared_min`` to be the minimum value that can be exactly |
| 21 | + represented in both the float type of `arr` and `int_type`. Define |
| 22 | + `shared_max` to be the equivalent maximum value. To avoid undefined results |
| 23 | + we threshold `arr` at ``shared_min`` and ``shared_max``. |
| 24 | +
|
| 25 | + Parameters |
| 26 | + ---------- |
| 27 | + arr : array-like |
| 28 | + Array of floating point type |
| 29 | + int_type : object |
| 30 | + Numpy integer type |
| 31 | + nan2zero : {True, False} |
| 32 | + Whether to convert NaN value to zero. Default is True. If False, and |
| 33 | + NaNs are present, raise CastingError |
| 34 | + infmax : {False, True} |
| 35 | + If True, set np.inf values in `arr` to be `int_type` integer maximum |
| 36 | + value, -np.inf as `int_type` integer minimum. If False, set +/- infs to |
| 37 | + be ``shared_min``, ``shared_max`` as defined above. Therefore False |
| 38 | + gives faster conversion at the expense of infs that are further from |
| 39 | + infinity. |
| 40 | +
|
| 41 | + Returns |
| 42 | + ------- |
| 43 | + iarr : ndarray |
| 44 | + of type `int_type` |
| 45 | +
|
| 46 | + Examples |
| 47 | + -------- |
| 48 | + >>> float_to_int([np.nan, np.inf, -np.inf, 1.1, 6.6], np.int16) |
| 49 | + array([ 0, 32767, -32768, 1, 7], dtype=int16) |
| 50 | +
|
| 51 | + Notes |
| 52 | + ----- |
| 53 | + Numpy relies on the C library to cast from float to int using the standard |
| 54 | + ``astype`` method of the array. |
| 55 | +
|
| 56 | + Quoting from section F4 of the C99 standard: |
| 57 | +
|
| 58 | + If the floating value is infinite or NaN or if the integral part of the |
| 59 | + floating value exceeds the range of the integer type, then the |
| 60 | + "invalid" floating-point exception is raised and the resulting value |
| 61 | + is unspecified. |
| 62 | +
|
| 63 | + Hence we threshold at ``shared_min`` and ``shared_max`` to avoid casting to |
| 64 | + values that are undefined. |
| 65 | +
|
| 66 | + See: http://en.wikipedia.org/wiki/C99 . There are links to the C99 standard |
| 67 | + from that page. |
| 68 | + """ |
| 69 | + arr = np.asarray(arr) |
| 70 | + flt_type = arr.dtype.type |
| 71 | + int_type = np.dtype(int_type).type |
| 72 | + # Deal with scalar as input; fancy indexing needs 1D |
| 73 | + shape = arr.shape |
| 74 | + arr = np.atleast_1d(arr) |
| 75 | + mn, mx = _cached_int_clippers(flt_type, int_type) |
| 76 | + nans = np.isnan(arr) |
| 77 | + have_nans = np.any(nans) |
| 78 | + if not nan2zero and have_nans: |
| 79 | + raise CastingError('NaNs in array, nan2zero not True') |
| 80 | + iarr = np.clip(np.rint(arr), mn, mx).astype(int_type) |
| 81 | + if have_nans: |
| 82 | + iarr[nans] = 0 |
| 83 | + if not infmax: |
| 84 | + return iarr.reshape(shape) |
| 85 | + ii = np.iinfo(int_type) |
| 86 | + iarr[arr == np.inf] = ii.max |
| 87 | + if ii.min != int(mn): |
| 88 | + iarr[arr == -np.inf] = ii.min |
| 89 | + return iarr.reshape(shape) |
| 90 | + |
| 91 | + |
| 92 | +def int_clippers(flt_type, int_type): |
| 93 | + """ Min and max in float type that are >=min, <=max in integer type |
| 94 | +
|
| 95 | + This is not as easy as it sounds, because the float type may not be able to |
| 96 | + exactly represent the max or min integer values, so we have to find the next |
| 97 | + exactly representable floating point value to do the thresholding. |
| 98 | +
|
| 99 | + Parameters |
| 100 | + ---------- |
| 101 | + flt_type : object |
| 102 | + numpy floating point type |
| 103 | + int_type : object |
| 104 | + numpy integer type |
| 105 | +
|
| 106 | + Returns |
| 107 | + ------- |
| 108 | + mn : object |
| 109 | + Number of type `flt_type` that is the minumum value in the range of |
| 110 | + `int_type`, such that ``mn.astype(int_type)`` >= min of `int_type` |
| 111 | + mx : object |
| 112 | + Number of type `flt_type` that is the maximum value in the range of |
| 113 | + `int_type`, such that ``mx.astype(int_type)`` <= max of `int_type` |
| 114 | + """ |
| 115 | + ii = np.iinfo(int_type) |
| 116 | + return floor_exact(ii.min, flt_type), floor_exact(ii.max, flt_type) |
| 117 | + |
| 118 | + |
| 119 | +# Cache clip values |
| 120 | +FLT_INT_CLIPS = {} |
| 121 | + |
| 122 | +def _cached_int_clippers(flt_type, int_type): |
| 123 | + if not (flt_type, int_type) in FLT_INT_CLIPS: |
| 124 | + FLT_INT_CLIPS[flt_type, int_type] = int_clippers(flt_type, int_type) |
| 125 | + return FLT_INT_CLIPS[(flt_type, int_type)] |
| 126 | + |
| 127 | +# --------------------------------------------------------------------------- |
| 128 | +# Routines to work out the next lowest representable intger in floating point |
| 129 | +# types. |
| 130 | +# --------------------------------------------------------------------------- |
| 131 | + |
| 132 | +try: |
| 133 | + _float16 = np.float16 |
| 134 | +except AttributeError: # float16 not present in np < 1.6 |
| 135 | + _float16 = None |
| 136 | + |
| 137 | +# The number of significand digits in IEEE floating point formats, not including |
| 138 | +# the implicit leading 0. See http://en.wikipedia.org/wiki/IEEE_754-2008 |
| 139 | +_flt_nmant = { |
| 140 | + _float16: 10, |
| 141 | + np.float32: 23, |
| 142 | + np.float64: 52, |
| 143 | + } |
| 144 | + |
| 145 | + |
| 146 | +class FloatingError(Exception): |
| 147 | + pass |
| 148 | + |
| 149 | + |
| 150 | +def flt2nmant(flt_type): |
| 151 | + """ Number of significand bits in float type `flt_type` |
| 152 | +
|
| 153 | + Parameters |
| 154 | + ---------- |
| 155 | + flt_type : object |
| 156 | + Numpy floating point type, such as np.float32 |
| 157 | +
|
| 158 | + Returns |
| 159 | + ------- |
| 160 | + nmant : int |
| 161 | + Number of digits in the signficand |
| 162 | + """ |
| 163 | + try: |
| 164 | + return _flt_nmant[flt_type] |
| 165 | + except KeyError: |
| 166 | + pass |
| 167 | + nmant = np.finfo(flt_type).nmant |
| 168 | + # Assuming the np.float type is always IEEE 64 bit |
| 169 | + if flt_type is np.float and nmant == 52: |
| 170 | + return 52 |
| 171 | + # Now we should be testing long doubles |
| 172 | + assert flt_type is np.longdouble |
| 173 | + if nmant == 63: # 80-bit intel type |
| 174 | + return 63 # Not including explicit first digit |
| 175 | + raise FloatingError('Cannot be confident of nmant value for %s' % flt_type) |
| 176 | + |
| 177 | + |
| 178 | +def as_int(x, check=True): |
| 179 | + """ Return python integer representation of number |
| 180 | +
|
| 181 | + This is useful because the numpy int(val) mechanism is broken for large |
| 182 | + values in np.longdouble. |
| 183 | +
|
| 184 | + This routine will still break for values that are outside the range of |
| 185 | + float64. |
| 186 | +
|
| 187 | + Parameters |
| 188 | + ---------- |
| 189 | + x : object |
| 190 | + Floating point value |
| 191 | + check : {True, False} |
| 192 | + If True, raise error for values that are not integers |
| 193 | +
|
| 194 | + Returns |
| 195 | + ------- |
| 196 | + i : int |
| 197 | + Python integer |
| 198 | +
|
| 199 | + Examples |
| 200 | + -------- |
| 201 | + >>> as_int(2.0) |
| 202 | + 2 |
| 203 | + >>> as_int(-2.0) |
| 204 | + -2 |
| 205 | + >>> as_int(2.1) |
| 206 | + Traceback (most recent call last): |
| 207 | + ... |
| 208 | + FloatingError: Not an integer: 2.1 |
| 209 | + >>> as_int(2.1, check=False) |
| 210 | + 2 |
| 211 | + """ |
| 212 | + ix = int(x) |
| 213 | + if ix == x: |
| 214 | + return ix |
| 215 | + fx = np.floor(x) |
| 216 | + if check and fx != x: |
| 217 | + raise FloatingError('Not an integer: %s' % x) |
| 218 | + f64 = np.float64(fx) |
| 219 | + i64 = int(f64) |
| 220 | + assert f64 == i64 |
| 221 | + res = fx - f64 |
| 222 | + return ix + int(res) |
| 223 | + |
| 224 | + |
| 225 | +def int_to_float(val, flt_type): |
| 226 | + """ Convert integer `val` to floating point type `flt_type` |
| 227 | +
|
| 228 | + Useful because casting to ``np.longdouble`` loses precision as it appears to |
| 229 | + go through casting to np.float64. |
| 230 | +
|
| 231 | + Parameters |
| 232 | + ---------- |
| 233 | + val : int |
| 234 | + Integer value |
| 235 | + flt_type : object |
| 236 | + numpy floating point type |
| 237 | +
|
| 238 | + Returns |
| 239 | + ------- |
| 240 | + f : numpy scalar |
| 241 | + of type `flt_type` |
| 242 | + """ |
| 243 | + if not flt_type is np.longdouble: |
| 244 | + return flt_type(val) |
| 245 | + f64 = np.float64(val) |
| 246 | + res = val - int(f64) |
| 247 | + return np.longdouble(f64) + np.longdouble(res) |
| 248 | + |
| 249 | + |
| 250 | +def floor_exact(val, flt_type): |
| 251 | + """ Get nearest exact integer to `val`, towards 0, in float type `flt_type` |
| 252 | +
|
| 253 | + Parameters |
| 254 | + ---------- |
| 255 | + val : int |
| 256 | + We have to pass val as an int rather than the floating point type |
| 257 | + because large integers cast as floating point may be rounded by the |
| 258 | + casting process. |
| 259 | + flt_type : numpy type |
| 260 | + numpy float type. Only IEEE types supported (np.float16, np.float32, |
| 261 | + np.float64) |
| 262 | +
|
| 263 | + Returns |
| 264 | + ------- |
| 265 | + floor_val : object |
| 266 | + value of same floating point type as `val`, that is the next excat |
| 267 | + integer in this type, towards zero, or == `val` if val is exactly |
| 268 | + representable. |
| 269 | +
|
| 270 | + Examples |
| 271 | + -------- |
| 272 | + Obviously 2 is within the range of representable integers for float32 |
| 273 | +
|
| 274 | + >>> floor_exact(2, np.float32) |
| 275 | + 2.0 |
| 276 | +
|
| 277 | + As is 2**24-1 (the number of significand digits is 23 + 1 implicit) |
| 278 | +
|
| 279 | + >>> floor_exact(2**24-1, np.float32) == 2**24-1 |
| 280 | + True |
| 281 | +
|
| 282 | + But 2**24+1 gives a number that float32 can't represent exactly |
| 283 | +
|
| 284 | + >>> floor_exact(2**24+1, np.float32) == 2**24 |
| 285 | + True |
| 286 | + """ |
| 287 | + val = int(val) |
| 288 | + flt_type = np.dtype(flt_type).type |
| 289 | + sign = val > 0 and 1 or -1 |
| 290 | + aval = abs(val) |
| 291 | + if flt_type is np.longdouble: |
| 292 | + # longdouble seems to go through casting to float64, so getting the |
| 293 | + # value into float128 with the given precision needs to go through two |
| 294 | + # steps, first float64, then adding the remainder. |
| 295 | + f64 = floor_exact(aval, np.float64) |
| 296 | + i64 = int(f64) |
| 297 | + assert f64 == i64 |
| 298 | + res = aval - i64 |
| 299 | + try: |
| 300 | + faval = flt_type(i64) + flt_type(res) |
| 301 | + except OverflowError: |
| 302 | + faval = np.inf |
| 303 | + if faval == np.inf: |
| 304 | + return sign * np.finfo(flt_type).max |
| 305 | + if (faval - f64) <= res: |
| 306 | + # Float casting has made the value go down or stay the same |
| 307 | + return sign * faval |
| 308 | + else: # Normal case |
| 309 | + try: |
| 310 | + faval = flt_type(aval) |
| 311 | + except OverflowError: |
| 312 | + faval = np.inf |
| 313 | + if faval == np.inf: |
| 314 | + return sign * np.finfo(flt_type).max |
| 315 | + if int(faval) <= aval: |
| 316 | + # Float casting has made the value go down or stay the same |
| 317 | + return sign * faval |
| 318 | + # Float casting made the value go up |
| 319 | + nmant = flt2nmant(flt_type) |
| 320 | + biggest_gap = 2**(floor_log2(aval) - nmant) |
| 321 | + assert biggest_gap > 1 |
| 322 | + faval -= flt_type(biggest_gap) |
| 323 | + return sign * faval |
| 324 | + |
| 325 | + |
| 326 | +def floor_log2(x): |
| 327 | + """ floor of log2 of abs(`x`) |
| 328 | +
|
| 329 | + Embarrassingly, from http://en.wikipedia.org/wiki/Binary_logarithm |
| 330 | +
|
| 331 | + Parameters |
| 332 | + ---------- |
| 333 | + x : int |
| 334 | +
|
| 335 | + Returns |
| 336 | + ------- |
| 337 | + L : int |
| 338 | + floor of base 2 log of `x` |
| 339 | +
|
| 340 | + Examples |
| 341 | + -------- |
| 342 | + >>> floor_log2(2**9+1) |
| 343 | + 9 |
| 344 | + >>> floor_log2(-2**9+1) |
| 345 | + 8 |
| 346 | + """ |
| 347 | + ip = 0 |
| 348 | + rem = abs(x) |
| 349 | + while rem>=2: |
| 350 | + ip += 1 |
| 351 | + rem /= 2 |
| 352 | + return ip |
0 commit comments