-
Notifications
You must be signed in to change notification settings - Fork 106
Bitround Codec #299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bitround Codec #299
Changes from 13 commits
cdb77b2
c0b6347
9e0c943
06a27d7
7c7dc7c
69263a5
6df3b69
b5abbbb
7d9846a
76e9f6f
2f6207e
594202a
c027936
8b1fcfa
4b25209
e5829cb
374cf9e
1122beb
9622fe3
e62de86
166e15c
775d368
7deff68
cb93cb6
66b7b1a
56d9511
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| import numpy as np | ||
|
|
||
|
|
||
| from .abc import Codec | ||
| from .compat import ensure_ndarray, ndarray_copy | ||
|
|
||
| max_bits = { | ||
| "float16": 10, | ||
| "float32": 23, | ||
| "float64": 52, | ||
| } | ||
| types = { | ||
| "float16": np.int16, | ||
| "float32": np.int32, | ||
| "float64": np.int64, | ||
| } | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| inverse = { | ||
| "int16": np.float16, | ||
| "int32": np.float32, | ||
| "int64": np.float64 | ||
| } | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| class BitRound(Codec): | ||
| """Floating-point bit rounding codec | ||
|
|
||
| Drops a specified number of bits from the floating point mantissa, | ||
| leaving an array more amenable to compression. The number of bits to keep should | ||
| be determined by an information analysis of the data to be compressed. See | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| https://github.com/zarr-developers/numcodecs/issues/298 for discussion | ||
| and the original implementation in Julia referred to at | ||
| https://www.nature.com/articles/s43588-021-00156-2 | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| Parameters | ||
| ---------- | ||
|
|
||
| keepbits: int | ||
| The number of bits of the mantissa to keep. The range allowed | ||
| depends on the dtype input data. If keepbits is | ||
| equal to the maximum allowed for the data type, this is equivalent | ||
| to no transform. | ||
| """ | ||
|
|
||
| codec_id = 'bitround' | ||
|
|
||
| def __init__(self, keepbits: int): | ||
| if keepbits < 0: | ||
| raise ValueError("keepbits must be zero or positive") | ||
martindurant marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| self.keepbits = keepbits | ||
|
|
||
| def encode(self, buf): | ||
| """Create int array by rounding floating-point data | ||
|
|
||
| The itemsize will be preserved, but the output should be much more | ||
| compressible. | ||
| """ | ||
| a = ensure_ndarray(buf) | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| bits = max_bits[str(a.dtype)] | ||
| all_set = np.frombuffer(b"\xff" * a.dtype.itemsize, dtype=types[str(a.dtype)]) | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if self.keepbits == bits: | ||
| return a | ||
| if self.keepbits > bits: | ||
| raise ValueError("Keepbits too large for given dtype") | ||
| if not a.dtype.kind == "f" or a.dtype.itemsize > 8: | ||
| raise TypeError("Only float arrays (16-64bit) can be bit-rounded") | ||
| b = a.view(types[str(a.dtype)]) | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| maskbits = 23 - self.keepbits | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| mask = (all_set >> maskbits) << maskbits | ||
| half_quantum1 = (1 << (maskbits - 1)) - 1 | ||
| b += ((b >> maskbits) & 1) + half_quantum1 | ||
| b &= mask | ||
| return b | ||
|
|
||
| def decode(self, buf, out=None): | ||
| """Remake floats from ints | ||
|
|
||
| As with ``encode``, preserves itemsize. | ||
| """ | ||
| dt = buf.dtype if buf.dtype.kind == "f" else inverse[str(buf.dtype)] | ||
martindurant marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| data = ensure_ndarray(buf).view(dt) | ||
| out = ndarray_copy(data, out) | ||
| return out | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| import numpy as np | ||
|
|
||
| import pytest | ||
|
|
||
| from numcodecs.bitround import BitRound | ||
|
|
||
| # adapted from https://github.com/milankl/BitInformation.jl/blob/main/test/round_nearest.jl | ||
|
|
||
|
|
||
| # TODO: add other dtypes | ||
| @pytest.fixture(params=[np.float32]) | ||
| def dtype(request): | ||
| return request.param | ||
|
|
||
|
|
||
| # number of mantissa bits for each dtype | ||
| MBITS = {np.float32: 23} | ||
|
|
||
|
|
||
| def round(data, keepbits): | ||
| codec = BitRound(keepbits=keepbits) | ||
| data = data.copy() # otherwise overwrites the input | ||
| encoded = codec.encode(data) | ||
| return codec.decode(encoded) | ||
|
|
||
|
|
||
| def test_round_zero_to_zero(dtype): | ||
| a = np.zeros((3, 2), dtype=dtype) | ||
| # Don't understand Milan's original test: | ||
| # How is it possible to have negative keepbits? | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You just end up rounding the exponent bits, see other comment |
||
| # for k in range(-5, 50): | ||
| for k in range(0, MBITS[dtype]): | ||
| ar = round(a, k) | ||
| np.testing.assert_equal(a, ar) | ||
|
|
||
|
|
||
| def test_round_one_to_one(dtype): | ||
| a = np.ones((3, 2), dtype=dtype) | ||
| for k in range(0, MBITS[dtype]): | ||
| ar = round(a, k) | ||
| np.testing.assert_equal(a, ar) | ||
|
|
||
|
|
||
| def test_round_minus_one_to_minus_one(dtype): | ||
| a = -np.ones((3, 2), dtype=dtype) | ||
| for k in range(0, MBITS[dtype]): | ||
| ar = round(a, k) | ||
| np.testing.assert_equal(a, ar) | ||
|
|
||
|
|
||
| def test_no_rounding(dtype): | ||
| a = np.random.random_sample((300, 200)).astype(dtype) | ||
| keepbits = MBITS[dtype] | ||
| ar = round(a, keepbits) | ||
| np.testing.assert_equal(a, ar) | ||
|
|
||
|
|
||
| APPROX_KEEPBITS = {np.float32: 11} | ||
|
|
||
|
|
||
| def test_approx_equal(dtype): | ||
| a = np.random.random_sample((300, 200)).astype(dtype) | ||
| ar = round(a, APPROX_KEEPBITS[dtype]) | ||
| # Mimic julia behavior - https://docs.julialang.org/en/v1/base/math/#Base.isapprox | ||
| rtol = np.sqrt(np.finfo(np.float32).eps) | ||
| # This gets us much closer but still failing for ~6% of the array | ||
| # It does pass if we add 1 to keepbits (11 instead of 10) | ||
| # Is there an off-by-one issue here? | ||
| np.testing.assert_allclose(a, ar, rtol=rtol) | ||
|
Comment on lines
+57
to
+65
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fact that this test passes if we use keepbits=11 instead of 10 makes me think we are dealing with a off-by-one issue, perhaps related to julia vs. python indexing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be honest, I just guessed the tolerances here. The motivation for this test was less to test exactness, but just to flag immediately if rounding is completely off.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok then I will just bump keepbits to 11 for this test. |
||
|
|
||
|
|
||
| def test_idempotence(dtype): | ||
| a = np.random.random_sample((300, 200)).astype(dtype) | ||
| for k in range(20): | ||
| ar = round(a, k) | ||
| ar2 = round(a, k) | ||
| np.testing.assert_equal(ar, ar2) | ||
|
|
||
|
|
||
| # TODO: implement tie_to_even and round_to_nearest | ||
Uh oh!
There was an error while loading. Please reload this page.