-
Notifications
You must be signed in to change notification settings - Fork 106
Bitround Codec #299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bitround Codec #299
Changes from 2 commits
cdb77b2
c0b6347
9e0c943
06a27d7
7c7dc7c
69263a5
6df3b69
b5abbbb
7d9846a
76e9f6f
2f6207e
594202a
c027936
8b1fcfa
4b25209
e5829cb
374cf9e
1122beb
9622fe3
e62de86
166e15c
775d368
7deff68
cb93cb6
66b7b1a
56d9511
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| import numpy as np | ||
|
|
||
|
|
||
| from .abc import Codec | ||
| from .compat import ensure_ndarray, ndarray_copy | ||
|
|
||
|
|
||
class BitRound(Codec):
    """Lossy codec that rounds float32 data to a reduced number of mantissa bits.

    Each value is rounded to the nearest number representable with
    ``keepbits`` mantissa bits (ties go to the neighbour whose last kept bit
    is zero) and the discarded trailing mantissa bits are set to zero.

    Parameters
    ----------
    keepbits : int
        Number of mantissa bits to retain. float32 has 23 mantissa bits, so
        ``keepbits=23`` performs no rounding and larger values are rejected
        by ``encode``.
    """

    codec_id = 'bitround'

    def __init__(self, keepbits: int):
        self.keepbits = keepbits

    def encode(self, buf):
        """Return a bit-rounded copy of ``buf`` as an int32 array.

        The input buffer is never modified; rounding happens on a copy.

        Raises
        ------
        TypeError
            If ``buf`` does not hold float32 data.
        ValueError
            If ``keepbits`` exceeds the 23 mantissa bits of float32.
        """
        a = ensure_ndarray(buf)
        # Explicit exception instead of ``assert``: asserts vanish under -O.
        if a.dtype != np.float32:
            raise TypeError(
                f"BitRound only supports float32 data, got {a.dtype}"
            )
        maskbits = 23 - self.keepbits
        if maskbits < 0:
            raise ValueError("keepbits must not exceed 23 for float32 data")

        # Reinterpret the float bits as unsigned integers and copy them so the
        # caller's buffer is left untouched. Unsigned arithmetic is used
        # because the mask can exceed the int32 range; the resulting bit
        # pattern is the same as with signed two's-complement arithmetic.
        b = a.view(np.uint32).copy()
        if maskbits > 0:
            mask = np.uint32((0xFFFFFFFF >> maskbits) << maskbits)
            half_quantum1 = (1 << (maskbits - 1)) - 1
            # Add just under half a quantum, plus the last kept bit, so that
            # truncation below rounds to nearest with ties to even.
            b += ((b >> maskbits) & 1) + half_quantum1
            b &= mask
        # maskbits == 0 keeps every mantissa bit: the copy is returned as is.
        return b.view(np.int32)

    def decode(self, buf, out=None):
        """Reinterpret the encoded bits as float32 and copy them into ``out``.

        The copy is delegated to ``ndarray_copy``; presumably a new array is
        allocated when ``out`` is None (confirm against ``.compat``).
        """
        data = ensure_ndarray(buf).view(np.float32)
        out = ndarray_copy(data, out)
        return out
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| import numpy as np | ||
|
|
||
| import pytest | ||
|
|
||
| from numcodecs.bitround import BitRound | ||
|
|
||
| # adapted from https://github.com/milankl/BitInformation.jl/blob/main/test/round_nearest.jl | ||
|
|
||
|
|
||
# TODO: add other dtypes
@pytest.fixture(params=(np.float32,))
def dtype(request):
    """Parametrized fixture supplying each floating-point dtype under test."""
    return request.param
|
|
||
|
|
||
# number of mantissa bits for each dtype (23 for float32), taken from numpy
MBITS = {np.float32: np.finfo(np.float32).nmant}
|
|
||
|
|
||
def round(data, keepbits):
    """Round-trip ``data`` through a ``BitRound`` codec with ``keepbits`` bits.

    A copy of ``data`` is handed to ``encode`` because the codec may
    otherwise overwrite its input. Note that this helper intentionally
    keeps the name used throughout this module, shadowing the builtin.
    """
    bitround = BitRound(keepbits=keepbits)
    scratch = data.copy()  # protect the caller's array from in-place rounding
    return bitround.decode(bitround.encode(scratch))
|
|
||
|
|
||
def test_round_zero_to_zero(dtype):
    """Zero must round to zero for every keepbits below the mantissa width."""
    zeros = np.zeros((3, 2), dtype=dtype)
    # The upstream Julia test sweeps k over range(-5, 50). According to the
    # review discussion, negative keepbits simply end up rounding the exponent
    # bits. Only 0 <= k < mantissa width is exercised here.
    for keepbits in range(MBITS[dtype]):
        rounded = round(zeros, keepbits)
        np.testing.assert_equal(zeros, rounded)
|
|
||
|
|
||
def test_round_one_to_one(dtype):
    """One must round to one for every keepbits below the mantissa width."""
    ones = np.ones((3, 2), dtype=dtype)
    for keepbits in range(MBITS[dtype]):
        rounded = round(ones, keepbits)
        np.testing.assert_equal(ones, rounded)
|
|
||
|
|
||
def test_round_minus_one_to_minus_one(dtype):
    """Minus one must round to minus one for every keepbits below the mantissa width."""
    minus_ones = np.full((3, 2), -1, dtype=dtype)
    for keepbits in range(MBITS[dtype]):
        rounded = round(minus_ones, keepbits)
        np.testing.assert_equal(minus_ones, rounded)
|
|
||
|
|
||
# Keeping every mantissa bit means a zero-width mask; a codec that does not
# special-case this raises a 'negative shift count' error.
def test_no_rounding(dtype):
    """Keeping all mantissa bits must return the data unchanged."""
    sample = np.random.random_sample((300, 200)).astype(dtype)
    rounded = round(sample, MBITS[dtype])
    np.testing.assert_equal(sample, rounded)
|
|
||
|
|
||
APPROX_KEEPBITS = {np.float32: 10}


def test_approx_equal(dtype):
    """Rounded data must stay within the precision implied by ``keepbits``.

    ``assert_allclose`` defaults to ``rtol=1e-7``, which is far tighter than
    the error introduced by keeping only 10 mantissa bits, so the tolerance
    is derived from ``keepbits`` instead. (Julia's ``≈`` reportedly uses a
    much looser default, ``sqrt(eps)`` - confirm against the Julia docs.)
    """
    keepbits = APPROX_KEEPBITS[dtype]
    a = np.random.random_sample((300, 200)).astype(dtype)
    ar = round(a, keepbits)
    # Round-to-nearest moves a value by at most half a unit of the last kept
    # bit, i.e. a relative error of 2**-(keepbits + 1); one unit of the last
    # kept bit is used as the bound to leave some slack.
    np.testing.assert_allclose(a, ar, rtol=2.0 ** -keepbits)
rabernat marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
def test_idempotence(dtype):
    """Rounding already-rounded data again must not change it."""
    a = np.random.random_sample((300, 200)).astype(dtype)
    for k in range(20):
        ar = round(a, k)
        # Round the *rounded* array a second time; rounding the original
        # input twice would only check determinism, not idempotence.
        ar2 = round(ar, k)
        np.testing.assert_equal(ar, ar2)
|
|
||
|
|
||
| # TODO: implement tie_to_even and round_to_nearest | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In BitInformation.jl the rounding is implemented as a scalar version which does not overwrite the input (as float32 is immutable, a copy is created anyway). However, I define a rounding function for arrays that can either act in-place (i.e. overwriting the bits in an existing array) or act on a copy of the array, such that the input array is unchanged.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Understood. This is more a question about numcodecs (e.g. for @jakirkham), rather than about BitInformation.jl.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No we shouldn't be overwriting the input buffer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about changing
to
to avoid the overwriting of the input buffer?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`a.astype(np.int32, copy=True)` is more canonical. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Continued in PR: #608