zarr-developers · jakirkham · May 18, 2022 · Dec 17, 2021 · Dec 17, 2021 · Dec 17, 2021
diff --git a/numcodecs/bitround.py b/numcodecs/bitround.py
@@ -0,0 +1,59 @@
+import numpy as np
+
+
+from .abc import Codec
+from .compat import ensure_ndarray, ndarray_copy
+
+
+class BitRound(Codec):
+    """Lossy codec that rounds the mantissa of float32 data.
+
+    Rounding operates on the raw 32-bit pattern of each value: it is
+    rounded to the nearest pattern whose low ``23 - keepbits`` bits are
+    zero, with ties going to the even neighbour.
+
+    Parameters
+    ----------
+    keepbits : int
+        Number of mantissa bits to keep. float32 has 23 mantissa bits, so
+        23 leaves the data unchanged and larger values make ``encode``
+        raise ``ValueError``.
+    """
+
+    codec_id = 'bitround'
+
+    def __init__(self, keepbits: int):
+        self.keepbits = keepbits
+
+    def encode(self, buf):
+        """Return the rounded bit patterns of ``buf`` as a new int32 array.
+
+        The input buffer is left untouched. Raises ``TypeError`` when the
+        data is not float32 and ``ValueError`` when ``keepbits`` exceeds 23.
+        """
+        a = ensure_ndarray(buf)
+        if a.dtype != np.float32:
+            raise TypeError(f'BitRound only supports float32 data, got {a.dtype}')
+        maskbits = 23 - self.keepbits
+        if maskbits < 0:
+            raise ValueError('keepbits must not exceed 23 for float32 data')
+        # Work on a copy so the caller's buffer is not overwritten.
+        b = a.view(dtype=np.int32).copy()
+        if maskbits == 0:
+            # Nothing to discard; the rounding below would need 1 << -1.
+            return b
+        # Built as uint32 and reinterpreted, because a pattern with the top
+        # bit set does not fit in a signed 32-bit integer.
+        kept_bits = (0xFFFFFFFF >> maskbits) << maskbits
+        mask = np.array(kept_bits, dtype=np.uint32).view(np.int32)
+        half_quantum1 = (1 << (maskbits - 1)) - 1
+        # Adding (half - 1) plus the lowest kept bit, then truncating, rounds
+        # to nearest with ties to even.
+        b += ((b >> maskbits) & 1) + half_quantum1
+        b &= mask
+        return b
+
+    def decode(self, buf, out=None):
+        """Reinterpret ``buf`` as float32 and pass it to ``ndarray_copy`` with ``out``."""
+        data = ensure_ndarray(buf).view(np.float32)
+        out = ndarray_copy(data, out)
+        return out
diff --git a/numcodecs/tests/test_bitround.py b/numcodecs/tests/test_bitround.py
@@ -0,0 +1,93 @@
+import numpy as np
+
+import pytest
+
+from numcodecs.bitround import BitRound
+
+# adapted from https://github.com/milankl/BitInformation.jl/blob/main/test/round_nearest.jl
+
+
+# TODO: add other dtypes
+@pytest.fixture(params=[np.float32])
+def dtype(request):
+    """Run each test once per dtype listed in ``params``."""
+    return request.param
+
+
+# number of mantissa bits for each dtype
+MBITS = {np.float32: 23}
+
+
+def round(data, keepbits):
+    """Encode then decode ``data`` with ``BitRound(keepbits=keepbits)``.
+
+    NOTE: this helper shadows the builtin ``round`` inside this module.
+    """
+    codec = BitRound(keepbits=keepbits)
+    data = data.copy()  # protect the caller's array if encode works in place
+    encoded = codec.encode(data)
+    return codec.decode(encoded)
+
+
+def test_round_zero_to_zero(dtype):
+    """Zeros are unchanged for every keepbits below the mantissa width."""
+    a = np.zeros((3, 2), dtype=dtype)
+    # The original Julia test loops over range(-5, 50), which includes
+    # negative keepbits; only 0 <= k < mantissa bits is exercised here.
+    for k in range(MBITS[dtype]):
+        ar = round(a, k)
+        np.testing.assert_equal(a, ar)
+
+
+def test_round_one_to_one(dtype):
+    """Ones are unchanged for every keepbits below the mantissa width."""
+    a = np.ones((3, 2), dtype=dtype)
+    for k in range(MBITS[dtype]):
+        ar = round(a, k)
+        np.testing.assert_equal(a, ar)
+
+
+def test_round_minus_one_to_minus_one(dtype):
+    """Minus ones are unchanged for every keepbits below the mantissa width."""
+    a = -np.ones((3, 2), dtype=dtype)
+    for k in range(MBITS[dtype]):
+        ar = round(a, k)
+        np.testing.assert_equal(a, ar)
+
+
+# NOTE: fails with a 'negative shift count' error for as long as the codec
+# does not special-case keepbits == number of mantissa bits.
+def test_no_rounding(dtype):
+    """Keeping every mantissa bit must return the data unchanged."""
+    a = np.random.random_sample((300, 200)).astype(dtype)
+    keepbits = MBITS[dtype]
+    ar = round(a, keepbits)
+    np.testing.assert_equal(a, ar)
+
+
+APPROX_KEEPBITS = {np.float32: 10}
+
+
+def test_approx_equal(dtype):
+    """Rounded values stay within the round-to-nearest error bound."""
+    a = np.random.random_sample((300, 200)).astype(dtype)
+    keepbits = APPROX_KEEPBITS[dtype]
+    ar = round(a, keepbits)
+    # Round-to-nearest with keepbits mantissa bits is off by at most half a
+    # unit in the last kept place, i.e. a relative error of 2**-(keepbits + 1).
+    # sqrt(eps) of float32 (~2**-11.5) is tighter than that bound for
+    # keepbits=10, so it cannot serve as an element-wise tolerance.
+    rtol = 2.0 ** -(keepbits + 1)
+    np.testing.assert_allclose(a, ar, rtol=rtol)
+
+
+def test_idempotence(dtype):
+    """Rounding already-rounded data must not change it."""
+    a = np.random.random_sample((300, 200)).astype(dtype)
+    for k in range(20):
+        ar = round(a, k)
+        ar2 = round(ar, k)  # round the rounded data, not the original
+        np.testing.assert_equal(ar, ar2)
+
+
+# TODO: implement tie_to_even and round_to_nearest