Skip to content

Commit e317691

Browse files
authored
Merge pull request #873 from davidhassell/quantization-init
Implement lossy compression via quantization
2 parents 0d44569 + bf6fd1d commit e317691

31 files changed

+911
-19
lines changed

Changelog.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ Version NEXTVERSION
33

44
**2025-??-??**
55

6+
* Implement lossy compression via quantization
7+
(https://github.com/NCAS-CMS/cf-python/issues/870)
8+
* New quantization class: `cf.Quantization`
9+
(https://github.com/NCAS-CMS/cf-python/issues/870)
10+
* New quantization methods: `cf.Field.get_quantization`,
11+
`cf.Field.get_quantize_on_write`, `cf.Field.set_quantize_on_write`,
12+
`cf.Field.del_quantize_on_write`
13+
(https://github.com/NCAS-CMS/cf-python/issues/870)
614
* New keyword parameter to `cf.write`: ``chunk_cache``
715
(https://github.com/NCAS-CMS/cf-python/issues/871)
816
* Read Zarr datasets with `cf.read`

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ of its array manipulation and can:
116116
* create running means from field constructs,
117117
* apply differential operators to field constructs,
118118
* create derived quantities (such as relative vorticity),
119+
* read and write data that are quantized to eliminate false
120+
precision.
119121

120122
Visualization
121123
=============

RELEASE.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
- [ ] Change the version and date in `cf/__init__.py` (`__version__` and
1818
`__date__` variables)
1919

20-
- [ ] Ensure that the requirements on dependencies & their versions are
21-
up-to-date and consistent in both the `requirements.txt` and in
22-
`docs/source/installation.rst`; and in the `_requires` list and
23-
`Version` checks in `cf/__init__.py`.
20+
- [ ] Ensure that the requirements on dependencies & their versions
21+
are up-to-date and consistent in both the `requirements.txt` and in
22+
`docs/source/installation.rst` (paying particular attention to
23+
`cfdm`); and in the `_requires` list and `Version` checks in
24+
`cf/__init__.py`.
2425

2526
- [ ] Make sure that `README.md` is up to date.
2627

cf/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@
241241
from .nodecountproperties import NodeCountProperties
242242
from .partnodecountproperties import PartNodeCountProperties
243243
from .interiorring import InteriorRing
244+
from .quantization import Quantization
244245
from .tiepointindex import TiePointIndex
245246

246247
from .bounds import Bounds

cf/cfimplementation.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
List,
2424
NodeCountProperties,
2525
PartNodeCountProperties,
26+
Quantization,
2627
TiePointIndex,
2728
)
2829
from .data import Data
@@ -147,6 +148,7 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs):
147148
H5netcdfArray=H5netcdfArray,
148149
NetCDF4Array=NetCDF4Array,
149150
PointTopologyArray=PointTopologyArray,
151+
Quantization=Quantization,
150152
RaggedContiguousArray=RaggedContiguousArray,
151153
RaggedIndexedArray=RaggedIndexedArray,
152154
RaggedIndexedContiguousArray=RaggedIndexedContiguousArray,
@@ -203,6 +205,7 @@ def implementation():
203205
'H5netcdfArray': cf.data.array.h5netcdfarray.H5netcdfArray,
204206
'NetCDF4Array': cf.data.array.netcdf4array.NetCDF4Array,
205207
'PointTopologyArray': <class 'cf.data.array.pointtopologyarray.PointTopologyArray'>,
208+
'Quantization': cf.quantization.Quantization,
206209
'RaggedContiguousArray': cf.data.array.raggedcontiguousarray.RaggedContiguousArray,
207210
'RaggedIndexedArray': cf.data.array.raggedindexedarray.RaggedIndexedArray,
208211
'RaggedIndexedContiguousArray': cf.data.array.raggedindexedcontiguousarray.RaggedIndexedContiguousArray,

cf/field.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
Flags,
2323
Index,
2424
List,
25+
Quantization,
2526
mixin,
2627
)
2728
from .constants import masked as cf_masked
@@ -280,7 +281,7 @@ def __new__(cls, *args, **kwargs):
280281
instance._Domain = Domain
281282
instance._DomainAncillary = DomainAncillary
282283
instance._DomainAxis = DomainAxis
283-
# instance._Data = Data
284+
instance._Quantization = Quantization
284285
instance._RaggedContiguousArray = RaggedContiguousArray
285286
instance._RaggedIndexedArray = RaggedIndexedArray
286287
instance._RaggedIndexedContiguousArray = RaggedIndexedContiguousArray

cf/fieldancillary.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,42 @@
11
import cfdm
22

3-
from . import mixin
3+
from . import Quantization, mixin
44

55

66
class FieldAncillary(mixin.PropertiesData, cfdm.FieldAncillary):
    """A field ancillary construct of the CF data model.

    A field ancillary construct supplies metadata that are
    distributed over the same sampling domain as the field itself.
    For example, when a data variable holds a variable retrieved from
    a satellite instrument, a related ancillary data variable might
    give the uncertainty estimates for those retrievals (varying over
    the same spatiotemporal domain).

    The construct comprises an array of the ancillary data, which is
    either zero-dimensional or depends on one or more of the domain
    axes, together with properties that describe the data. The data
    are assumed not to depend on any domain axes that the array does
    not span; along those axes the values are implicitly propagated.
    CF-netCDF ancillary data variables correspond to field ancillary
    constructs. Note that a field ancillary construct is constrained
    by the domain definition of the parent field construct but does
    not contribute to the domain's definition, unlike, for instance,
    an auxiliary coordinate construct or domain ancillary construct.

    **NetCDF interface**

    {{netCDF variable}}

    {{netCDF dataset chunks}}

    .. versionadded:: 2.0

    """

    def __new__(cls, *args, **kwargs):
        """Store component classes."""
        # Positional and keyword arguments are accepted but not
        # forwarded: only the class is passed to the parent __new__.
        new = super().__new__(cls)
        new._Quantization = Quantization
        return new

cf/quantization.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import cfdm
2+
3+
4+
class Quantization(cfdm.Quantization):
    """A quantization variable.

    A quantization variable uses a collection of parameters to
    describe a quantization algorithm.

    The ``algorithm`` parameter identifies a specific quantization
    algorithm by one of the keys of the `algorithm_parameters`
    dictionary.

    The ``implementation`` parameter holds unstandardised text that
    concisely conveys the provenance of the algorithm, including the
    name of the library or client that performed the quantization,
    the software version, and any other information needed to
    disambiguate the source of the algorithm employed. The text must
    take the form ``software-name version version-string
    [(optional-information)]``.

    The precision retained by the algorithm is defined with either
    the ``quantization_nsb`` or the ``quantization_nsd`` parameter.

    For instance, the following parameters describe quantization via
    the BitRound algorithm, retaining 6 significant bits, and
    implemented by libnetcdf::

       >>> q = {{package}}.{{class}}(
       ...         parameters={'algorithm': 'bitround',
       ...                     'quantization_nsb': 6,
       ...                     'implementation': 'libnetcdf version 4.9.4'}
       ... )
       >>> q.parameters()
       {'algorithm': 'bitround',
        'quantization_nsb': 6,
        'implementation': 'libnetcdf version 4.9.4'}

    See CF section 8.4. "Lossy Compression via Quantization".

    **NetCDF interface**

    {{netCDF variable}}

    {{netCDF group attributes}}

    .. versionadded:: NEXTVERSION

    """

    def __repr__(self):
        """Called by the `repr` built-in function.

        x.__repr__() <==> repr(x)

        The parent class's representation is returned with ``CF ``
        inserted after its first ``<`` character.

        .. versionadded:: NEXTVERSION

        """
        parent_repr = super().__repr__()
        # Only the first "<" is rewritten
        return parent_repr.replace("<", "<CF ", 1)

cf/test/test_Quantization.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import datetime
2+
import unittest
3+
4+
import cf
5+
6+
7+
class QuantizationTest(unittest.TestCase):
    """Unit test for the Quantization class."""

    # Shared fixture, created once when the class body is executed
    q = cf.Quantization({"quantization_nsd": 4, "algorithm": "bitgroom"})

    def test_Quantization_algorithm_parameters(self):
        """Test Quantization.algorithm_parameters."""
        expected = {
            "bitgroom": "quantization_nsd",
            "bitround": "quantization_nsb",
            "digitround": "quantization_nsd",
            "granular_bitround": "quantization_nsd",
        }
        actual = cf.Quantization().algorithm_parameters()
        self.assertEqual(actual, expected)

    def test_Quantization__str__(self):
        """Test Quantization.__str__."""
        expected = "algorithm=bitgroom, quantization_nsd=4"
        self.assertEqual(str(self.q), expected)

    def test_Quantization_dump(self):
        """Test Quantization.dump."""
        expected = (
            "Quantization: \n"
            "    algorithm = 'bitgroom'\n"
            "    quantization_nsd = 4"
        )
        self.assertEqual(self.q.dump(display=False), expected)
37+
38+
if __name__ == "__main__":
    # Report when the run happened and the cf environment before
    # running the tests
    run_date = datetime.datetime.now()
    print("Run date:", run_date)
    cf.environment()
    print("")
    unittest.main(verbosity=2)

0 commit comments

Comments
 (0)