Skip to content

Commit c734c0c

Browse files
authored
Merge pull request #805 from davidhassell/active-storage-new
Optimised lazy indexing - h5netcdf backend - Active storage reductions
2 parents 39136ff + 93fa1f0 commit c734c0c

File tree

87 files changed

+4406
-1494
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+4406
-1494
lines changed

Changelog.rst

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
1+
version NEXTVERSION + 1
2+
-----------------------
3+
4+
**2024-??-??**
5+
6+
* Allow access to netCDF-4 files in S3 object stores
7+
(https://github.com/NCAS-CMS/cf-python/issues/712)
8+
* New class `cf.H5netcdfArray`
9+
* New class `cf.NetCDF4Array`
10+
* New class `cf.CFAH5netcdfArray`
11+
* New class `cf.CFANetCDF4Array`
12+
* New dependency: ``h5netcdf>=1.3.0``
13+
* New dependency: ``h5py>=3.10.0``
14+
* New dependency: ``s3fs>=2024.2.0``
15+
* Changed dependency: ``1.11.2.0<=cfdm<1.11.3.0``
16+
* Changed dependency: ``cfunits>=3.3.7``
17+
18+
----
19+
120
version NEXTVERSION
221
-------------------
322

@@ -141,6 +160,8 @@ version 3.16.0
141160
* Changed dependency: ``1.11.0.0<=cfdm<1.11.1.0``
142161
* New dependency: ``scipy>=1.10.0``
143162

163+
----
164+
144165
version 3.15.4
145166
--------------
146167

@@ -279,7 +300,7 @@ version 3.14.1
279300

280301
----
281302

282-
version 3.14.0 (*first Dask release*)
303+
version 3.14.0 (*first Dask version*)
283304
-------------------------------------
284305

285306
**2023-01-31**
@@ -314,7 +335,7 @@ version 3.14.0 (*first Dask release*)
314335

315336
----
316337

317-
version 3.13.1 (*last LAMA release*)
338+
version 3.13.1 (*last LAMA version*)
318339
------------------------------------
319340

320341
**2022-10-17**

cf/__init__.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,17 @@
1313
1414
* read field constructs from netCDF, CDL, PP and UM datasets,
1515
16+
* read field constructs and domain constructs from netCDF, CDL, PP and
17+
UM datasets with a choice of netCDF backends,
18+
19+
* read files from OPeNDAP servers and S3 object stores,
20+
1621
* create new field constructs in memory,
1722
1823
* write and append field constructs to netCDF datasets on disk,
1924
25+
* read, write, and manipulate UGRID mesh topologies,
26+
2027
* read, write, and create coordinates defined by geometry cells,
2128
2229
* read netCDF and CDL datasets containing hierarchical groups,
@@ -74,8 +81,8 @@
7481
"""
7582

7683
__Conventions__ = "CF-1.11"
77-
__date__ = "2024-04-26"
78-
__version__ = "3.16.2"
84+
__date__ = "2024-??-??"
85+
__version__ = "3.17.0"
7986

8087
_requires = (
8188
"numpy",
@@ -199,8 +206,8 @@
199206
)
200207

201208
# Check the version of cfdm
202-
_minimum_vn = "1.11.1.0"
203-
_maximum_vn = "1.11.2.0"
209+
_minimum_vn = "1.11.2.0"
210+
_maximum_vn = "1.11.3.0"
204211
_cfdm_version = Version(cfdm.__version__)
205212
if not Version(_minimum_vn) <= _cfdm_version < Version(_maximum_vn):
206213
raise RuntimeError(
@@ -209,12 +216,6 @@
209216
)
210217

211218
# Check the version of dask
212-
_minimum_vn = "2022.12.1"
213-
if Version(dask.__version__) < Version(_minimum_vn):
214-
raise RuntimeError(
215-
f"Bad dask version: cf requires dask>={_minimum_vn}. "
216-
f"Got {dask.__version__} at {dask.__file__}"
217-
)
218219

219220
# Check the version of Python
220221
_minimum_vn = "3.8.0"
@@ -274,15 +275,19 @@
274275
from .data.array import (
275276
BoundsFromNodesArray,
276277
CellConnectivityArray,
277-
CFANetCDFArray,
278+
CFAH5netcdfArray,
279+
CFANetCDF4Array,
278280
FullArray,
279281
GatheredArray,
282+
H5netcdfArray,
280283
NetCDFArray,
284+
NetCDF4Array,
281285
PointTopologyArray,
282286
RaggedContiguousArray,
283287
RaggedIndexedArray,
284288
RaggedIndexedContiguousArray,
285289
SubsampledArray,
290+
UMArray,
286291
)
287292

288293
from .data.fragment import (

cf/cellmethod.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class CellMethod(cfdm.CellMethod):
5656
def __new__(cls, *args, **kwargs):
5757
"""This must be overridden in subclasses.
5858
59-
.. versionadded:: (cfdm) 3.7.0
59+
.. versionadded:: 3.7.0
6060
6161
"""
6262
instance = super().__new__(cls)

cf/cfimplementation.py

Lines changed: 33 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,16 @@
2626
TiePointIndex,
2727
)
2828
from .data import Data
29+
30+
# REVIEW: h5: `cfimplementation.py`: import `CFAH5netcdfArray`, `CFANetCDF4Array`, `H5netcdfArray`, `NetCDF4Array`
2931
from .data.array import (
3032
BoundsFromNodesArray,
3133
CellConnectivityArray,
32-
CFANetCDFArray,
34+
CFAH5netcdfArray,
35+
CFANetCDF4Array,
3336
GatheredArray,
34-
NetCDFArray,
37+
H5netcdfArray,
38+
NetCDF4Array,
3539
PointTopologyArray,
3640
RaggedContiguousArray,
3741
RaggedIndexedArray,
@@ -112,65 +116,41 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs):
112116
parent, construct, axes=axes, copy=copy, **kwargs
113117
)
114118

115-
def initialise_CFANetCDFArray(
116-
self,
117-
filename=None,
118-
address=None,
119-
dtype=None,
120-
mask=True,
121-
units=False,
122-
calendar=False,
123-
instructions=None,
124-
substitutions=None,
125-
term=None,
126-
x=None,
127-
**kwargs,
128-
):
129-
"""Return a `CFANetCDFArray` instance.
119+
# REVIEW: h5: `initialise_CFANetCDF4Array`: new method to initialise `CFANetCDF4Array`
120+
def initialise_CFANetCDF4Array(self, **kwargs):
121+
"""Return a `CFANetCDF4Array` instance.
130122
131123
:Parameters:
132124
133-
filename: `str`
134-
135-
address: (sequence of) `str` or `int`
136-
137-
dytpe: `numpy.dtype`
138-
139-
mask: `bool`, optional
125+
kwargs: optional
126+
Initialisation parameters to pass to the new instance.
140127
141-
units: `str` or `None`, optional
128+
:Returns:
142129
143-
calendar: `str` or `None`, optional
130+
`CFANetCDF4Array`
144131
145-
instructions: `str`, optional
132+
"""
133+
cls = self.get_class("CFANetCDF4Array")
134+
return cls(**kwargs)
146135

147-
substitutions: `dict`, optional
136+
# REVIEW: h5: `initialise_CFAH5netcdfArray`: new method to initialise `CFAH5netcdfArray`
137+
def initialise_CFAH5netcdfArray(self, **kwargs):
138+
"""Return a `CFAH5netcdfArray` instance.
148139
149-
term: `str`, optional
140+
.. versionadded:: NEXTVERSION
150141
151-
x: `dict`, optional
142+
:Parameters:
152143
153144
kwargs: optional
154-
Ignored.
145+
Initialisation parameters to pass to the new instance.
155146
156147
:Returns:
157148
158-
`CFANetCDFArray`
149+
`CFAH5netcdfArray`
159150
160151
"""
161-
cls = self.get_class("CFANetCDFArray")
162-
return cls(
163-
filename=filename,
164-
address=address,
165-
dtype=dtype,
166-
mask=mask,
167-
units=units,
168-
calendar=calendar,
169-
instructions=instructions,
170-
substitutions=substitutions,
171-
term=term,
172-
x=x,
173-
)
152+
cls = self.get_class("CFAH5netcdfArray")
153+
return cls(**kwargs)
174154

175155

176156
_implementation = CFImplementation(
@@ -179,7 +159,8 @@ def initialise_CFANetCDFArray(
179159
CellConnectivity=CellConnectivity,
180160
CellMeasure=CellMeasure,
181161
CellMethod=CellMethod,
182-
CFANetCDFArray=CFANetCDFArray,
162+
CFAH5netcdfArray=CFAH5netcdfArray,
163+
CFANetCDF4Array=CFANetCDF4Array,
183164
CoordinateReference=CoordinateReference,
184165
DimensionCoordinate=DimensionCoordinate,
185166
Domain=Domain,
@@ -202,7 +183,8 @@ def initialise_CFANetCDFArray(
202183
BoundsFromNodesArray=BoundsFromNodesArray,
203184
CellConnectivityArray=CellConnectivityArray,
204185
GatheredArray=GatheredArray,
205-
NetCDFArray=NetCDFArray,
186+
H5netcdfArray=H5netcdfArray,
187+
NetCDF4Array=NetCDF4Array,
206188
PointTopologyArray=PointTopologyArray,
207189
RaggedContiguousArray=RaggedContiguousArray,
208190
RaggedIndexedArray=RaggedIndexedArray,
@@ -236,7 +218,8 @@ def implementation():
236218
'CellConnectivityArray': cf.data.array.cellconnectivityarray.CellConnectivityArray,
237219
'CellMeasure': cf.cellmeasure.CellMeasure,
238220
'CellMethod': cf.cellmethod.CellMethod,
239-
'CFANetCDFArray': cf.data.array.cfanetcdfarray.CFANetCDFArray,
221+
'CFAH5netcdfArray': cf.data.array.cfah5netcdfarray.CFAH5netcdfArray,
222+
'CFANetCDF4Array': cf.data.array.cfanetcdf4array.CFANetCDF4Array,
240223
'CoordinateReference': cf.coordinatereference.CoordinateReference,
241224
'DimensionCoordinate': cf.dimensioncoordinate.DimensionCoordinate,
242225
'Domain': cf.domain.Domain,
@@ -257,7 +240,8 @@ def implementation():
257240
'PartNodeCountProperties': cf.partnodecountproperties.PartNodeCountProperties,
258241
'Data': cf.data.data.Data,
259242
'GatheredArray': cf.data.array.gatheredarray.GatheredArray,
260-
'NetCDFArray': cf.data.array.netcdfarray.NetCDFArray,
243+
'H5netcdfArray': cf.data.array.h5netcdfarray.H5netcdfArray,
244+
'NetCDF4Array': cf.data.array.netcdf4array.NetCDF4Array,
261245
'PointTopologyArray': <class 'cf.data.array.pointtopologyarray.PointTopologyArray'>,
262246
'RaggedContiguousArray': cf.data.array.raggedcontiguousarray.RaggedContiguousArray,
263247
'RaggedIndexedArray': cf.data.array.raggedindexedarray.RaggedIndexedArray,

cf/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@
6363
"LOG_LEVEL": logging.getLevelName(logging.getLogger().level),
6464
"BOUNDS_COMBINATION_MODE": "AND",
6565
"CHUNKSIZE": parse_bytes(_CHUNKSIZE),
66+
# REVIEW: active: `CONSTANTS`: new constants 'active_storage', 'active_storage_url', 'active_storage_max_requests'
67+
"active_storage": False,
68+
"active_storage_url": None,
69+
"active_storage_max_requests": 100,
6670
}
6771

6872
masked = np.ma.masked

cf/data/array/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
11
from .boundsfromnodesarray import BoundsFromNodesArray
22
from .cellconnectivityarray import CellConnectivityArray
3-
from .cfanetcdfarray import CFANetCDFArray
3+
4+
# REVIEW: h5: `__init__.py`: import `CFAH5netcdfArray`
5+
from .cfah5netcdfarray import CFAH5netcdfArray
6+
7+
# REVIEW: h5: `__init__.py`: import `CFANetCDF4Array`
8+
from .cfanetcdf4array import CFANetCDF4Array
49
from .fullarray import FullArray
510
from .gatheredarray import GatheredArray
11+
12+
# REVIEW: h5: `__init__.py`: import `H5netcdfArray`
13+
from .h5netcdfarray import H5netcdfArray
614
from .netcdfarray import NetCDFArray
15+
16+
# REVIEW: h5: `__init__.py`: import `NetCDF4Array`
17+
from .netcdf4array import NetCDF4Array
718
from .pointtopologyarray import PointTopologyArray
819
from .raggedcontiguousarray import RaggedContiguousArray
920
from .raggedindexedarray import RaggedIndexedArray

cf/data/array/cfah5netcdfarray.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# REVIEW: h5: `CFAH5netcdfArray`: New class for accessing CFA with `h5netcdf`
2+
from .h5netcdfarray import H5netcdfArray
3+
from .mixin import CFAMixin
4+
5+
6+
class CFAH5netcdfArray(CFAMixin, H5netcdfArray):
7+
"""A CFA-netCDF array accessed with `h5netcdf`.
8+
9+
.. versionadded:: NEXTVERSION
10+
11+
"""

cf/data/array/cfanetcdf4array.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# REVIEW: h5: `CFANetCDF4Array`: New class for accessing CFA with `netCDF4`
2+
from .mixin import CFAMixin
3+
from .netcdf4array import NetCDF4Array
4+
5+
6+
class CFANetCDF4Array(CFAMixin, NetCDF4Array):
7+
"""A CFA-netCDF array accessed with `netCDF4`.
8+
9+
.. versionadded:: NEXTVERSION
10+
11+
"""

0 commit comments

Comments
 (0)