Skip to content

Commit 9e29106

Browse files
authored
Merge pull request #836 from davidhassell/dask-in-cfdm
Get core Dask functionality from `cfdm`
2 parents 0f2c702 + 582a076 commit 9e29106

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1297
-7722
lines changed

Changelog.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
version NEXTVERSION
22
-------------------
33

4-
**2024-??-??**
4+
**2024-12-??**
55

66
* Allow ``'nearest_dtos'`` 2-d regridding to work with discrete
77
sampling geometry source grids
@@ -23,6 +23,8 @@ version NEXTVERSION
2323
* New class `cf.NetCDF4Array`
2424
* New class `cf.CFAH5netcdfArray`
2525
* New class `cf.CFANetCDF4Array`
26+
* Replace core `dask` functionality with that imported from `cfdm`
27+
(https://github.com/NCAS-CMS/cf-python/issues/839)
2628
* Fix bug that sometimes puts an incorrect ``radian-1`` or
2729
``radian-2`` in the returned units of the differential operator
2830
methods and functions
@@ -41,9 +43,11 @@ version NEXTVERSION
4143
(https://github.com/NCAS-CMS/cf-python/issues/828)
4244
* New dependency: ``h5netcdf>=1.3.0``
4345
* New dependency: ``h5py>=3.10.0``
44-
* New dependency: ``s3fs>=2024.2.0``
46+
* New dependency: ``s3fs>=2024.6.0``
47+
* Changed dependency: ``numpy>=1.15,<2.0``
4548
* Changed dependency: ``1.11.2.0<=cfdm<1.11.3.0``
4649
* Changed dependency: ``cfunits>=3.3.7``
50+
* Changed dependency: ``dask>=2024.6.0,<=2024.7.1``
4751

4852
----
4953

cf/__init__.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@
123123
raise ImportError(_error0 + str(error1))
124124

125125
try:
126-
import numpy
126+
import numpy as np
127127
except ImportError as error1:
128128
raise ImportError(_error0 + str(error1))
129129

@@ -191,10 +191,11 @@
191191

192192
# Check the version of numpy
193193
_minimum_vn = "1.22"
194-
if Version(numpy.__version__) < Version(_minimum_vn):
195-
raise RuntimeError(
196-
f"Bad numpy version: cf requires numpy>={_minimum_vn}. "
197-
f"Got {numpy.__version__} at {numpy.__file__}"
194+
_maximum_vn = "2.0"
195+
if not Version(_minimum_vn) <= Version(np.__version__) < Version(_maximum_vn):
196+
raise ValueError(
197+
"Bad numpy version: cf requires _minimum_vn}<=numpy<{_maximum_vn}. "
198+
f"Got {np.__version__} at {np.__file__}"
198199
)
199200

200201
# Check the version of cfunits
@@ -208,15 +209,30 @@
208209
# Check the version of cfdm
209210
_minimum_vn = "1.11.2.0"
210211
_maximum_vn = "1.11.3.0"
211-
_cfdm_version = Version(cfdm.__version__)
212-
if not Version(_minimum_vn) <= _cfdm_version < Version(_maximum_vn):
212+
if (
213+
not Version(_minimum_vn)
214+
<= Version(cfdm.__version__)
215+
< Version(_maximum_vn)
216+
):
213217
raise RuntimeError(
214218
f"Bad cfdm version: cf requires {_minimum_vn}<=cfdm<{_maximum_vn}. "
215-
f"Got {_cfdm_version} at {cfdm.__file__}"
219+
f"Got {cfdm.__version__} at {cfdm.__file__}"
216220
)
217221

218222
# Check the version of dask
219223

224+
_minimum_vn = "2024.6.1"
225+
_maximum_vn = "2024.7.1"
226+
if (
227+
not Version(_minimum_vn)
228+
<= Version(dask.__version__)
229+
<= Version(_maximum_vn)
230+
):
231+
raise ValueError(
232+
"Bad dask version: cf requires {_minimum_vn}<=dask<={_maximum_vn}. "
233+
f"Got {dask.__version__} at {dask.__file__}"
234+
)
235+
220236
# Check the version of Python
221237
_minimum_vn = "3.8.0"
222238
if Version(platform.python_version()) < Version(_minimum_vn):
@@ -233,6 +249,8 @@
233249
f"Got {scipy.__version__} at {scipy.__file__}"
234250
)
235251

252+
del _minimum_vn, _maximum_vn
253+
236254
from .constructs import Constructs
237255

238256
from .mixin import Coordinate

cf/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
Find the total amount of physical memory (in bytes).
3838
3939
CHUNKSIZE: `int`
40-
The chunk size (in bytes) for data storage and processing.
40+
The Dask chunk size (in bytes). See `cf.chunksize`.
4141
4242
TEMPDIR: `str`
4343
The location to store temporary files. By default it is the

cf/data/array/fullarray.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import numpy as np
2+
from cfdm.data.mixin import IndexMixin
23

34
from ...functions import indices_shape, parse_indices
45
from .abstract import Array
5-
from .mixin import IndexMixin
66

77
_FULLARRAY_HANDLED_FUNCTIONS = {}
88

cf/data/array/h5netcdfarray.py

Lines changed: 1 addition & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import cfdm
22

33
from ...mixin_container import Container
4-
from .locks import netcdf_lock
5-
from .mixin import ActiveStorageMixin, ArrayMixin, FileArrayMixin, IndexMixin
4+
from .mixin import ActiveStorageMixin, ArrayMixin, FileArrayMixin
65

76

87
class H5netcdfArray(
98
ActiveStorageMixin,
10-
IndexMixin,
119
FileArrayMixin,
1210
ArrayMixin,
1311
Container,
@@ -23,59 +21,3 @@ class H5netcdfArray(
2321
.. versionadded:: NEXTVERSION
2422
2523
"""
26-
27-
def __dask_tokenize__(self):
28-
"""Return a value fully representative of the object.
29-
30-
.. versionadded:: NEXTVERSION
31-
32-
"""
33-
return super().__dask_tokenize__() + (self.get_mask(),)
34-
35-
@property
36-
def _lock(self):
37-
"""Set the lock for use in `dask.array.from_array`.
38-
39-
Returns a lock object because concurrent reads are not
40-
currently supported by the HDF5 library. The lock object will
41-
be the same for all `NetCDF4Array` and `H5netcdfArray`
42-
instances, regardless of the dataset they access, which means
43-
that access to all netCDF and HDF files coordinates around the
44-
same lock.
45-
46-
.. versionadded:: NEXTVERSION
47-
48-
"""
49-
return netcdf_lock
50-
51-
def _get_array(self, index=None):
52-
"""Returns a subspace of the dataset variable.
53-
54-
.. versionadded:: NEXTVERSION
55-
56-
.. seealso:: `__array__`, `index`
57-
58-
:Parameters:
59-
60-
{{index: `tuple` or `None`, optional}}
61-
62-
:Returns:
63-
64-
`numpy.ndarray`
65-
The subspace.
66-
67-
"""
68-
if index is None:
69-
index = self.index()
70-
71-
# We need to lock because the netCDF file is about to be accessed.
72-
self._lock.acquire()
73-
74-
# It's cfdm.H5netcdfArray.__getitem__ that we want to
75-
# call here, but we use 'Container' in super because
76-
# that comes immediately before cfdm.H5netcdfArray in
77-
# the method resolution order.
78-
array = super(Container, self).__getitem__(index)
79-
80-
self._lock.release()
81-
return array

cf/data/array/locks.py

Lines changed: 0 additions & 4 deletions
This file was deleted.

cf/data/array/mixin/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@
33
from .cfamixin import CFAMixin
44
from .compressedarraymixin import CompressedArrayMixin
55
from .filearraymixin import FileArrayMixin
6-
from .indexmixin import IndexMixin

cf/data/array/mixin/cfamixin.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
from itertools import accumulate, product
44

55
import numpy as np
6-
7-
from ...utils import chunk_locations, chunk_positions
6+
from cfdm.data.utils import chunk_locations, chunk_positions
87

98

109
class CFAMixin:

cf/data/array/mixin/compressedarraymixin.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,11 @@ def to_dask_array(self, chunks="auto"):
7676
from functools import partial
7777

7878
import dask.array as da
79+
from cfdm.data.utils import normalize_chunks
7980
from dask import config
8081
from dask.array.core import getter
8182
from dask.base import tokenize
8283

83-
from ...utils import normalize_chunks
84-
8584
name = (f"{self.__class__.__name__}-{tokenize(self)}",)
8685

8786
dtype = self.dtype

0 commit comments

Comments
 (0)