Skip to content

Commit 627105b

Browse files
authored
Merge pull request #743 from davidhassell/cfa-write
Reduce output CFA netCDF file size by setting the HDF5 chunksizes of CFA variables to be no larger than required
2 parents e94f82a + be9fda1 commit 627105b

File tree

3 files changed

+53
-9
lines changed

3 files changed

+53
-9
lines changed

Changelog.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ version NEXT
1515
axis coordinates (https://github.com/NCAS-CMS/cf-python/issues/741)
1616
* Improve `cf.Field.__getitem__` performance by not re-calculating
1717
axis cyclicity (https://github.com/NCAS-CMS/cf-python/issues/744)
18+
* Reduce output CFA netCDF file size by setting the HDF5 chunksizes of
19+
CFA variables to be no larger than required
20+
(https://github.com/NCAS-CMS/cf-python/issues/739)
1821
* Fix misleading error message when it is not possible to create area
1922
weights requested from `cf.Field.collapse`
2023
(https://github.com/NCAS-CMS/cf-python/issues/731)

cf/cfimplementation.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,33 @@ class CFImplementation(cfdm.CFDMImplementation):
4848
4949
"""
5050

51+
def nc_set_hdf5_chunksizes(self, data, sizes, override=False):
52+
"""Set the data HDF5 chunksizes.
53+
54+
.. versionadded:: NEXTVERSION
55+
56+
:Parameters:
57+
58+
data: `Data`
59+
The data.
60+
61+
sizes: sequence of `int`
62+
The new HDF5 chunk sizes.
63+
64+
override: `bool`, optional
65+
If True then set the HDF5 chunk sizes even if some
66+
have already been specified. If False, the default,
67+
then only set the HDF5 chunk sizes if none have
68+
already been specified.
69+
70+
:Returns:
71+
72+
`None`
73+
74+
"""
75+
if override or not data.nc_hdf5_chunksizes():
76+
data.nc_set_hdf5_chunksizes(sizes)
77+
5178
def set_construct(self, parent, construct, axes=None, copy=True, **kwargs):
5279
"""Insert a construct into a field or domain.
5380

cf/read_write/netcdf/netcdfwrite.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -483,8 +483,10 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
483483

484484
# Location
485485
term = "location"
486+
data = cfa[term]
487+
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
486488
term_ncvar = self._cfa_write_term_variable(
487-
cfa[term],
489+
data,
488490
aggregated_data.get(term, f"cfa_{term}"),
489491
location_ncdimensions,
490492
)
@@ -502,8 +504,10 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
502504
else:
503505
attributes = None
504506

507+
data = cfa[term]
508+
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
505509
term_ncvar = self._cfa_write_term_variable(
506-
cfa[term],
510+
data,
507511
aggregated_data.get(term, f"cfa_{term}"),
508512
fragment_ncdimensions,
509513
attributes=attributes,
@@ -521,8 +525,10 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
521525
else:
522526
dimensions = fragment_ncdimensions
523527

528+
data = cfa[term]
529+
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
524530
term_ncvar = self._cfa_write_term_variable(
525-
cfa[term],
531+
data,
526532
aggregated_data.get(term, f"cfa_{term}"),
527533
dimensions,
528534
)
@@ -539,8 +545,10 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
539545
else:
540546
dimensions = fragment_ncdimensions
541547

548+
data = cfa[term]
549+
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
542550
term_ncvar = self._cfa_write_term_variable(
543-
cfa[term],
551+
data,
544552
aggregated_data.get(term, f"cfa_{term}"),
545553
dimensions,
546554
)
@@ -809,8 +817,10 @@ def _cfa_write_non_standard_terms(
809817
terms.append(term)
810818

811819
# Create the new CFA term variable
820+
data = type(data)(dx)
821+
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
812822
term_ncvar = self._cfa_write_term_variable(
813-
data=type(data)(dx),
823+
data=data,
814824
ncvar=aggregated_data.get(term, f"cfa_{term}"),
815825
ncdimensions=fragment_ncdimensions,
816826
)
@@ -904,6 +914,7 @@ def _cfa_aggregation_instructions(self, data, cfvar):
904914
aggregation_format = []
905915
for indices in data.chunk_indices():
906916
file_details = self._cfa_get_file_details(data[indices])
917+
907918
if len(file_details) != 1:
908919
if file_details:
909920
raise ValueError(
@@ -962,6 +973,8 @@ def _cfa_aggregation_instructions(self, data, cfvar):
962973
):
963974
n = n_trailing - len(filenames)
964975
if n:
976+
# This chunk has fewer fragment files than some
977+
# others, so some padding is required.
965978
pad = ("",) * n
966979
aggregation_file[i] = filenames + pad
967980
aggregation_format[i] = formats + pad
@@ -1055,13 +1068,14 @@ def _cfa_get_file_details(self, data):
10551068
{(('/home/file.pp',), (34556,), ('um',))}
10561069
10571070
"""
1058-
out = set()
1071+
out = []
1072+
out_append = out.append
10591073
for a in data.todict().values():
10601074
try:
1061-
out.update(
1062-
((a.get_filenames(), a.get_addresses(), a.get_formats()),)
1075+
out_append(
1076+
(a.get_filenames(), a.get_addresses(), a.get_formats())
10631077
)
10641078
except AttributeError:
10651079
pass
10661080

1067-
return out
1081+
return set(out)

0 commit comments

Comments
 (0)