Commit d53e228

committed
dev
1 parent 29ec058 commit d53e228

File tree

2 files changed: +19 -24 lines changed


Changelog.rst

Lines changed: 3 additions & 0 deletions
@@ -9,6 +9,9 @@ version NEXT
   to regrid the vertical axis in logarithmic coordinates to
   `cf.Field.regrids` and `cf.Field.regridc`
   (https://github.com/NCAS-CMS/cf-python/issues/715)
+* Reduce output CFA netCDF file size by setting the HDF5 chunksizes of
+  CFA variables to be no larger than required
+  (https://github.com/NCAS-CMS/cf-python/issues/739)
 * Fix misleading error message when it is not possible to create area
   weights requested from `cf.Field.collapse`
   (https://github.com/NCAS-CMS/cf-python/issues/731)
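The new changelog entry describes capping the HDF5 chunksizes of the (typically small) CFA aggregation-instruction variables at their own shape, so that no chunk in the output file is larger than the data it holds. As a rough, standalone sketch of that idea using netCDF4-python directly (the variable name "cfa_address", the shape, and the file name are made up for illustration; this is not the cf-python code path):

# Hypothetical illustration: write a small variable whose HDF5 chunksizes
# equal its shape, so no chunk is larger than required.
import numpy as np
import netCDF4

shape = (2, 3)  # shape of a small aggregation-instruction variable (made up)

with netCDF4.Dataset("cfa_chunks_example.nc", "w") as nc:
    nc.createDimension("i", shape[0])
    nc.createDimension("j", shape[1])
    v = nc.createVariable(
        "cfa_address", "i8", ("i", "j"), zlib=True, chunksizes=shape
    )
    v[:] = np.arange(np.prod(shape)).reshape(shape)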

cf/read_write/netcdf/netcdfwrite.py

Lines changed: 16 additions & 24 deletions
@@ -430,7 +430,7 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):

             `None`

-        """
+        """
         g = self.write_vars

         ndim = data.ndim
@@ -485,12 +485,12 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
         data = cfa[term]
         self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
         term_ncvar = self._cfa_write_term_variable(
-            data ,#cfa[term],
+            data,
             aggregated_data.get(term, f"cfa_{term}"),
             location_ncdimensions,
         )
         aggregated_data_attr.append(f"{term}: {term_ncvar}")
-
+
         # File
         term = "file"
         if substitutions:
@@ -506,7 +506,7 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
         data = cfa[term]
         self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
         term_ncvar = self._cfa_write_term_variable(
-            data, #cfa[term],
+            data,
             aggregated_data.get(term, f"cfa_{term}"),
             fragment_ncdimensions,
             attributes=attributes,
@@ -527,7 +527,7 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
         data = cfa[term]
         self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
         term_ncvar = self._cfa_write_term_variable(
-            data, # cfa[term],
+            data,
             aggregated_data.get(term, f"cfa_{term}"),
             dimensions,
         )
@@ -547,7 +547,7 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
         data = cfa[term]
         self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
         term_ncvar = self._cfa_write_term_variable(
-            data, #cfa[term],
+            data,
             aggregated_data.get(term, f"cfa_{term}"),
             dimensions,
         )
@@ -819,7 +819,7 @@ def _cfa_write_non_standard_terms(
             data = type(data)(dx)
             self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
             term_ncvar = self._cfa_write_term_variable(
-                data=data, #type(data)(dx),
+                data=data,
                 ncvar=aggregated_data.get(term, f"cfa_{term}"),
                 ncdimensions=fragment_ncdimensions,
             )
@@ -893,10 +893,7 @@ def _cfa_aggregation_instructions(self, data, cfvar):
         from os.path import abspath, join, relpath
         from pathlib import PurePath
         from urllib.parse import urlparse
-        import time # TODO
-        print (f"\n{cfvar!r}") # TODO
-        start = time.time() # TODO
-
+
         g = self.write_vars

         # Define the CFA file susbstitutions, giving precedence over
@@ -911,15 +908,12 @@ def _cfa_aggregation_instructions(self, data, cfvar):
         # Size of the trailing dimension
         n_trailing = 0

-        start1 = time.time() # TODO
         aggregation_file = []
         aggregation_address = []
         aggregation_format = []
-        nnn = 0
         for indices in data.chunk_indices():
-            nnn += 1
             file_details = self._cfa_get_file_details(data[indices])
-
+
             if len(file_details) != 1:
                 if file_details:
                     raise ValueError(
@@ -964,9 +958,7 @@ def _cfa_aggregation_instructions(self, data, cfvar):
             aggregation_file.append(tuple(filenames2))
             aggregation_address.append(addresses)
             aggregation_format.append(formats)
-        print ('len(data.chunk_indices()) =',nnn)
-        print (f"loop 1: {time.time() - start1:.3}")
-
+
         # Pad each value of the aggregation instruction arrays so that
         # it has 'n_trailing' elements
         a_shape = data.numblocks
@@ -980,6 +972,8 @@ def _cfa_aggregation_instructions(self, data, cfvar):
         ):
             n = n_trailing - len(filenames)
             if n:
+                # This chunk has fewer fragment files than some
+                # others, so some padding is required.
                 pad = ("",) * n
                 aggregation_file[i] = filenames + pad
                 aggregation_format[i] = formats + pad
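For context on the comment added in the hunk above: each chunk of the aggregated data may reference a different number of fragment files, so the per-chunk tuples are padded with empty strings up to a common trailing length before being written as a regular array. A small self-contained sketch of that padding step (dummy filenames, not taken from the module):

# Hypothetical illustration of the padding logic: make every chunk's tuple of
# fragment filenames the same length by appending empty strings.
aggregation_file = [("a.nc",), ("b0.nc", "b1.nc"), ("c.nc",)]
n_trailing = max(len(filenames) for filenames in aggregation_file)

for i, filenames in enumerate(aggregation_file):
    n = n_trailing - len(filenames)
    if n:
        # This chunk has fewer fragment files than some others, so pad it.
        aggregation_file[i] = filenames + ("",) * n

print(aggregation_file)
# [('a.nc', ''), ('b0.nc', 'b1.nc'), ('c.nc', '')]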
@@ -1023,7 +1017,6 @@ def _cfa_aggregation_instructions(self, data, cfvar):
         # Return Data objects
         # ------------------------------------------------------------
         data = type(data)
-        print (f"_cfa_aggregation_instructions: {time.time() - start:.3}")
         return {
             "location": data(aggregation_location),
             "file": data(aggregation_file),
@@ -1074,13 +1067,12 @@ def _cfa_get_file_details(self, data):
         {(('/home/file.pp',), (34556,), ('um',))}

         """
-        out = set()
+        out = []
+        append = out.append
         for a in data.todict().values():
             try:
-                out.update(
-                    ((a.get_filenames(), a.get_addresses(), a.get_formats()),)
-                )
+                append((a.get_filenames(), a.get_addresses(), a.get_formats()))
             except AttributeError:
                 pass

-        return out
+        return set(out)
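The last hunk swaps a per-element set.update for appending to a list (via a cached bound method) and a single set() conversion at the end; both forms yield the same set of (filenames, addresses, formats) tuples. A generic sketch of the two equivalent patterns, using dummy tuples rather than real cf fragment objects:

# Dummy (filenames, addresses, formats) tuples, including a duplicate.
items = [
    (("/home/file.pp",), (34556,), ("um",)),
    (("/home/file.pp",), (34556,), ("um",)),
    (("/home/other.nc",), ("tas",), ("nc",)),
]

# Old pattern: grow a set one element at a time.
out_old = set()
for item in items:
    out_old.update((item,))

# New pattern: append to a list, then deduplicate once at the end.
out_new = []
append = out_new.append
for item in items:
    append(item)
out_new = set(out_new)

assert out_old == out_new  # both contain the same two unique tuples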
