Skip to content

Commit e7c6f8e

Browse files
committed
dev
1 parent 20aaac8 commit e7c6f8e

File tree

2 files changed

+18
-23
lines changed

2 files changed

+18
-23
lines changed

Changelog.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ version NEXT
1111
(https://github.com/NCAS-CMS/cf-python/issues/715)
1212
* Improve `cf.Field.collapse` performance by lazily computing reduced
1313
axis coordinates (https://github.com/NCAS-CMS/cf-python/issues/741)
14+
* Reduce output CFA file size by tailoring the HDF5 chunk sizes
15+
to fit exactly the CFA instruction variables
16+
(https://github.com/NCAS-CMS/cf-python/issues/739)
1417
* Fix misleading error message when it is not possible to create area
1518
weights requested from `cf.Field.collapse`
1619
(https://github.com/NCAS-CMS/cf-python/issues/731)

cf/read_write/netcdf/netcdfwrite.py

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -430,10 +430,11 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
430430
431431
`None`
432432
433-
"""
433+
"""
434434
g = self.write_vars
435435

436436
ndim = data.ndim
437+
437438
cfa = self._cfa_aggregation_instructions(data, cfvar)
438439

439440
# ------------------------------------------------------------
@@ -485,12 +486,12 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
485486
data = cfa[term]
486487
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
487488
term_ncvar = self._cfa_write_term_variable(
488-
data ,#cfa[term],
489+
data,
489490
aggregated_data.get(term, f"cfa_{term}"),
490491
location_ncdimensions,
491492
)
492493
aggregated_data_attr.append(f"{term}: {term_ncvar}")
493-
494+
494495
# File
495496
term = "file"
496497
if substitutions:
@@ -506,7 +507,7 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
506507
data = cfa[term]
507508
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
508509
term_ncvar = self._cfa_write_term_variable(
509-
data, #cfa[term],
510+
data,
510511
aggregated_data.get(term, f"cfa_{term}"),
511512
fragment_ncdimensions,
512513
attributes=attributes,
@@ -527,7 +528,7 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
527528
data = cfa[term]
528529
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
529530
term_ncvar = self._cfa_write_term_variable(
530-
data, # cfa[term],
531+
data,
531532
aggregated_data.get(term, f"cfa_{term}"),
532533
dimensions,
533534
)
@@ -547,7 +548,7 @@ def _create_cfa_data(self, ncvar, ncdimensions, data, cfvar):
547548
data = cfa[term]
548549
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
549550
term_ncvar = self._cfa_write_term_variable(
550-
data, #cfa[term],
551+
data,
551552
aggregated_data.get(term, f"cfa_{term}"),
552553
dimensions,
553554
)
@@ -819,7 +820,7 @@ def _cfa_write_non_standard_terms(
819820
data = type(data)(dx)
820821
self.implementation.nc_set_hdf5_chunksizes(data, data.shape)
821822
term_ncvar = self._cfa_write_term_variable(
822-
data=data, #type(data)(dx),
823+
data=data,
823824
ncvar=aggregated_data.get(term, f"cfa_{term}"),
824825
ncdimensions=fragment_ncdimensions,
825826
)
@@ -893,10 +894,7 @@ def _cfa_aggregation_instructions(self, data, cfvar):
893894
from os.path import abspath, join, relpath
894895
from pathlib import PurePath
895896
from urllib.parse import urlparse
896-
import time # TODO
897-
print (f"\n{cfvar!r}") # TODO
898-
start = time.time() # TODO
899-
897+
900898
g = self.write_vars
901899

902900
# Define the CFA file substitutions, giving precedence over
@@ -911,15 +909,11 @@ def _cfa_aggregation_instructions(self, data, cfvar):
911909
# Size of the trailing dimension
912910
n_trailing = 0
913911

914-
start1 = time.time() # TODO
915912
aggregation_file = []
916913
aggregation_address = []
917914
aggregation_format = []
918-
nnn = 0
919915
for indices in data.chunk_indices():
920-
nnn += 1
921916
file_details = self._cfa_get_file_details(data[indices])
922-
923917
if len(file_details) != 1:
924918
if file_details:
925919
raise ValueError(
@@ -964,9 +958,7 @@ def _cfa_aggregation_instructions(self, data, cfvar):
964958
aggregation_file.append(tuple(filenames2))
965959
aggregation_address.append(addresses)
966960
aggregation_format.append(formats)
967-
print ('len(data.chunk_indices()) =',nnn)
968-
print (f"loop 1: {time.time() - start1:.3}")
969-
961+
970962
# Pad each value of the aggregation instruction arrays so that
971963
# it has 'n_trailing' elements
972964
a_shape = data.numblocks
@@ -1023,7 +1015,6 @@ def _cfa_aggregation_instructions(self, data, cfvar):
10231015
# Return Data objects
10241016
# ------------------------------------------------------------
10251017
data = type(data)
1026-
print (f"_cfa_aggregation_instructions: {time.time() - start:.3}")
10271018
return {
10281019
"location": data(aggregation_location),
10291020
"file": data(aggregation_file),
@@ -1074,13 +1065,14 @@ def _cfa_get_file_details(self, data):
10741065
{(('/home/file.pp',), (34556,), ('um',))}
10751066
10761067
"""
1077-
out = set()
1068+
out = []
1069+
out_append = out.append
10781070
for a in data.todict().values():
10791071
try:
1080-
out.update(
1081-
((a.get_filenames(), a.get_addresses(), a.get_formats()),)
1072+
out_append(
1073+
(a.get_filenames(), a.get_addresses(), a.get_formats())
10821074
)
10831075
except AttributeError:
10841076
pass
10851077

1086-
return out
1078+
return set(out)

0 commit comments

Comments
 (0)