Skip to content

Commit be9fda1

Browse files
authored
Merge branch 'main' into cfa-write
2 parents 2ba85c1 + e94f82a commit be9fda1

File tree

18 files changed

+244
-112
lines changed

18 files changed

+244
-112
lines changed

Changelog.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@ version NEXT
99
to regrid the vertical axis in logarithmic coordinates to
1010
`cf.Field.regrids` and `cf.Field.regridc`
1111
(https://github.com/NCAS-CMS/cf-python/issues/715)
12+
* Improve the performance of reading and accessing the data of PP and
13+
UM fields files (https://github.com/NCAS-CMS/cf-python/issues/746)
1214
* Improve `cf.Field.collapse` performance by lazily computing reduced
1315
axis coordinates (https://github.com/NCAS-CMS/cf-python/issues/741)
16+
* Improve `cf.Field.__getitem__` performance by not re-calculating
17+
axis cyclicity (https://github.com/NCAS-CMS/cf-python/issues/744)
1418
* Reduce output CFA netCDF file size by setting the HDF5 chunksizes of
1519
CFA variables to be no larger than required
1620
(https://github.com/NCAS-CMS/cf-python/issues/739)

DOCUMENTATION.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
* The generation of recipes using Sphinx-Gallery requires:
22

33
```txt
4-
sphinx-gallery==0.11.0
5-
sphinx-copybutton==0.5.1
6-
sphinx-toggleprompt==0.2.0
7-
sphinxcontrib-spelling==4.3.0
8-
sphinxcontrib.serializinghtml==1.1.5
9-
sphinx==2.4.5
4+
pip install sphinx-gallery==0.11.0
5+
pip install sphinx-copybutton==0.5.1
6+
pip install sphinx-toggleprompt==0.2.0
7+
pip install sphinxcontrib-spelling==4.3.0
8+
pip install sphinxcontrib.serializinghtml==1.1.5
9+
pip install sphinxcontrib.htmlhelp==1.0.3
10+
pip install sphinx==2.4.5
11+
```
1012

1113
* The `.py` files to generate recipes are stored in `docs/source/recipes/`.
1214

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,10 @@ of its array manipulation and can:
107107
including those with geometry cells and UGRID mesh topologies,
108108
* perform histogram, percentile and binning operations on field
109109
constructs,
110-
* regrid field constructs with (multi-)linear, nearest neighbour,
111-
first- and second-order conservative and higher order patch recovery
112-
methods, to and from structured and unstructured grids,
110+
* regrid structured grid, mesh and DSG field constructs with
111+
(multi-)linear, nearest neighbour, first- and second-order
112+
conservative and higher order patch recovery methods, including 3-d
113+
regridding,
113114
* apply convolution filters to field constructs,
114115
* create running means from field constructs,
115116
* apply differential operators to field constructs,

RELEASE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
* Make sure that `Changelog.rst` is up to date (version, date and
1818
changes).
1919

20+
* Search for occurrences of `NEXTRELEASE` and replace them with the
21+
release version (i.e. the value of the now correct `cf.__version__`)
22+
2023
* Deprecated methods and keyword arguments: Check the code for
2124
deprecated methods and keyword arguments that can be completely
2225
removed, i.e. those with a ``removed_at`` version that is at or

cf/data/array/umarray.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
load_stash2standard_name,
99
parse_indices,
1010
)
11-
from ...umread_lib.umfile import File
11+
from ...umread_lib.umfile import File, Rec
1212
from .abstract import Array
1313
from .mixin import FileArrayMixin
1414

@@ -272,13 +272,22 @@ def _get_rec(self, f, header_offset):
272272
The record container.
273273
274274
"""
275-
# TODOCFA: This method doesn't require data_offset and disk_length,
276-
# so plays nicely with CFA. Is it fast enough that we can
277-
# use this method always?
278-
for v in f.vars:
279-
for r in v.recs:
280-
if r.hdr_offset == header_offset:
281-
return r
275+
return Rec.from_file_and_offsets(f, header_offset)
276+
277+
# ------------------------------------------------------------
278+
# Leave the following commented code here for debugging
279+
# purposes. Replacing the above line with this code moves the
280+
# calculation of the data offset and disk length from pure
281+
# Python to the C library, at the expense of completely
282+
# parsing the file. Note: If you do replace the above line
283+
# with the commented code, then you *must* also set
284+
# 'parse=True' in the `open` method.
285+
# ------------------------------------------------------------
286+
287+
# for v in f.vars:
288+
# for r in v.recs:
289+
# if r.hdr_offset == header_offset:
290+
# return r
282291

283292
def _set_units(self, int_hdr):
284293
"""The units and calendar properties.
@@ -666,21 +675,24 @@ def get_word_size(self):
666675
return self._get_component("word_size", None)
667676

668677
def open(self):
669-
"""Returns an open dataset containing the data array.
678+
"""Returns an open dataset and the address of the data.
670679
671680
:Returns:
672681
673-
`umfile_lib.File`, `int`
682+
`umread_lib.umfile.File`, `int`
683+
The open file object, and the start address in bytes
684+
of the lookup header.
674685
675686
**Examples**
676687
677688
>>> f.open()
678-
(<cf.umread_lib.umfile.File object at 0x7fdc25056380>, 44567)
689+
(<cf.umread_lib.umfile.File object at 0x7fdc25056380>, 4)
679690
680691
"""
681692
return super().open(
682693
File,
683694
byte_ordering=self.get_byte_ordering(),
684695
word_size=self.get_word_size(),
685696
fmt=self.get_fmt(),
697+
parse=False,
686698
)

cf/docstring/docstring.py

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -621,30 +621,33 @@
621621
True, or a tuple of both if *item* is True.""",
622622
# regrid RegridOperator
623623
"{{regrid RegridOperator}}": """* `RegridOperator`: The grid is defined by a regrid
624-
operator that has been returned by a previous call
625-
with the *return_operator* parameter set to True.
626-
627-
Unlike the other options, for which the regrid weights
628-
need to be calculated, the regrid operator already
629-
contains the weights. Therefore, for cases where
630-
multiple fields with the same source grids need to be
631-
regridded to the same destination grid, using a regrid
632-
operator can give performance improvements by avoiding
633-
having to calculate the weights for each source
634-
field. Note that for the other types of *dst*
635-
parameter, the calculation of the regrid weights is
636-
not a lazy operation.
637-
638-
.. note:: The source grid of the regrid operator is
639-
immediately checked for compatibility with
640-
the grid of the source field. By default
641-
only the computationally cheap tests are
642-
performed (checking that the coordinate
643-
system, cyclicity and grid shape are the
644-
same), with the grid coordinates not being
645-
checked. The coordinates check will be
646-
carried out, however, if the
647-
*check_coordinates* parameter is True.""",
624+
operator that has been returned by a previous call
625+
with the *return_operator* parameter set to True.
626+
627+
Unlike the other options, for which the regrid
628+
weights need to be calculated, the regrid operator
629+
already contains the weights. Therefore, for cases
630+
where multiple fields with the same source grids
631+
need to be regridded to the same destination grid,
632+
using a regrid operator can give performance
633+
improvements by avoiding having to calculate the
634+
weights for each source field. Note that for the
635+
other types of *dst* parameter, the calculation of
636+
the regrid weights is not a lazy operation.
637+
638+
.. note:: When *dst* is a `RegridOperator`, the
639+
source grid of the regrid operator is
640+
immediately checked for compatibility with
641+
the grid of the source field. By default
642+
only the computationally cheap tests are
643+
performed (checking that the coordinate
644+
system, cyclicity, grid shape, regridding
645+
dimesionality, mesh location, and feature
646+
type are the same), with the grid
647+
coordinates not being checked. The
648+
coordinates check will be carried out,
649+
however, if the *check_coordinates*
650+
parameter is True.""",
648651
# Returns cfa_file_substitutions
649652
"{{Returns cfa_file_substitutions}}": """The CFA-netCDF file name substitutions in a dictionary
650653
whose key/value pairs are the file name parts to be

cf/field.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,25 @@ def __getitem__(self, indices):
441441
for axis, size in zip(data_axes, new_data.shape):
442442
domain_axes[axis].set_size(size)
443443

444+
# Record which axes were cyclic before the subspace
445+
org_cyclic = [data_axes.index(axis) for axis in new.cyclic()]
446+
447+
# Set the subspaced data
448+
new.set_data(new_data, axes=data_axes, copy=False)
449+
450+
# Update axis cyclicity. Note that this can only entail
451+
# setting an originally cyclic axis to be non-cyclic. Doing
452+
# this now enables us to disable the (possibly very slow)
453+
# automatic check for cyclicity on the 'set_construct' calls
454+
# below.
455+
if org_cyclic:
456+
new_cyclic = new_data.cyclic()
457+
[
458+
new.cyclic(i, iscyclic=False)
459+
for i in org_cyclic
460+
if i not in new_cyclic
461+
]
462+
444463
# ------------------------------------------------------------
445464
# Subspace constructs with data
446465
# ------------------------------------------------------------
@@ -507,6 +526,7 @@ def __getitem__(self, indices):
507526
key=key,
508527
axes=construct_axes,
509528
copy=False,
529+
autocyclic={"no-op": True},
510530
)
511531

512532
new.set_data(new_data, axes=data_axes, copy=False)
@@ -13701,7 +13721,7 @@ def regrids(
1370113721

1370213722
Ignored if *dst* is a `RegridOperator`.
1370313723

13704-
.. versionadded:: 3.17.0
13724+
.. versionadded:: NEXTRELEASE
1370513725

1370613726
dst_z: optional
1370713727
If `None`, the default, then the regridding is 2-d in
@@ -13715,7 +13735,7 @@ def regrids(
1371513735

1371613736
Ignored if *dst* is a `RegridOperator`.
1371713737

13718-
.. versionadded:: 3.17.0
13738+
.. versionadded:: NEXTRELEASE
1371913739

1372013740
z: optional
1372113741
The *z* parameter is a convenience that may be used to
@@ -13729,11 +13749,11 @@ def regrids(
1372913749
*Example:*
1373013750
``z='Z'`` is equivalent to ``src_z='Z', dst_z='Z'``.
1373113751

13732-
.. versionadded:: 3.17.0
13752+
.. versionadded:: NEXTRELEASE
1373313753

1373413754
{{ln_z: `bool` or `None`, optional}}
1373513755

13736-
.. versionadded:: 3.17.0
13756+
.. versionadded:: NEXTRELEASE
1373713757

1373813758
{{verbose: `int` or `str` or `None`, optional}}
1373913759

@@ -14000,7 +14020,7 @@ def regridc(
1400014020

1400114021
Ignored if *dst* is a `RegridOperator`.
1400214022

14003-
.. versionadded:: 3.17.0
14023+
.. versionadded:: NEXTRELEASE
1400414024

1400514025
dst_z: optional
1400614026
If not `None` then *dst_z* specifies the identity of a
@@ -14010,7 +14030,7 @@ def regridc(
1401014030

1401114031
Ignored if *dst* is a `RegridOperator`.
1401214032

14013-
.. versionadded:: 3.17.0
14033+
.. versionadded:: NEXTRELEASE
1401414034

1401514035
z: optional
1401614036
The *z* parameter is a convenience that may be used to
@@ -14022,11 +14042,11 @@ def regridc(
1402214042
*Example:*
1402314043
``z='Z'`` is equivalent to ``src_z='Z', dst_z='Z'``.
1402414044

14025-
.. versionadded:: 3.17.0
14045+
.. versionadded:: NEXTRELEASE
1402614046

1402714047
{{ln_z: `bool` or `None`, optional}}
1402814048

14029-
.. versionadded:: 3.17.0
14049+
.. versionadded:: NEXTRELEASE
1403014050

1403114051
{{inplace: `bool`, optional}}
1403214052

cf/mixin/fielddomain.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1109,10 +1109,15 @@ def autocyclic(self, key=None, coord=None, verbose=None, config={}):
11091109
11101110
:Returns:
11111111
1112-
`bool`
1112+
`bool` or `None`
1113+
`True` if the dimension is cyclic, `False` if it isn't,
1114+
or `None` if no checks were done.
11131115
11141116
"""
11151117
noop = config.get("no-op")
1118+
if noop:
1119+
# Don't do anything
1120+
return
11161121

11171122
if "cyclic" in config:
11181123
if not config["cyclic"]:

0 commit comments

Comments
 (0)