Skip to content

Commit 4f9da22

Browse files
authored
Merge branch 'main' into merge_dataless
2 parents e0c2cd8 + 37f4547 commit 4f9da22

File tree

561 files changed

+6710
-31327
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

561 files changed

+6710
-31327
lines changed

.github/workflows/ci-manifest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@ concurrency:
2323
jobs:
2424
manifest:
2525
name: "check-manifest"
26-
uses: scitools/workflows/.github/workflows/ci-manifest.yml@2025.09.6
26+
uses: scitools/workflows/.github/workflows/ci-manifest.yml@2025.10.3

.github/workflows/ci-template-check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ on:
1010

1111
jobs:
1212
prompt-share:
13-
uses: scitools/workflows/.github/workflows/ci-template-check.yml@2025.09.6
13+
uses: scitools/workflows/.github/workflows/ci-template-check.yml@2025.10.3
1414
secrets: inherit
1515
with:
1616
pr_number: ${{ github.event.pull_request.number }}

.github/workflows/refresh-lockfiles.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,5 @@ on:
1414

1515
jobs:
1616
refresh_lockfiles:
17-
uses: scitools/workflows/.github/workflows/refresh-lockfiles.yml@2025.09.6
17+
uses: scitools/workflows/.github/workflows/refresh-lockfiles.yml@2025.10.3
1818
secrets: inherit

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ repos:
2929
- id: no-commit-to-branch
3030

3131
- repo: https://github.com/astral-sh/ruff-pre-commit
32-
rev: "v0.13.2"
32+
rev: "v0.14.0"
3333
hooks:
3434
- id: ruff
3535
types: [file, python]

docs/src/whatsnew/latest.rst

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ This document explains the changes made to Iris for this release
4545
Also added a new documentation section on dataless cubes.
4646
(:issue:`5770`, :pull:`6581`)
4747

48+
#. `@ukmo-ccbunney`_ added a new :class:`~iris.util.CMLSettings` class to control
49+
the formatting of Cube CML output via a context manager.
50+
(:issue:`6244`, :pull:`6743`)
51+
4852

4953
🐛 Bugs Fixed
5054
=============
@@ -70,10 +74,18 @@ This document explains the changes made to Iris for this release
7074
#. N/A
7175

7276

73-
🚀 Performance Enhancements
74-
===========================
77+
🚀 Performance
78+
==============
7579

76-
#. N/A
80+
#. `@trexfeathers`_ investigated a significant performance regression in NetCDF
81+
loading and saving, caused by ``libnetcdf`` version ``4.9.3``.
82+
The regression is equal to several milliseconds per chunk
83+
of parallel operation; so a dataset containing ~100 chunks could be around
84+
0.5 seconds slower to load or save. This regression will NOT be fixed within
85+
Iris - doing so would introduce unacceptable complexity and potential
86+
concurrency problems. The regession has been reported to the NetCDF team; it
87+
is hoped that a future ``libnetcdf`` release will recover the original
88+
performance. See `netcdf-c#3183`_ for more details. (:pull:`6747`)
7789

7890

7991
🔥 Deprecations
@@ -106,9 +118,12 @@ This document explains the changes made to Iris for this release
106118
#. `@melissaKG`_ upgraded Iris' tests to no longer use the deprecated
107119
``git whatchanged`` command. (:pull:`6672`)
108120

109-
#. `@ukmo-ccbunney` merged functionality of ``assert_CML_approx_data`` into
121+
#. `@ukmo-ccbunney`_ merged functionality of ``assert_CML_approx_data`` into
110122
``assert_CML`` via the use of a new ``approx_data`` keyword. (:pull:`6713`)
111123

124+
#. `@ukmo-ccbunney`_ made ``assert_CML`` use stricter array formatting to avoid
125+
changes in tests due to Numpy version changes. (:pull:`6743`)
126+
112127

113128
.. comment
114129
Whatsnew author names (@github name) in alphabetical order. Note that,
@@ -119,4 +134,6 @@ This document explains the changes made to Iris for this release
119134

120135

121136
.. comment
122-
Whatsnew resources in alphabetical order:
137+
Whatsnew resources in alphabetical order:
138+
139+
.. _netcdf-c#3183: https://github.com/Unidata/netcdf-c/issues/3183

lib/iris/coords.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import iris.exceptions
3333
import iris.time
3434
import iris.util
35+
from iris.util import CML_SETTINGS
3536
import iris.warnings
3637

3738
#: The default value for ignore_axis which controls guess_coord_axis' behaviour
@@ -853,10 +854,45 @@ def xml_element(self, doc):
853854
if self.coord_system:
854855
element.appendChild(self.coord_system.xml_element(doc))
855856

857+
is_masked_array = np.ma.isMaskedArray(self._values)
858+
856859
# Add the values
857860
element.setAttribute("value_type", str(self._value_type_name()))
858861
element.setAttribute("shape", str(self.shape))
859862

863+
# data checksum
864+
if CML_SETTINGS.coord_checksum:
865+
crc = iris.util.array_checksum(self._values)
866+
element.setAttribute("checksum", crc)
867+
868+
if is_masked_array:
869+
# Add a checksum of the mask, or a marker when no elements are masked
870+
if np.ma.is_masked(self._values):
871+
crc = iris.util.array_checksum(self._values.mask)
872+
else:
873+
crc = "no-masked-elements"
874+
element.setAttribute("mask_checksum", crc)
875+
876+
# array ordering:
877+
def _order(array):
878+
order = ""
879+
if array.flags["C_CONTIGUOUS"]:
880+
order = "C"
881+
elif array.flags["F_CONTIGUOUS"]:
882+
order = "F"
883+
return order
884+
885+
if CML_SETTINGS.coord_order:
886+
element.setAttribute("order", _order(self._values))
887+
if is_masked_array:
888+
element.setAttribute("mask_order", _order(self._values.mask))
889+
890+
# masked element count:
891+
if CML_SETTINGS.masked_value_count and is_masked_array:
892+
element.setAttribute(
893+
"masked_count", str(np.count_nonzero(self._values.mask))
894+
)
895+
860896
# The values are referred to "points" of a coordinate and "data"
861897
# otherwise.
862898
if isinstance(self, Coord):
@@ -865,7 +901,31 @@ def xml_element(self, doc):
865901
values_term = "indices"
866902
else:
867903
values_term = "data"
868-
element.setAttribute(values_term, self._xml_array_repr(self._values))
904+
element.setAttribute(
905+
values_term,
906+
self._xml_array_repr(self._values),
907+
)
908+
909+
if iris.util.CML_SETTINGS.coord_data_array_stats and len(self._values) > 1:
910+
data = self._values
911+
912+
if np.issubdtype(data.dtype.type, np.number):
913+
data_min = data.min()
914+
data_max = data.max()
915+
if data_min == data_max:
916+
# When data is constant, std() is too sensitive.
917+
data_std = 0
918+
else:
919+
data_std = data.std()
920+
921+
stats_xml_element = doc.createElement("stats")
922+
stats_xml_element.setAttribute("std", str(data_std))
923+
stats_xml_element.setAttribute("min", str(data_min))
924+
stats_xml_element.setAttribute("max", str(data_max))
925+
stats_xml_element.setAttribute("masked", str(ma.is_masked(data)))
926+
stats_xml_element.setAttribute("mean", str(data.mean()))
927+
928+
element.appendChild(stats_xml_element)
869929

870930
return element
871931

@@ -896,7 +956,11 @@ def _xml_array_repr(data):
896956
if hasattr(data, "to_xml_attr"):
897957
result = data._values.to_xml_attr()
898958
else:
899-
result = iris.util.format_array(data)
959+
edgeitems = CML_SETTINGS.array_edgeitems
960+
if CML_SETTINGS.numpy_formatting:
961+
result = iris.util.format_array(data, edgeitems=edgeitems)
962+
else:
963+
result = iris.util.array_summary(data, edgeitems=edgeitems)
900964
return result
901965

902966
def _value_type_name(self):
@@ -2565,7 +2629,10 @@ def xml_element(self, doc):
25652629

25662630
# Add bounds, points are handled by the parent class.
25672631
if self.has_bounds():
2568-
element.setAttribute("bounds", self._xml_array_repr(self.bounds))
2632+
element.setAttribute(
2633+
"bounds",
2634+
self._xml_array_repr(self.bounds),
2635+
)
25692636

25702637
return element
25712638

lib/iris/cube.py

Lines changed: 69 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
from typing import TYPE_CHECKING, Any, Optional, TypeGuard
2323
import warnings
2424
from xml.dom.minidom import Document
25-
import zlib
2625

2726
from cf_units import Unit
2827
import dask.array as da
@@ -56,6 +55,7 @@
5655
from iris.mesh import MeshCoord
5756
import iris.exceptions
5857
import iris.util
58+
from iris.util import CML_SETTINGS
5959
import iris.warnings
6060

6161
__all__ = ["Cube", "CubeAttrsDict", "CubeList"]
@@ -171,7 +171,10 @@ def insert(self, index, cube):
171171
super(CubeList, self).insert(index, cube)
172172

173173
def xml(self, checksum=False, order=True, byteorder=True):
174-
"""Return a string of the XML that this list of cubes represents."""
174+
"""Return a string of the XML that this list of cubes represents.
175+
176+
See :func:`iris.util.CML_SETTINGS.set` for controlling the XML output formatting.
177+
"""
175178
with np.printoptions(legacy=NP_PRINTOPTIONS_LEGACY):
176179
doc = Document()
177180
cubes_xml_element = doc.createElement("cubes")
@@ -3902,12 +3905,29 @@ def xml(
39023905
order: bool = True,
39033906
byteorder: bool = True,
39043907
) -> str:
3905-
"""Return a fully valid CubeML string representation of the Cube."""
3908+
"""Return a fully valid CubeML string representation of the Cube.
3909+
3910+
The format of the generated XML can be controlled using the
3911+
``iris.util.CML_SETTINGS.set`` method as a context manager.
3912+
3913+
For example, to include array statistics for the coordinate data:
3914+
3915+
.. code-block:: python
3916+
3917+
with CML_SETTINGS.set(coord_data_array_stats=True):
3918+
print(cube.xml())
3919+
3920+
See :func:`iris.util.CML_SETTINGS.set` for more details.
3921+
3922+
"""
39063923
with np.printoptions(legacy=NP_PRINTOPTIONS_LEGACY):
39073924
doc = Document()
39083925

39093926
cube_xml_element = self._xml_element(
3910-
doc, checksum=checksum, order=order, byteorder=byteorder
3927+
doc,
3928+
checksum=checksum,
3929+
order=order,
3930+
byteorder=byteorder,
39113931
)
39123932
cube_xml_element.setAttribute("xmlns", XML_NAMESPACE_URI)
39133933
doc.appendChild(cube_xml_element)
@@ -3916,7 +3936,13 @@ def xml(
39163936
doc = self._sort_xml_attrs(doc)
39173937
return iris.util._print_xml(doc)
39183938

3919-
def _xml_element(self, doc, checksum=False, order=True, byteorder=True):
3939+
def _xml_element(
3940+
self,
3941+
doc,
3942+
checksum=False,
3943+
order=True,
3944+
byteorder=True,
3945+
):
39203946
cube_xml_element = doc.createElement("cube")
39213947

39223948
if self.standard_name:
@@ -4006,39 +4032,46 @@ def dimmeta_xml_element(element, typename, dimscall):
40064032
data_xml_element = doc.createElement("data")
40074033
data_xml_element.setAttribute("shape", str(self.shape))
40084034

4009-
# NB. Getting a checksum triggers any deferred loading,
4035+
# NB. Getting a checksum or data stats triggers any deferred loading,
40104036
# in which case it also has the side-effect of forcing the
40114037
# byte order to be native.
4038+
40124039
if checksum:
40134040
data = self.data
4014-
4015-
# Ensure consistent memory layout for checksums.
4016-
def normalise(data):
4017-
data = np.ascontiguousarray(data)
4018-
if data.dtype.newbyteorder("<") != data.dtype:
4019-
data = data.byteswap(False)
4020-
data.dtype = data.dtype.newbyteorder("<")
4021-
return data
4022-
4041+
crc = iris.util.array_checksum(data)
4042+
data_xml_element.setAttribute("checksum", crc)
40234043
if ma.isMaskedArray(data):
4024-
# Fill in masked values to avoid the checksum being
4025-
# sensitive to unused numbers. Use a fixed value so
4026-
# a change in fill_value doesn't affect the
4027-
# checksum.
4028-
crc = "0x%08x" % (zlib.crc32(normalise(data.filled(0))) & 0xFFFFFFFF,)
4029-
data_xml_element.setAttribute("checksum", crc)
40304044
if ma.is_masked(data):
4031-
crc = "0x%08x" % (zlib.crc32(normalise(data.mask)) & 0xFFFFFFFF,)
4045+
crc = iris.util.array_checksum(data.mask)
40324046
else:
40334047
crc = "no-masked-elements"
40344048
data_xml_element.setAttribute("mask_checksum", crc)
4049+
4050+
if CML_SETTINGS.data_array_stats:
4051+
data = self.data
4052+
data_min = data.min()
4053+
data_max = data.max()
4054+
if data_min == data_max:
4055+
# When data is constant, std() is too sensitive.
4056+
data_std = 0
40354057
else:
4036-
crc = "0x%08x" % (zlib.crc32(normalise(data)) & 0xFFFFFFFF,)
4037-
data_xml_element.setAttribute("checksum", crc)
4038-
elif self.has_lazy_data():
4039-
data_xml_element.setAttribute("state", "deferred")
4040-
else:
4041-
data_xml_element.setAttribute("state", "loaded")
4058+
data_std = data.std()
4059+
4060+
stats_xml_element = doc.createElement("stats")
4061+
stats_xml_element.setAttribute("std", str(data_std))
4062+
stats_xml_element.setAttribute("min", str(data_min))
4063+
stats_xml_element.setAttribute("max", str(data_max))
4064+
stats_xml_element.setAttribute("masked", str(ma.is_masked(data)))
4065+
stats_xml_element.setAttribute("mean", str(data.mean()))
4066+
4067+
data_xml_element.appendChild(stats_xml_element)
4068+
4069+
# We only print the "state" if we have not output checksum or data stats:
4070+
if not (checksum or CML_SETTINGS.data_array_stats):
4071+
if self.has_lazy_data():
4072+
data_xml_element.setAttribute("state", "deferred")
4073+
else:
4074+
data_xml_element.setAttribute("state", "loaded")
40424075

40434076
# Add the dtype, and also the array and mask orders if the
40444077
# data is loaded.
@@ -4065,8 +4098,14 @@ def _order(array):
40654098
if array_byteorder is not None:
40664099
data_xml_element.setAttribute("byteorder", array_byteorder)
40674100

4068-
if order and ma.isMaskedArray(data):
4069-
data_xml_element.setAttribute("mask_order", _order(data.mask))
4101+
if ma.isMaskedArray(data):
4102+
if CML_SETTINGS.masked_value_count:
4103+
data_xml_element.setAttribute(
4104+
"masked_count", str(np.count_nonzero(data.mask))
4105+
)
4106+
if order:
4107+
data_xml_element.setAttribute("mask_order", _order(data.mask))
4108+
40704109
else:
40714110
dtype = self.lazy_data().dtype
40724111
data_xml_element.setAttribute("dtype", dtype.name)

0 commit comments

Comments
 (0)