2222from typing import TYPE_CHECKING , Any , Optional , TypeGuard
2323import warnings
2424from xml .dom .minidom import Document
25- import zlib
2625
2726from cf_units import Unit
2827import dask .array as da
5655 from iris .mesh import MeshCoord
5756import iris .exceptions
5857import iris .util
58+ from iris .util import CML_SETTINGS
5959import iris .warnings
6060
6161__all__ = ["Cube" , "CubeAttrsDict" , "CubeList" ]
@@ -171,7 +171,10 @@ def insert(self, index, cube):
171171 super (CubeList , self ).insert (index , cube )
172172
173173 def xml (self , checksum = False , order = True , byteorder = True ):
174- """Return a string of the XML that this list of cubes represents."""
174+ """Return a string of the XML that this list of cubes represents.
175+
176+ See :func:`iris.util.CML_SETTINGS.set` for controlling the XML output formatting.
177+ """
175178 with np .printoptions (legacy = NP_PRINTOPTIONS_LEGACY ):
176179 doc = Document ()
177180 cubes_xml_element = doc .createElement ("cubes" )
@@ -3902,12 +3905,29 @@ def xml(
39023905 order : bool = True ,
39033906 byteorder : bool = True ,
39043907 ) -> str :
3905- """Return a fully valid CubeML string representation of the Cube."""
3908+ """Return a fully valid CubeML string representation of the Cube.
3909+
3910+ The format of the generated XML can be controlled using the
3911+ ``iris.util.CML_SETTINGS.set`` method as a context manager.
3912+
3913+ For example, to include array statistics for the coordinate data:
3914+
3915+ .. code-block:: python
3916+
3917+ with CML_SETTINGS.set(coord_data_array_stats=True):
3918+ print(cube.xml())
3919+
3920+ See :func:`iris.util.CML_SETTINGS.set` for more details.
3921+
3922+ """
39063923 with np .printoptions (legacy = NP_PRINTOPTIONS_LEGACY ):
39073924 doc = Document ()
39083925
39093926 cube_xml_element = self ._xml_element (
3910- doc , checksum = checksum , order = order , byteorder = byteorder
3927+ doc ,
3928+ checksum = checksum ,
3929+ order = order ,
3930+ byteorder = byteorder ,
39113931 )
39123932 cube_xml_element .setAttribute ("xmlns" , XML_NAMESPACE_URI )
39133933 doc .appendChild (cube_xml_element )
@@ -3916,7 +3936,13 @@ def xml(
39163936 doc = self ._sort_xml_attrs (doc )
39173937 return iris .util ._print_xml (doc )
39183938
3919- def _xml_element (self , doc , checksum = False , order = True , byteorder = True ):
3939+ def _xml_element (
3940+ self ,
3941+ doc ,
3942+ checksum = False ,
3943+ order = True ,
3944+ byteorder = True ,
3945+ ):
39203946 cube_xml_element = doc .createElement ("cube" )
39213947
39223948 if self .standard_name :
@@ -4006,39 +4032,46 @@ def dimmeta_xml_element(element, typename, dimscall):
40064032 data_xml_element = doc .createElement ("data" )
40074033 data_xml_element .setAttribute ("shape" , str (self .shape ))
40084034
4009- # NB. Getting a checksum triggers any deferred loading,
4035+ # NB. Getting a checksum or data stats triggers any deferred loading,
40104036 # in which case it also has the side-effect of forcing the
40114037 # byte order to be native.
4038+
40124039 if checksum :
40134040 data = self .data
4014-
4015- # Ensure consistent memory layout for checksums.
4016- def normalise (data ):
4017- data = np .ascontiguousarray (data )
4018- if data .dtype .newbyteorder ("<" ) != data .dtype :
4019- data = data .byteswap (False )
4020- data .dtype = data .dtype .newbyteorder ("<" )
4021- return data
4022-
4041+ crc = iris .util .array_checksum (data )
4042+ data_xml_element .setAttribute ("checksum" , crc )
40234043 if ma .isMaskedArray (data ):
4024- # Fill in masked values to avoid the checksum being
4025- # sensitive to unused numbers. Use a fixed value so
4026- # a change in fill_value doesn't affect the
4027- # checksum.
4028- crc = "0x%08x" % (zlib .crc32 (normalise (data .filled (0 ))) & 0xFFFFFFFF ,)
4029- data_xml_element .setAttribute ("checksum" , crc )
40304044 if ma .is_masked (data ):
4031- crc = "0x%08x" % ( zlib . crc32 ( normalise ( data .mask )) & 0xFFFFFFFF , )
4045+ crc = iris . util . array_checksum ( data .mask )
40324046 else :
40334047 crc = "no-masked-elements"
40344048 data_xml_element .setAttribute ("mask_checksum" , crc )
4049+
4050+ if CML_SETTINGS .data_array_stats :
4051+ data = self .data
4052+ data_min = data .min ()
4053+ data_max = data .max ()
4054+ if data_min == data_max :
4055+ # When data is constant, std() is too sensitive to floating-point rounding (it may not be exactly 0), so report 0 directly.
4056+ data_std = 0
40354057 else :
4036- crc = "0x%08x" % (zlib .crc32 (normalise (data )) & 0xFFFFFFFF ,)
4037- data_xml_element .setAttribute ("checksum" , crc )
4038- elif self .has_lazy_data ():
4039- data_xml_element .setAttribute ("state" , "deferred" )
4040- else :
4041- data_xml_element .setAttribute ("state" , "loaded" )
4058+ data_std = data .std ()
4059+
4060+ stats_xml_element = doc .createElement ("stats" )
4061+ stats_xml_element .setAttribute ("std" , str (data_std ))
4062+ stats_xml_element .setAttribute ("min" , str (data_min ))
4063+ stats_xml_element .setAttribute ("max" , str (data_max ))
4064+ stats_xml_element .setAttribute ("masked" , str (ma .is_masked (data )))
4065+ stats_xml_element .setAttribute ("mean" , str (data .mean ()))
4066+
4067+ data_xml_element .appendChild (stats_xml_element )
4068+
4069+ # Only set the "state" attribute when neither a checksum nor data stats have been output:
4070+ if not (checksum or CML_SETTINGS .data_array_stats ):
4071+ if self .has_lazy_data ():
4072+ data_xml_element .setAttribute ("state" , "deferred" )
4073+ else :
4074+ data_xml_element .setAttribute ("state" , "loaded" )
40424075
40434076 # Add the dtype, and also the array and mask orders if the
40444077 # data is loaded.
@@ -4065,8 +4098,14 @@ def _order(array):
40654098 if array_byteorder is not None :
40664099 data_xml_element .setAttribute ("byteorder" , array_byteorder )
40674100
4068- if order and ma .isMaskedArray (data ):
4069- data_xml_element .setAttribute ("mask_order" , _order (data .mask ))
4101+ if ma .isMaskedArray (data ):
4102+ if CML_SETTINGS .masked_value_count :
4103+ data_xml_element .setAttribute (
4104+ "masked_count" , str (np .count_nonzero (data .mask ))
4105+ )
4106+ if order :
4107+ data_xml_element .setAttribute ("mask_order" , _order (data .mask ))
4108+
40704109 else :
40714110 dtype = self .lazy_data ().dtype
40724111 data_xml_element .setAttribute ("dtype" , dtype .name )
0 commit comments