2222 display_data ,
2323 is_log_level_info ,
2424 parse_indices ,
25+ persist_data ,
2526)
2627from ..mixin .container import Container
2728from ..mixin .files import Files
@@ -140,6 +141,9 @@ def __new__(cls, *args, **kwargs):
140141 # Function for determining whether or not to display data
141142 # elements during `__str__`
142143 instance ._display_data = display_data
144+ # Function for determining whether or not to persist computed
145+ # data
146+ instance ._persist_data = persist_data
143147 return instance
144148
145149 def __init__ (
@@ -2721,11 +2725,17 @@ def array(self):
27212725 """A numpy array copy of the data.
27222726
27232727 In-place changes to the returned numpy array do not affect the
2724- underlying dask array.
2728+ underlying Dask array.
27252729
27262730 The returned numpy array has the same mask hardness and fill
27272731 values as the data.
27282732
2733+ .. note:: If the `{{package}}.persist_data` function returns
2734+ True then calling `array` will persist the
2735+ underlying lazy Dask array into an equivalent
2736+ chunked Dask array, but now with the results fully
2737+ computed and cached memory.
2738+
27292739 Compare with `compute`.
27302740
27312741 **Performance**
@@ -2734,7 +2744,8 @@ def array(self):
27342744 returned `numpy` array is a deep copy of that returned by
27352745 created `compute`.
27362746
2737- .. seealso:: `datetime_array`, `compute`, `persist`
2747+ .. seealso:: `datetime_array`, `compute`, `persist`,
2748+ `{{package}}.persist_data`
27382749
27392750 **Examples**
27402751
@@ -4102,7 +4113,9 @@ def compressed(self, inplace=False):
41024113 d ._set_dask (dx , clear = self ._ALL , in_memory = True )
41034114 return d
41044115
4105- def compute (self , _force_to_memory = True , _cache_elements = True ):
4116+ def compute (
4117+ self , persist = None , _force_to_memory = True , _cache_elements = True
4118+ ):
41064119 """A view of the computed data.
41074120
41084121 In-place changes to the returned array *might* affect the
@@ -4120,11 +4133,30 @@ def compute(self, _force_to_memory=True, _cache_elements=True):
41204133
41214134 .. versionadded:: (cfdm) 1.11.2.0
41224135
4123- .. seealso:: `persist `, `array `, `datetime_array `,
4124- `sparse_array `
4136+ .. seealso:: `array `, `datetime_array `, `sparse_array `,
4137+ `persist`, `{{package}}.persist_data `
41254138
41264139 :Parameters:
41274140
4141+ persist: `None` or `bool`, optional
4142+ Control the persistence of computed data. Persisting
4143+ turns the underlying lazy dask array into an
4144+ equivalent chunked dask array, but now with the
4145+ results fully computed and cached memory. This can
4146+ avoid the expense of re-reading the data from disk, or
4147+ re-computing it, when the data is accessed on multiple
4148+ occasions.
4149+
4150+ If *persist* is `None` (the default) then the value of
4151+ *persist* will be taken from the
4152+ `{{package}}.persist_data` function. If *persist* is
4153+ True then the data is persisted, regardless of value
4154+ returned by `{{package}}.persist_data`. If *persist*
4155+ is False then the data is not persisted, regardless of
4156+ value returned by `{{package}}.persist_data`.
4157+
4158+ .. versionadded:: (cfdm) NEXTVERSION
4159+
41284160 _force_to_memory: `bool`, optional
41294161 If True (the default) then force the data resulting
41304162 from computing the returned Dask graph to be in
@@ -4172,12 +4204,28 @@ def compute(self, _force_to_memory=True, _cache_elements=True):
41724204 [0.029 0.059 0.039 0.07 0.058 0.072 0.009 0.017]
41734205 [0.006 0.036 0.019 0.035 0.018 0.037 0.034 0.013]]
41744206 >>> f.data.compute(_force_to_memory=False)
4175- <{{repr}}NetCDF4Array (5, 8): file.nc, q(5, 8)>
4207+ <{{repr}}PyfiveArray (5, 8): file.nc, q(5, 8)>
41764208
41774209 """
41784210 dx = self .to_dask_array (
41794211 _force_mask_hardness = False , _force_to_memory = _force_to_memory
41804212 )
4213+
4214+ if persist is None :
4215+ persist = self ._persist_data ()
4216+
4217+ if persist :
4218+ dx = dx .persist ()
4219+
4220+ # Note to developers: If the following `_set_dask` call is
4221+ # changed, consider making the same
4222+ # changes in `persist`.
4223+ self ._set_dask (
4224+ dx ,
4225+ clear = self ._ALL ^ self ._ARRAY ^ self ._CACHE ,
4226+ in_memory = True ,
4227+ )
4228+
41814229 a = dx .compute ()
41824230
41834231 if np .ma .isMA (a ) and a is not np .ma .masked :
@@ -4189,11 +4237,16 @@ def compute(self, _force_to_memory=True, _cache_elements=True):
41894237 a .set_fill_value (self .get_fill_value (None ))
41904238
41914239 if _cache_elements :
4192- from scipy .sparse import issparse
4240+ ok = False
4241+ if isinstance (a , (np .ndarray , int , float , bool , str )):
4242+ ok = True
4243+ else :
4244+ from scipy .sparse import issparse
41934245
4194- if isinstance (a , (np .ndarray , int , float , bool , str )) or issparse (
4195- a
4196- ):
4246+ if issparse (a ):
4247+ ok = True
4248+
4249+ if ok :
41974250 self .cache_elements (_array = a )
41984251
41994252 return a
@@ -6584,6 +6637,10 @@ def persist(self, inplace=False):
65846637 _force_mask_hardness = False , _force_to_memory = True
65856638 )
65866639 dx = dx .persist ()
6640+
6641+ # Note to developers: If the following `_set_dask` call is
6642+ # changed, consider making the same
6643+ # changes in `compute`.
65876644 d ._set_dask (
65886645 dx , clear = self ._ALL ^ self ._ARRAY ^ self ._CACHE , in_memory = True
65896646 )
0 commit comments