@@ -92,7 +92,7 @@ def relative_vorticity(
9292 ) # pragman: no cover
9393
9494
95- def histogram (* digitized ):
95+ def histogram (* digitized , density = False ):
9696 """Return the distribution of a set of variables in the form of an
9797 N-dimensional histogram.
9898
@@ -151,6 +151,17 @@ def histogram(*digitized):
151151 manipulating the dimensions of the digitized field
152152 construct's data to ensure that broadcasting can occur.
153153
154+ density: `bool`, optional
155+ If False, the default, the result will contain the number
156+ of samples in each bin. If True, the result is the value
157+ of the probability density function at the bin, normalized
158+ such that the integral over the range is 1. Note that the
159+ sum of the histogram values will not be equal to 1 unless
160+ bins of unity size are chosen; it is not a probability
161+ mass function.
162+
163+ .. versionadded:: NEXTVERSION
164+
154165 :Returns:
155166
156167 `Field`
@@ -197,7 +208,7 @@ def histogram(*digitized):
197208 [0.1174 0.1317]
198209 [0.1317 0.146 ]]
199210 >>> h = cf.histogram(indices)
200- >>> rint (h)
211+ >>> print (h)
201212 Field: number_of_observations
202213 -----------------------------
203214 Data : number_of_observations(specific_humidity(10)) 1
@@ -216,7 +227,21 @@ def histogram(*digitized):
216227 [0.1031 0.1174]
217228 [0.1174 0.1317]
218229 [0.1317 0.146 ]]
230+ >>> p = cf.histogram(indices, density=True)
231+ >>> print(p)
232+ Field: long_name=probability density function
233+ ---------------------------------------------
234+ Data : long_name=probability density function(specific_humidity(10)) 1
235+ Cell methods : latitude: longitude: point
236+ Dimension coords: specific_humidity(10) = [0.01015, ..., 0.13885] 1
237+ >>> print(p.data.round(2).array))
238+ [15.73 12.24 15.73 6.99 8.74 1.75 1.75 1.75 3.5 1.75]
239+
240+ The sum of the density values multiplied by the bin sizes is
241+ unity:
219242
243+ >>> print((p * p.dimension_coordinate().cellsize).sum().array)
244+ 1.0
220245
221246 Create a two-dimensional histogram based on specific humidity and
222247 temperature bins. The temperature bins in this example are derived
@@ -225,8 +250,8 @@ def histogram(*digitized):
225250
226251 >>> g = f.copy()
227252 >>> g.standard_name = 'air_temperature'
228- >>> import numpy
229- >>> g[...] = numpy.random.normal(loc=290, scale=10, size= 40).reshape(5, 8)
253+ >>> import numpy as np
254+ >>> g[...] = np.arange( 40).reshape(5, 8) + 253.15
230255 >>> g.override_units('K', inplace=True)
231256 >>> print(g)
232257 Field: air_temperature (ncvar%q)
@@ -247,13 +272,28 @@ def histogram(*digitized):
247272 Dimension coords: air_temperature(5) = [281.1054839143287, ..., 313.9741786365939] K
248273 : specific_humidity(10) = [0.01015, ..., 0.13885] 1
249274 >>> print(h.array)
250- [[2 1 5 3 2 -- -- -- -- --]
251- [1 1 2 -- 1 -- 1 1 -- --]
252- [4 4 2 1 1 1 -- -- 1 1]
253- [1 1 -- -- 1 -- -- -- 1 --]
254- [1 -- -- -- -- -- -- -- -- --]]
275+ [[3 3 2 -- -- -- -- -- -- --]
276+ [1 1 2 1 3 -- -- -- -- --]
277+ [1 -- -- 1 -- 1 1 1 2 1]
278+ [2 1 1 2 2 -- -- -- -- --]
279+ [2 2 4 -- -- -- -- -- -- --]]
255280 >>> h.sum()
256281 <CF Data(): 40 1>
282+ >>> p = cf.histogram(indices, indices_t, density=True)
283+ >>> print(p)
284+ Field: long_name=probability density function
285+ ---------------------------------------------
286+ Data : long_name=probability density function(air_temperature(5), specific_humidity(10)) K-1
287+ Cell methods : latitude: longitude: point
288+ Dimension coords: air_temperature(5) = [257.05, ..., 288.25] K
289+ : specific_humidity(10) = [0.01015, ..., 0.13885] 1
290+ >>> print(p.array)
291+ >>> print(p.data.round(2).array))
292+ [[0.67 0.67 0.45 -- -- -- -- -- -- --]
293+ [0.22 0.22 0.45 0.22 0.67 -- -- -- -- --]
294+ [0.22 -- -- 0.22 -- 0.22 0.22 0.22 0.45 0.22]
295+ [0.45 0.22 0.22 0.45 0.45 -- -- -- -- --]
296+ [0.45 0.45 0.9 -- -- -- -- -- -- --]]
257297
258298 """
259299 if not digitized :
@@ -264,7 +304,26 @@ def histogram(*digitized):
264304 f = digitized [0 ].copy ()
265305 f .clear_properties ()
266306
267- return f .bin ("sample_size" , digitized = digitized )
307+ out = f .bin ("sample_size" , digitized = digitized )
308+
309+ if density :
310+ # Get the measure of each bin. This is the outer product of
311+ # the cell sizes of each dimension coordinate construct (the
312+ # dimension coordinate constructs define the bins).
313+ data_axes = out .get_data_axes ()
314+ dim = out .dimension_coordinate (filter_by_axis = (data_axes [0 ],))
315+ bin_measures = dim .cellsize
316+ for axis in data_axes [1 :]:
317+ dim = out .dimension_coordinate (filter_by_axis = (axis ,))
318+ bin_measures .outerproduct (dim .cellsize , inplace = True )
319+
320+ # Convert counts to densities
321+ out /= out .data .sum () * bin_measures
322+
323+ out .del_property ("standard_name" , None )
324+ out .set_property ("long_name" , "probability density function" )
325+
326+ return out
268327
269328
270329def curl_xy (fx , fy , x_wrap = None , one_sided_at_boundary = False , radius = None ):
0 commit comments