@@ -92,7 +92,7 @@ def relative_vorticity(
92
92
) # pragman: no cover
93
93
94
94
95
- def histogram (* digitized ):
95
+ def histogram (* digitized , density = False ):
96
96
"""Return the distribution of a set of variables in the form of an
97
97
N-dimensional histogram.
98
98
@@ -151,6 +151,17 @@ def histogram(*digitized):
151
151
manipulating the dimensions of the digitized field
152
152
construct's data to ensure that broadcasting can occur.
153
153
154
+ density: `bool`, optional
155
+ If False, the default, the result will contain the number
156
+ of samples in each bin. If True, the result is the value
157
+ of the probability density function at the bin, normalized
158
+ such that the integral over the range is 1. Note that the
159
+ sum of the histogram values will not be equal to 1 unless
160
+ bins of unity size are chosen; it is not a probability
161
+ mass function.
162
+
163
+ .. versionadded:: NEXTVERSION
164
+
154
165
:Returns:
155
166
156
167
`Field`
@@ -197,7 +208,7 @@ def histogram(*digitized):
197
208
[0.1174 0.1317]
198
209
[0.1317 0.146 ]]
199
210
>>> h = cf.histogram(indices)
200
- >>> rint (h)
211
+ >>> print (h)
201
212
Field: number_of_observations
202
213
-----------------------------
203
214
Data : number_of_observations(specific_humidity(10)) 1
@@ -216,7 +227,21 @@ def histogram(*digitized):
216
227
[0.1031 0.1174]
217
228
[0.1174 0.1317]
218
229
[0.1317 0.146 ]]
230
+ >>> p = cf.histogram(indices, density=True)
231
+ >>> print(p)
232
+ Field: long_name=probability density function
233
+ ---------------------------------------------
234
+ Data : long_name=probability density function(specific_humidity(10)) 1
235
+ Cell methods : latitude: longitude: point
236
+ Dimension coords: specific_humidity(10) = [0.01015, ..., 0.13885] 1
237
+ >>> print(p.data.round(2).array))
238
+ [15.73 12.24 15.73 6.99 8.74 1.75 1.75 1.75 3.5 1.75]
239
+
240
+ The sum of the density values multiplied by the bin sizes is
241
+ unity:
219
242
243
+ >>> print((p * p.dimension_coordinate().cellsize).sum().array)
244
+ 1.0
220
245
221
246
Create a two-dimensional histogram based on specific humidity and
222
247
temperature bins. The temperature bins in this example are derived
@@ -225,8 +250,8 @@ def histogram(*digitized):
225
250
226
251
>>> g = f.copy()
227
252
>>> g.standard_name = 'air_temperature'
228
- >>> import numpy
229
- >>> g[...] = numpy.random.normal(loc=290, scale=10, size= 40).reshape(5, 8)
253
+ >>> import numpy as np
254
+ >>> g[...] = np.arange( 40).reshape(5, 8) + 253.15
230
255
>>> g.override_units('K', inplace=True)
231
256
>>> print(g)
232
257
Field: air_temperature (ncvar%q)
@@ -247,13 +272,28 @@ def histogram(*digitized):
247
272
Dimension coords: air_temperature(5) = [281.1054839143287, ..., 313.9741786365939] K
248
273
: specific_humidity(10) = [0.01015, ..., 0.13885] 1
249
274
>>> print(h.array)
250
- [[2 1 5 3 2 -- -- -- -- --]
251
- [1 1 2 -- 1 -- 1 1 -- --]
252
- [4 4 2 1 1 1 -- -- 1 1]
253
- [1 1 -- -- 1 -- -- -- 1 --]
254
- [1 -- -- -- -- -- -- -- -- --]]
275
+ [[3 3 2 -- -- -- -- -- -- --]
276
+ [1 1 2 1 3 -- -- -- -- --]
277
+ [1 -- -- 1 -- 1 1 1 2 1]
278
+ [2 1 1 2 2 -- -- -- -- --]
279
+ [2 2 4 -- -- -- -- -- -- --]]
255
280
>>> h.sum()
256
281
<CF Data(): 40 1>
282
+ >>> p = cf.histogram(indices, indices_t, density=True)
283
+ >>> print(p)
284
+ Field: long_name=probability density function
285
+ ---------------------------------------------
286
+ Data : long_name=probability density function(air_temperature(5), specific_humidity(10)) K-1
287
+ Cell methods : latitude: longitude: point
288
+ Dimension coords: air_temperature(5) = [257.05, ..., 288.25] K
289
+ : specific_humidity(10) = [0.01015, ..., 0.13885] 1
290
+ >>> print(p.array)
291
+ >>> print(p.data.round(2).array))
292
+ [[0.67 0.67 0.45 -- -- -- -- -- -- --]
293
+ [0.22 0.22 0.45 0.22 0.67 -- -- -- -- --]
294
+ [0.22 -- -- 0.22 -- 0.22 0.22 0.22 0.45 0.22]
295
+ [0.45 0.22 0.22 0.45 0.45 -- -- -- -- --]
296
+ [0.45 0.45 0.9 -- -- -- -- -- -- --]]
257
297
258
298
"""
259
299
if not digitized :
@@ -264,7 +304,26 @@ def histogram(*digitized):
264
304
f = digitized [0 ].copy ()
265
305
f .clear_properties ()
266
306
267
- return f .bin ("sample_size" , digitized = digitized )
307
+ out = f .bin ("sample_size" , digitized = digitized )
308
+
309
+ if density :
310
+ # Get the measure of each bin. This is the outer product of
311
+ # the cell sizes of each dimension coordinate construct (the
312
+ # dimension coordinate constructs define the bins).
313
+ data_axes = out .get_data_axes ()
314
+ dim = out .dimension_coordinate (filter_by_axis = (data_axes [0 ],))
315
+ bin_measures = dim .cellsize
316
+ for axis in data_axes [1 :]:
317
+ dim = out .dimension_coordinate (filter_by_axis = (axis ,))
318
+ bin_measures .outerproduct (dim .cellsize , inplace = True )
319
+
320
+ # Convert counts to densities
321
+ out /= out .data .sum () * bin_measures
322
+
323
+ out .del_property ("standard_name" , None )
324
+ out .set_property ("long_name" , "probability density function" )
325
+
326
+ return out
268
327
269
328
270
329
def curl_xy (fx , fy , x_wrap = None , one_sided_at_boundary = False , radius = None ):
0 commit comments