Skip to content

Commit d5f2fa3

Browse files
committed
Merge branch 'main' into new-dask-from-numpy-v2-from-cfa-in-cfdm
2 parents 30d8e0d + 2d2c0b0 commit d5f2fa3

File tree

3 files changed

+121
-11
lines changed

3 files changed

+121
-11
lines changed

Changelog.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ version 3.17.0
99
from `cfdm` (https://github.com/NCAS-CMS/cf-python/issues/841)
1010
* New keyword parameter to `cf.Field.compute_vertical_coordinates`:
1111
``key`` (https://github.com/NCAS-CMS/cf-python/issues/802)
12+
* New keyword parameter to `cf.histogram`: ``density``
13+
(https://github.com/NCAS-CMS/cf-python/issues/794)
1214
* Changed dependency: ``Python>=3.9.0``
1315
* Changed dependency: ``numpy>=2.0.0``
1416
* Changed dependency: ``cfdm>=1.12.0.0, <1.12.1.0``
@@ -22,7 +24,6 @@ version 3.17.0
2224

2325
----
2426

25-
2627
version 3.16.3
2728
--------------
2829

cf/maths.py

Lines changed: 69 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def relative_vorticity(
9292
) # pragman: no cover
9393

9494

95-
def histogram(*digitized):
95+
def histogram(*digitized, density=False):
9696
"""Return the distribution of a set of variables in the form of an
9797
N-dimensional histogram.
9898
@@ -151,6 +151,17 @@ def histogram(*digitized):
151151
manipulating the dimensions of the digitized field
152152
construct's data to ensure that broadcasting can occur.
153153
154+
density: `bool`, optional
155+
If False, the default, the result will contain the number
156+
of samples in each bin. If True, the result is the value
157+
of the probability density function at the bin, normalized
158+
such that the integral over the range is 1. Note that the
159+
sum of the histogram values will not be equal to 1 unless
160+
bins of unity size are chosen; it is not a probability
161+
mass function.
162+
163+
.. versionadded:: NEXTVERSION
164+
154165
:Returns:
155166
156167
`Field`
@@ -197,7 +208,7 @@ def histogram(*digitized):
197208
[0.1174 0.1317]
198209
[0.1317 0.146 ]]
199210
>>> h = cf.histogram(indices)
200-
>>> rint(h)
211+
>>> print(h)
201212
Field: number_of_observations
202213
-----------------------------
203214
Data : number_of_observations(specific_humidity(10)) 1
@@ -216,7 +227,21 @@ def histogram(*digitized):
216227
[0.1031 0.1174]
217228
[0.1174 0.1317]
218229
[0.1317 0.146 ]]
230+
>>> p = cf.histogram(indices, density=True)
231+
>>> print(p)
232+
Field: long_name=probability density function
233+
---------------------------------------------
234+
Data : long_name=probability density function(specific_humidity(10)) 1
235+
Cell methods : latitude: longitude: point
236+
Dimension coords: specific_humidity(10) = [0.01015, ..., 0.13885] 1
237+
>>> print(p.data.round(2).array))
238+
[15.73 12.24 15.73 6.99 8.74 1.75 1.75 1.75 3.5 1.75]
239+
240+
The sum of the density values multiplied by the bin sizes is
241+
unity:
219242
243+
>>> print((p * p.dimension_coordinate().cellsize).sum().array)
244+
1.0
220245
221246
Create a two-dimensional histogram based on specific humidity and
222247
temperature bins. The temperature bins in this example are derived
@@ -225,8 +250,8 @@ def histogram(*digitized):
225250
226251
>>> g = f.copy()
227252
>>> g.standard_name = 'air_temperature'
228-
>>> import numpy
229-
>>> g[...] = numpy.random.normal(loc=290, scale=10, size=40).reshape(5, 8)
253+
>>> import numpy as np
254+
>>> g[...] = np.arange(40).reshape(5, 8) + 253.15
230255
>>> g.override_units('K', inplace=True)
231256
>>> print(g)
232257
Field: air_temperature (ncvar%q)
@@ -247,13 +272,28 @@ def histogram(*digitized):
247272
Dimension coords: air_temperature(5) = [281.1054839143287, ..., 313.9741786365939] K
248273
: specific_humidity(10) = [0.01015, ..., 0.13885] 1
249274
>>> print(h.array)
250-
[[2 1 5 3 2 -- -- -- -- --]
251-
[1 1 2 -- 1 -- 1 1 -- --]
252-
[4 4 2 1 1 1 -- -- 1 1]
253-
[1 1 -- -- 1 -- -- -- 1 --]
254-
[1 -- -- -- -- -- -- -- -- --]]
275+
[[3 3 2 -- -- -- -- -- -- --]
276+
[1 1 2 1 3 -- -- -- -- --]
277+
[1 -- -- 1 -- 1 1 1 2 1]
278+
[2 1 1 2 2 -- -- -- -- --]
279+
[2 2 4 -- -- -- -- -- -- --]]
255280
>>> h.sum()
256281
<CF Data(): 40 1>
282+
>>> p = cf.histogram(indices, indices_t, density=True)
283+
>>> print(p)
284+
Field: long_name=probability density function
285+
---------------------------------------------
286+
Data : long_name=probability density function(air_temperature(5), specific_humidity(10)) K-1
287+
Cell methods : latitude: longitude: point
288+
Dimension coords: air_temperature(5) = [257.05, ..., 288.25] K
289+
: specific_humidity(10) = [0.01015, ..., 0.13885] 1
290+
>>> print(p.array)
291+
>>> print(p.data.round(2).array))
292+
[[0.67 0.67 0.45 -- -- -- -- -- -- --]
293+
[0.22 0.22 0.45 0.22 0.67 -- -- -- -- --]
294+
[0.22 -- -- 0.22 -- 0.22 0.22 0.22 0.45 0.22]
295+
[0.45 0.22 0.22 0.45 0.45 -- -- -- -- --]
296+
[0.45 0.45 0.9 -- -- -- -- -- -- --]]
257297
258298
"""
259299
if not digitized:
@@ -264,7 +304,26 @@ def histogram(*digitized):
264304
f = digitized[0].copy()
265305
f.clear_properties()
266306

267-
return f.bin("sample_size", digitized=digitized)
307+
out = f.bin("sample_size", digitized=digitized)
308+
309+
if density:
310+
# Get the measure of each bin. This is the outer product of
311+
# the cell sizes of each dimension coordinate construct (the
312+
# dimension coordinate constructs define the bins).
313+
data_axes = out.get_data_axes()
314+
dim = out.dimension_coordinate(filter_by_axis=(data_axes[0],))
315+
bin_measures = dim.cellsize
316+
for axis in data_axes[1:]:
317+
dim = out.dimension_coordinate(filter_by_axis=(axis,))
318+
bin_measures.outerproduct(dim.cellsize, inplace=True)
319+
320+
# Convert counts to densities
321+
out /= out.data.sum() * bin_measures
322+
323+
out.del_property("standard_name", None)
324+
out.set_property("long_name", "probability density function")
325+
326+
return out
268327

269328

270329
def curl_xy(fx, fy, x_wrap=None, one_sided_at_boundary=False, radius=None):

cf/test/test_Maths.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
faulthandler.enable() # to debug seg faults and timeouts
66

7+
import numpy as np
8+
79
import cf
810

911

@@ -209,6 +211,54 @@ def test_differential_operators(self):
209211
zeros[...] = 0
210212
self.assertTrue(cg.data.equals(zeros.data, rtol=0, atol=1e-15))
211213

214+
def test_histogram(self):
215+
f = cf.example_field(0)
216+
g = f.copy()
217+
g.standard_name = "air_temperature"
218+
g[...] = np.arange(40).reshape(5, 8) + 253.15
219+
g.override_units("K", inplace=True)
220+
221+
atol = 1e-15
222+
223+
# 1-d histogram
224+
indices = f.digitize(10)
225+
h = cf.histogram(indices)
226+
self.assertTrue((h.array == [9, 7, 9, 4, 5, 1, 1, 1, 2, 1]).all)
227+
h = cf.histogram(indices, density=True)
228+
self.assertEqual(h.Units, cf.Units("1"))
229+
# Check that integral is 1
230+
bin_measures = h.dimension_coordinate().cellsize
231+
integral = (h * bin_measures).sum()
232+
self.assertTrue(np.allclose(integral.array, 1, rtol=0, atol=atol))
233+
234+
# 2-d histogram
235+
indices_t = g.digitize(5)
236+
h = cf.histogram(indices, indices_t)
237+
self.assertEqual(h.Units, cf.Units())
238+
# The -1 values correspond to missing values in h
239+
self.assertTrue(
240+
(
241+
h.array
242+
== [
243+
[3, 3, 2, -1, -1, -1, -1, -1, -1, -1],
244+
[1, 1, 2, 1, 3, -1, -1, -1, -1, -1],
245+
[1, -1, -1, 1, -1, 1, 1, 1, 2, 1],
246+
[2, 1, 1, 2, 2, -1, -1, -1, -1, -1],
247+
[2, 2, 4, -1, -1, -1, -1, -1, -1, -1],
248+
]
249+
).all()
250+
)
251+
252+
h = cf.histogram(indices, indices_t, density=True)
253+
self.assertEqual(h.Units, cf.Units("K-1"))
254+
# Check that integral is 1
255+
bin_measures = h.dimension_coordinate("air_temperature").cellsize
256+
bin_measures.outerproduct(
257+
h.dimension_coordinate("specific_humidity").cellsize, inplace=True
258+
)
259+
integral = (h * bin_measures).sum()
260+
self.assertTrue(np.allclose(integral.array, 1, rtol=0, atol=atol))
261+
212262

213263
if __name__ == "__main__":
214264
print("Run date:", datetime.datetime.now())

0 commit comments

Comments
 (0)