Skip to content

Commit b5c40da

Browse files
Merge pull request #820 from sadielbartholomew/student-recipes-3
Add new recipe (18) by summer student: correlation calculation
2 parents 9836d52 + 186b4ab commit b5c40da

File tree

2 files changed

+146
-4
lines changed

2 files changed

+146
-4
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
"""
2+
Calculating the Pearson correlation coefficient between datasets
3+
================================================================
4+
5+
In this recipe, we will take two datasets, one for an independent variable
6+
(in this example elevation) and one for a dependent variable (snow
7+
cover over a particular day), regrid them to the same resolution then
8+
calculate the correlation coefficient, to get a measure of the relationship
9+
between them.
10+
11+
"""
12+
13+
# %%
14+
# 1. Import cf-python, cf-plot and other required packages:
15+
import cfplot as cfp
16+
import cf
17+
18+
import matplotlib.pyplot as plt
19+
import scipy.stats.mstats as mstats
20+
21+
# %%
22+
# 2. Read the data in and unpack the Fields from FieldLists using indexing.
23+
# In our example we are investigating the influence of the land height on
24+
# the snow cover extent, so snow cover is the dependent variable. The snow
25+
# cover data is the
26+
# 'Snow Cover Extent 2017-present (raster 500 m), Europe, daily – version 1'
27+
# sourced from the Copernicus Land Monitoring Service which is described at:
28+
# https://land.copernicus.eu/en/products/snow/snow-cover-extent-europe-v1-0-500m
29+
# and the elevation data is the 'NOAA NGDC GLOBE topo: elevation data' dataset
30+
# which can be sourced from the IRI Data Library, or details found, at:
31+
# http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NGDC/.GLOBE/.topo/index.html.
32+
# Each read gives a FieldList; index it to unpack the single Field we need.
orog_fields = cf.read("~/recipes/1km_elevation.nc")
orog = orog_fields[0]
snow_fields = cf.read("~/recipes/snowcover")
snow = snow_fields[0]
34+
35+
# %%
36+
# 3. Choose the day of pre-aggregated snow cover to investigate. We will
37+
# take the first datetime element corresponding to the first day from the
38+
# datasets, 1st January 2024, but by changing the indexing you can explore
39+
# other days. We also get the string corresponding to
40+
# the date, to reference later:
41+
# Index 0 selects the first entry of the snow field (the first day).
snow_day = snow[0]
snow_day_dt = snow_day.coordinate("time")[0].data
# Split the datetime string on the space and keep only the leading piece,
# which is the date we quote later in the plot title.
first_datetime_string = snow_day_dt.datetime_as_string[0]
snow_day_daystring = f"{first_datetime_string.split(' ')[0]}"
44+
45+
# %%
46+
# 4. Choose the region to consider to compare the relationship across,
47+
# which must be defined across both datasets, though not necessarily on the
48+
# same grid since we regrid to the same grid next and subspace to the same
49+
# area for both datasets ready for comparison in the next steps. By changing
50+
# the latitude and longitude points in the tuple below, you can change the
51+
# area that is used:
52+
# Region as ((lon_min, lon_max), (lat_min, lat_max)); the same bounds are
# applied to both fields so they cover the same area.
region_in_mid_uk = ((-3.0, -1.0), (52.0, 55.0))
lon_bounds, lat_bounds = region_in_mid_uk
sub_orog = orog.subspace(
    longitude=cf.wi(*lon_bounds),
    latitude=cf.wi(*lat_bounds),
)
sub_snow = snow_day.subspace(
    longitude=cf.wi(*lon_bounds),
    latitude=cf.wi(*lat_bounds),
)
59+
60+
# %%
61+
# 5. Ensure data quality, since the standard name here corresponds to a
62+
# unitless fraction, but the values are in the tens, so we need to
63+
# normalise these to all lie between 0 and 1 and change the units
64+
# appropriately:
65+
# Min-max scaling: shift by the minimum, then divide by the range, so the
# values span 0 to 1 before the units are overridden to the unitless "1".
snow_minimum = sub_snow.minimum()
snow_range = sub_snow.range()
sub_snow = (sub_snow - snow_minimum) / snow_range
sub_snow.override_units("1", inplace=True)
67+
68+
# %%
69+
# 6. Regrid the data so that they lie on the same grid and therefore each
70+
# array structure has values with corresponding geospatial points that
71+
# can be statistically compared. Here the elevation field is regridded to the
72+
# snow field since the snow is higher-resolution, but the other way round is
73+
# possible by switching the field order:
74+
regridded_orog = sub_orog.regrids(sub_snow, method="linear")
75+
76+
# %%
77+
# 7. Squeeze the snow data to remove the size 1 axes so we have arrays of
78+
# the same dimensions for each of the two fields to compare:
79+
sub_snow = sub_snow.squeeze()
80+
81+
# %%
82+
# 8. Finally, perform the statistical calculation by using the SciPy method
83+
# to find the Pearson correlation coefficient for the two arrays now they are
84+
# in comparable form. Note we need to use 'scipy.stats.mstats' and not
85+
# 'scipy.stats' for the 'pearsonr' method, to account for masked
86+
# data in the array(s) properly:
87+
# 'pearsonr' returns a result object rather than a bare number; the
# correlation coefficient itself is its 'statistic' attribute (the same
# attribute quoted in the plot title later), so report that value instead
# of the whole result object.
coefficient = mstats.pearsonr(regridded_orog.array, sub_snow.array)
print(f"The Pearson correlation coefficient is: {coefficient.statistic}")
89+
90+
# %%
91+
# 9. Make a final plot showing the two arrays side-by-side and quoting the
92+
# determined Pearson correlation coefficient to illustrate the relationship
93+
# and its strength visually. We use 'gpos' to position the plots in two
94+
# columns and apply some specific axes ticks and labels for clarity.
95+
cfp.gopen(
    rows=1,
    columns=2,
    top=0.85,
    file="snow_and_orog_on_same_grid.png",
    user_position=True,
)

# Joint configuration of the plots, including adding an overall title.
# The title quotes the chosen day and the coefficient to 4 significant
# figures.
overall_title = (
    "Snow cover compared to elevation for the same area of the UK "
    f"aggregated across\n day {snow_day_daystring} with correlation "
    "coefficient (on the same grid) of "
    f"{coefficient.statistic:.4g} (4 s.f.)"
)
plt.suptitle(overall_title, fontsize=17)
111+
cfp.mapset(resolution="10m")
cfp.setvars(ocean_color="white", lake_color="white")
# Axis tick positions and their labels, shared by both panels.
label_info = dict(
    xticklabels=("3W", "2W", "1W"),
    yticklabels=("52N", "53N", "54N", "55N"),
    xticks=(-3, -2, -1),
    yticks=(52, 53, 54, 55),
)
119+
120+
# Plot the two contour plots as columns
121+
# First column: the regridded elevation field.
cfp.gpos(1)
cfp.cscale("wiki_2_0_reduced")
elevation_plot_options = {
    "lines": False,
    "title": "Elevation (from 1km-resolution orography)",
    "colorbar_drawedges": False,
    **label_info,
}
cfp.con(regridded_orog, **elevation_plot_options)
130+
# Second column: the normalised snow cover field.
cfp.gpos(2)
# Don't add extensions on the colourbar since it can only be 0 to 1 inclusive
cfp.levs(min=0, max=1, step=0.1, extend="neither")
cfp.cscale("precip_11lev", ncols=11, reverse=1)
snow_plot_options = {
    "lines": False,
    "title": "Snow cover extent (from satellite imagery)",
    "colorbar_drawedges": False,
    **label_info,
}
cfp.con(sub_snow, **snow_plot_options)

cfp.gclose()

docs/source/recipes/recipe_list.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ plot_06_recipe.html#sphx-glr-recipes-plot-06-recipe-py
1313
plot_07_recipe.html#sphx-glr-recipes-plot-07-recipe-py
1414
<div class="sphx-glr-thumbcontainer aggregate lineplot subspace" tooltip="Aggregate, Lineplot, Subspace">
1515
plot_08_recipe.html#sphx-glr-recipes-plot-08-recipe-py
16-
<div class="sphx-glr-thumbcontainer collapse contourmap" tooltip="Collapse, Contourmap, Subplot">
16+
<div class="sphx-glr-thumbcontainer collapse contourmap stats subplot" tooltip="Collapse, Contourmap, Statistical Operations, Subplot">
1717
plot_09_recipe.html#sphx-glr-recipes-plot-09-recipe-py
1818
<div class="sphx-glr-thumbcontainer histogram" tooltip="Histogram">
1919
plot_10_recipe.html#sphx-glr-recipes-plot-10-recipe-py
@@ -23,12 +23,14 @@ plot_11_recipe.html#sphx-glr-recipes-plot-11-recipe-py
2323
plot_12_recipe.html#sphx-glr-recipes-plot-12-recipe-py
2424
<div class="sphx-glr-thumbcontainer contourmap mask" tooltip=" Contourmap, Mask">
2525
plot_13_recipe.html#sphx-glr-recipes-plot-13-recipe-py
26-
<div class="sphx-glr-thumbcontainer subspace collapse contourmap lineplot" tooltip="Subspace, Collapse, Contourmap, Lineplot">
26+
<div class="sphx-glr-thumbcontainer subspace collapse contourmap lineplot stats" tooltip="Subspace, Collapse, Contourmap, Lineplot, Statistical Operations">
2727
plot_14_recipe.html#sphx-glr-recipes-plot-14-recipe-py
28-
<div class="sphx-glr-thumbcontainer subspace collapse contourmap" tooltip="Subspace, Collapse, Contourmap">
28+
<div class="sphx-glr-thumbcontainer subspace collapse contourmap stats" tooltip="Subspace, Collapse, Contourmap, Statistical Operations">
2929
plot_15_recipe.html#sphx-glr-recipes-plot-15-recipe-py
30-
<div class="sphx-glr-thumbcontainer histogram subspace" tooltip="Histogram, Subspace, Subplot">
30+
<div class="sphx-glr-thumbcontainer histogram subspace subplot" tooltip="Histogram, Subspace, Subplot">
3131
plot_16_recipe.html#sphx-glr-recipes-plot-16-recipe-py
3232
<div class="sphx-glr-thumbcontainer histogram subspace" tooltip="Histogram, Subspace">
3333
plot_17_recipe.html#sphx-glr-recipes-plot-17-recipe-py
3434
<div class="sphx-glr-thumbcontainer contourmap subspace subplot" tooltip="Contourmap, Subspace, Subplot">
35+
plot_18_recipe.html#sphx-glr-recipes-plot-18-recipe-py
36+
<div class="sphx-glr-thumbcontainer regrid stats" tooltip="Regrid, Statistical Operations">

0 commit comments

Comments
 (0)