Skip to content

Commit 0425274

Browse files
Add python scripts for computing stats of ObsFcstAna output (#87)
add python scripts for computing stats of ObsFcstAna output (via intermediate monthly files)
2 parents addba69 + 990bce2 commit 0425274

File tree

13 files changed

+2018
-420
lines changed

13 files changed

+2018
-420
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111

1212
### Added
1313

14+
- Added python package for post-processing ObsFcstAna output.
15+
1416
### Changed
1517

1618
- Specify only ntasks_model for SLURM resource request.
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Sample script for post-processing GEOSldas ObsFcstAna output into data assimilation diagnostics.
5+
First, compute and store monthly sums and sums of squares and cross-products of raw ObsFcstAna output.
6+
Data assimilation diagnostics ("stats") such as the mean and std-dev of the observation-minus-forecast
7+
residuals can then be diagnosed quickly from these intermediate "sums" files.
8+
Sample script optionally computes and plots:
9+
- Maps of long-term data assimilation diagnostics (see also Plot_stats_maps.py).
10+
- Monthly time series of spatially averaged data assimilation diagnostics (see also Plot_stats_timeseries.py).
11+
12+
Usage:
13+
1. Edit "user_config.py" with experiments information.
14+
2. Run this script as follows (on Discover):
15+
$ module load python/GEOSpyD
16+
$ ./Get_ObsFcstAna_stats.py
17+
18+
# Background execution:
19+
$ nohup ./Get_ObsFcstAna_stats.py > out.log &
20+
21+
Authors: Q. Liu, R. Reichle, A. Fox
22+
Last Modified: May 2025
23+
"""
24+
25+
import sys; sys.path.append('../../shared/python/')
26+
import warnings; warnings.filterwarnings("ignore")
27+
import os
28+
29+
import numpy as np
30+
31+
from datetime import timedelta
32+
from postproc_ObsFcstAna import postproc_ObsFcstAna
33+
from user_config import get_config # user-defined config info
34+
35+
# ---
36+
#
37+
# If the script is run in the background, uncomment the following lines to see the redirected
38+
# standard output in the out.log file immediately. When the lines are commented out, the redirected
39+
# standard output will not appear in the out.log file until the job has completed.
40+
# If the script is run in the foreground, the lines must be commented out.
41+
#
42+
#import io
43+
#sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
44+
#sys.stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True)
45+
#
46+
# ---
47+
48+
def main():
    """Driver: compute monthly ObsFcstAna sums; optionally plot diagnostics.

    All user-editable settings (experiments, time range, paths) come from
    user_config.py via get_config().
    """

    cfg = get_config()   # edit user-defined inputs in user_config.py

    experiments = cfg['exp_list']
    t_start     = cfg['start_time']
    t_end       = cfg['end_time']
    sums_dir    = cfg['sum_path']
    output_dir  = cfg['out_path']

    # ------------------------------------------------------------------------
    # Postprocess raw ObsFcstAna output data into monthly sums.

    engine = postproc_ObsFcstAna(experiments, t_start, t_end, sum_path=sums_dir)

    engine.save_monthly_sums()

    # ------------------------------------------------------------------------
    # Optionally compute long-term temporal/spatial statistics and create plots.
    # The Plot_stats_*.py scripts can also be run standalone, as long as the
    # monthly sum files are available.

    plot_maps       = False
    plot_timeseries = False

    if plot_maps:
        # Long-term temporal stats, written to netCDF, then mapped.
        stats_file = (output_dir + 'temporal_stats_' + experiments[0]['exptag'] + '_'
                      + t_start.strftime('%Y%m%d') + '_'
                      + (t_end + timedelta(days=-1)).strftime('%Y%m%d') + '.nc4')

        # temporal_stats: dict of mean/variance fields for long-term O-F/O-A
        # stats; each field has dimension [N_tile, N_species].
        temporal_stats = engine.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)

        # Example: plot some O-F maps.
        from Plot_stats_maps import plot_OmF_maps
        plot_OmF_maps(engine, temporal_stats, fig_path=output_dir)

    if plot_timeseries:
        # Spatially averaged stats as monthly time series.
        from Plot_stats_timeseries import Plot_monthly_OmF_bars
        Plot_monthly_OmF_bars(engine, fig_path=output_dir)


if __name__ == '__main__':
    main()

# ====================== EOF =========================================================
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Sample script for plotting maps of long-term data assimilation diagnostics.
5+
Computes Nobs-weighted avg of each metric across all species.
6+
Requires saved files with monthly sums (see Get_ObsFcstAna_stats.py).
7+
Stats of *normalized* OmFs are approximated!
8+
"""
9+
10+
import sys; sys.path.append('../../shared/python/')
11+
import warnings; warnings.filterwarnings("ignore")
12+
13+
import numpy as np
14+
import matplotlib.pyplot as plt
15+
16+
from datetime import timedelta
17+
18+
from remap_1d_to_2d import remap_1d_to_2d
19+
from plot import plotMap
20+
from EASEv2 import EASEv2_ind2latlon
21+
22+
def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
    """Plot a 2x2 panel of long-term O-F diagnostic maps and save it as a PNG.

    Panels: Nobs, O-F mean, O-F stdv, normalized O-F stdv (the normalized
    stats are APPROXIMATED -- see note below).  Each metric is collapsed
    across species via an Nobs-weighted average before plotting.

    Parameters
    ----------
    postproc_obj : postproc_ObsFcstAna
        Provides start_time, end_time, domain, tilecoord, exptag.
    stats : dict
        Temporal stats from calc_temporal_stats_from_sums(); fields are
        presumably [N_tile, N_species] arrays -- TODO confirm against
        postproc_ObsFcstAna.
    fig_path : str
        Directory (with trailing slash) where the figure is written.
    """

    start_time = postproc_obj.start_time
    end_time = postproc_obj.end_time
    domain = postproc_obj.domain
    tc = postproc_obj.tilecoord

    # Sample of final computation of selected diagnostic metrics.

    # Minimum number of obs per (tile, species) for a stat to be shown.
    Nmin = 20

    # Then compute metrics of O-F, O-A, etc. based on the stats computed above.
    N_data = stats['N_data']
    O_mean = stats['obs_mean']
    # mean(x-y) = E[x] - E[y]
    OmF_mean = stats['obs_mean'] - stats['fcst_mean']
    OmA_mean = stats['obs_mean'] - stats['ana_mean']
    # var(x-y) = var(x) + var(y) - 2cov(x,y)
    # cov(x,y) = E[xy] - E[x]E[y]
    OmF_stdv = np.sqrt(stats['obs_variance'] + stats['fcst_variance'] - \
                2 * (stats['oxf_mean'] - stats['obs_mean']*stats['fcst_mean']))

    OmA_stdv = np.sqrt(stats['obs_variance'] + stats['ana_variance'] - \
                2 * (stats['oxa_mean'] - stats['obs_mean']*stats['ana_mean']))

    # *****************************************************************************************
    # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED!
    # *****************************************************************************************
    # Here, we first compute the stats of the OmF time series and then normalize using
    # the time-avg "obsvar" and "fcstvar" values.
    # Since "fcstvar" changes with time, the OmF values should be normalized at each time
    # step (as in the older matlab scripts), and then the time series stats can be computed.
    # To compute the exact stats with this python package, the sum and sum-of-squares of
    # the normalized OmF values would need to be added into the sums files.
    #
    OmF_norm_mean = OmF_mean / np.sqrt(stats['obsvar_mean'] + stats['fcstvar_mean'])    # APPROXIMATED stat!
    OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (stats['obsvar_mean'] + stats['fcstvar_mean']) )   # APPROXIMATED stat!

    # Mask out data points with insufficient observations using the Nmin threshold.
    # Do NOT apply to N_data (it is zeroed instead, so the weighted averages below
    # simply give those points zero weight).
    OmF_mean[ N_data < Nmin] = np.nan
    OmF_stdv[ N_data < Nmin] = np.nan
    OmF_norm_mean[N_data < Nmin] = np.nan
    OmF_norm_stdv[N_data < Nmin] = np.nan
    OmA_mean[ N_data < Nmin] = np.nan
    OmA_stdv[ N_data < Nmin] = np.nan
    N_data[ N_data < Nmin] = 0

    # Compute Nobs-weighted avg of each metric across all species (axis=1).
    # Typically used for SMAP Tb_h/h from asc and desc overpasses,
    # or ASCAT soil moisture from Metop-A/B/C.
    # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
    # NOTE(review): where N_data is all-zero for a tile, the division yields
    # nan/inf; warnings are suppressed at module level.
    Nobs_data = np.nansum( N_data, axis=1)
    OmF_mean = np.nansum(OmF_mean *N_data, axis=1)/Nobs_data
    OmF_stdv = np.nansum(OmF_stdv *N_data, axis=1)/Nobs_data
    OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Nobs_data
    OmF_norm_stdv = np.nansum(OmF_norm_stdv*N_data, axis=1)/Nobs_data
    OmA_mean = np.nansum(OmA_mean *N_data, axis=1)/Nobs_data
    OmA_stdv = np.nansum(OmA_stdv *N_data, axis=1)/Nobs_data

    # Plotting: 2x2 panel, one metric per subplot.
    exptag = postproc_obj.exptag

    fig, axes = plt.subplots(2,2, figsize=(18,10))
    plt.rcParams.update({'font.size':14})

    for i in np.arange(2):
        for j in np.arange(2):
            # Default units label; overridden to '[-]' for the Nobs panel.
            # NOTE(review): '[k]' is presumably meant to be '[K]' (Kelvin,
            # for Tb metrics) -- confirm and fix upstream if so.
            units = '[k]'
            if i == 0 and j == 0:
                # Panel (0,0): number of observations.
                tile_data = Nobs_data
                # crange is [cmin, cmax]; cmax scales with the length of the period.
                crange =[0, np.ceil((end_time-start_time).days/150)*300]
                colormap = plt.get_cmap('jet',20)
                title_txt = exptag + ' Tb Nobs '+ start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
                units = '[-]'
            if i == 0 and j ==1:
                # Panel (0,1): O-F mean (bias).
                tile_data = OmF_mean
                crange =[-3, 3]
                colormap = plt.get_cmap('bwr', 15)
                title_txt = exptag + ' Tb O-F mean '+ start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
            if i == 1 and j == 0:
                # Panel (1,0): O-F std-dev.
                tile_data = OmF_stdv
                crange =[0, 15]
                colormap = plt.get_cmap ('jet',15)
                title_txt = exptag + ' Tb O-F stdv '+ start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
            if i == 1 and j == 1:
                # Panel (1,1): normalized O-F std-dev (approximated stat).
                tile_data = OmF_norm_stdv
                crange =[0, 15]
                colormap = plt.get_cmap ('jet',15)
                title_txt = exptag + ' Tb normalized O-F stdv (approx!) '+ start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d')

            colormap.set_bad(color='0.9')       # NaN cells plot as light grey (0=black, 1=white)

            # Regrid 1d tile_data to 2d grid_data for map plots.
            if '_M09_' in domain:   # special case: EASEv2 M09 global grid (1624x3856 cells)
                grid_data_M09 = np.zeros((1624, 3856)) + np.nan
                grid_data_M09[tc['j_indg'],tc['i_indg']] = tile_data

                # Reshape the data into 4x4 blocks (each M36 cell = 4x4 M09 cells).
                reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4)

                # Combine each 4x4 M09 block into a M36 grid
                #if i==0 and j==0:
                #    grid_data = np.sum(reshaped,axis=(1, 3))
                #else:
                #    grid_data = np.nanmean(reshaped,axis=(1, 3))

                # Instead of block-averaging (commented out above), pick one
                # representative M09 cell per 4x4 block.
                grid_data = grid_data_M09[1::4, 2::4]

                # NOT area weighted
                wmean = np.nanmean(grid_data)
                wabsmean = np.nanmean(np.abs(grid_data))
                if 'normalized' in title_txt:
                    # For normalized stdv, deviation from the ideal value 1.
                    wabsmean = np.nanmean(np.abs(grid_data-1.))

                # M36 global grid is 406x964 cells.
                lat_M36, lon_M36 = EASEv2_ind2latlon(np.arange(406), np.arange(964),'M36')
                lon_2d,lat_2d = np.meshgrid(lon_M36,lat_M36)
            else:
                # Generic case: remap tiles onto a regular lat/lon grid.
                grid_data, uy, ux = remap_1d_to_2d(tile_data, lat_1d = tc['com_lat'], lon_1d = tc['com_lon'])
                lon_2d,lat_2d = np.meshgrid(ux, uy)

                # Area-weighted mean and mean(abs), weighted by tile area.
                wmean = np.nansum( tile_data * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
                wabsmean = np.nansum(np.abs(tile_data) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
                if 'normalized' in title_txt:
                    wabsmean = np.nansum(np.abs(tile_data-1.) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])

            # Append the summary numbers to the panel title.
            if 'normalized' in title_txt:
                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (wmean, wabsmean)+' '+units
            elif 'mean' in title_txt:
                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (wmean, wabsmean)+' '+units
            else:
                title_txt = title_txt + '\n' + "avg=%.2f" % (wmean) +' '+units

            # Normalized stdv is displayed on a log10 scale with its own color range.
            if 'normalized' in title_txt:
                grid_data = np.log10(grid_data)
                crange = [-0.6, 0.45]

            mm, cs = plotMap(grid_data, ax =axes[i,j], lat=lat_2d, lon=lon_2d, cRange=crange, \
                    title=title_txt, cmap=colormap, bounding=[-60, 80, -180,180])

    plt.tight_layout()
    # Save figure to file (period label uses the last day *included*, hence -1 day).
    fig.savefig(fig_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\
                (end_time+timedelta(days=-1)).strftime('%Y%m')+'.png')
    #plt.show()
    plt.close(fig)

# -----------------------------------------------------------------------------------------------
173+
if __name__ == '__main__':

    # Standalone entry point: build the stats from saved monthly sum files
    # and plot the O-F maps, without going through Get_ObsFcstAna_stats.py.
    from postproc_ObsFcstAna import postproc_ObsFcstAna
    from user_config import get_config

    cfg = get_config()   # edit user-defined inputs in user_config.py

    experiments = cfg['exp_list']
    t_start     = cfg['start_time']
    t_end       = cfg['end_time']
    sums_dir    = cfg['sum_path']
    output_dir  = cfg['out_path']

    # ------------------------------------------------------------------------
    # Initialize the postprocessing object.
    engine = postproc_ObsFcstAna(experiments, t_start, t_end, sum_path=sums_dir)

    # Compute long-term temporal stats and plot maps.
    stats_file = (output_dir + 'temporal_stats_' + experiments[0]['exptag'] + '_'
                  + t_start.strftime('%Y%m%d') + '_'
                  + (t_end + timedelta(days=-1)).strftime('%Y%m%d') + '.nc4')

    # temporal_stats: dict of mean/variance fields for long-term O-F/O-A
    # stats; each field has dimension [N_tile, N_species].
    temporal_stats = engine.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)

    plot_OmF_maps(engine, temporal_stats, fig_path=output_dir)

# ====================== EOF =========================================================

0 commit comments

Comments
 (0)