Skip to content

Commit 0425274

Browse files
Add python scripts for computing stats of ObsFcstAna output (#87)
add python scripts for computing stats of ObsFcstAna output (via intermediate monthly files)
2 parents addba69 + 990bce2 commit 0425274

File tree

13 files changed

+2018
-420
lines changed

13 files changed

+2018
-420
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111

1212
### Added
1313

14+
- Added python package for post-processing ObsFcstAna output.
15+
1416
### Changed
1517

1618
- Specify only ntasks_model for SLURM resource request.
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Sample script for post-processing GEOSldas ObsFcstAna output into data assimilation diagnostics.
5+
First, compute and store monthly sums and sums of squares and cross-products of raw ObsFcstAna output.
6+
Data assimilation diagnostics ("stats") such as the mean and std-dev of the observation-minus-forecast
7+
residuals can then be diagnosed quickly from these intermediate "sums" files.
8+
Sample script optionally computes and plots:
9+
- Maps of long-term data assimilation diagnostics (see also Plot_stats_maps.py).
10+
- Monthly time series of spatially averaged data assimilation diagnostics (see also Plot_stats_timeseries.py).
11+
12+
Usage:
13+
1. Edit "user_config.py" with experiments information.
14+
2. Run this script as follows (on Discover):
15+
$ module load python/GEOSpyD
16+
$ ./Get_ObsFcstAna_stats.py
17+
18+
# Background execution:
19+
$ nohup ./Get_ObsFcstAna_stats.py > out.log &
20+
21+
Authors: Q. Liu, R. Reichle, A. Fox
22+
Last Modified: May 2025
23+
"""
24+
25+
import sys; sys.path.append('../../shared/python/')
26+
import warnings; warnings.filterwarnings("ignore")
27+
import os
28+
29+
import numpy as np
30+
31+
from datetime import timedelta
32+
from postproc_ObsFcstAna import postproc_ObsFcstAna
33+
from user_config import get_config # user-defined config info
34+
35+
# ---
36+
#
37+
# If the script is run in the background, uncomment the following lines to see the redirected
38+
# standard output in the out.log file immediately. When the lines are commented out, the redirected
39+
# standard output will not appear in the out.log file until the job has completed.
40+
# If the script is run in the foreground, the lines must be commented out.
41+
#
42+
#import io
43+
#sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
44+
#sys.stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True)
45+
#
46+
# ---
47+
48+
def main():
    """Driver: compute monthly ObsFcstAna sums; optionally plot diagnostics.

    All user-editable settings (experiments, time range, paths) come from
    user_config.py via get_config().
    """

    cfg = get_config()   # edit user-defined inputs in user_config.py

    experiments = cfg['exp_list']
    t_start     = cfg['start_time']
    t_end       = cfg['end_time']
    sums_dir    = cfg['sum_path']
    output_dir  = cfg['out_path']

    # ------------------------------------------------------------------------
    # Postprocess raw ObsFcstAna output data into monthly sums.

    engine = postproc_ObsFcstAna(experiments, t_start, t_end, sum_path=sums_dir)

    engine.save_monthly_sums()

    # ------------------------------------------------------------------------
    # Optionally compute long-term temporal/spatial statistics and create plots.
    # The Plot_stats_*.py scripts can also be run standalone, as long as the
    # monthly sum files are available.

    plot_maps       = False
    plot_timeseries = False

    if plot_maps:
        # Long-term temporal stats, written to netCDF, then mapped.
        stats_file = (output_dir + 'temporal_stats_' + experiments[0]['exptag'] + '_'
                      + t_start.strftime('%Y%m%d') + '_'
                      + (t_end + timedelta(days=-1)).strftime('%Y%m%d') + '.nc4')

        # temporal_stats: dict of mean/variance fields for long-term O-F/O-A
        # stats; each field has dimension [N_tile, N_species].
        temporal_stats = engine.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)

        # Example: plot some O-F maps.
        from Plot_stats_maps import plot_OmF_maps
        plot_OmF_maps(engine, temporal_stats, fig_path=output_dir)

    if plot_timeseries:
        # Spatially averaged stats as monthly time series.
        from Plot_stats_timeseries import Plot_monthly_OmF_bars
        Plot_monthly_OmF_bars(engine, fig_path=output_dir)


if __name__ == '__main__':
    main()

# ====================== EOF =========================================================
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Sample script for plotting maps of long-term data assimilation diagnostics.
5+
Computes Nobs-weighted avg of each metric across all species.
6+
Requires saved files with monthly sums (see Get_ObsFcstAna_stats.py).
7+
Stats of *normalized* OmFs are approximated!
8+
"""
9+
10+
import sys; sys.path.append('../../shared/python/')
11+
import warnings; warnings.filterwarnings("ignore")
12+
13+
import numpy as np
14+
import matplotlib.pyplot as plt
15+
16+
from datetime import timedelta
17+
18+
from remap_1d_to_2d import remap_1d_to_2d
19+
from plot import plotMap
20+
from EASEv2 import EASEv2_ind2latlon
21+
22+
def plot_OmF_maps(postproc_obj, stats, fig_path='./'):
    """Plot a 2x2 panel of long-term O-F diagnostic maps and save it as a PNG.

    Panels: Nobs, O-F mean, O-F stdv, normalized O-F stdv (the normalized
    stats are APPROXIMATED -- see note below).  Each metric is collapsed
    across species via an Nobs-weighted average before plotting.

    Parameters
    ----------
    postproc_obj : postproc_ObsFcstAna
        Provides start_time, end_time, domain, tilecoord, exptag.
    stats : dict
        Temporal stats from calc_temporal_stats_from_sums(); fields are
        presumably [N_tile, N_species] arrays -- TODO confirm against
        postproc_ObsFcstAna.
    fig_path : str
        Directory (with trailing slash) where the figure is written.
    """

    start_time = postproc_obj.start_time
    end_time = postproc_obj.end_time
    domain = postproc_obj.domain
    tc = postproc_obj.tilecoord

    # Sample of final computation of selected diagnostic metrics.

    # Minimum number of obs per (tile, species) for a stat to be shown.
    Nmin = 20

    # Then compute metrics of O-F, O-A, etc. based on the stats computed above.
    N_data = stats['N_data']
    O_mean = stats['obs_mean']
    # mean(x-y) = E[x] - E[y]
    OmF_mean = stats['obs_mean'] - stats['fcst_mean']
    OmA_mean = stats['obs_mean'] - stats['ana_mean']
    # var(x-y) = var(x) + var(y) - 2cov(x,y)
    # cov(x,y) = E[xy] - E[x]E[y]
    OmF_stdv = np.sqrt(stats['obs_variance'] + stats['fcst_variance'] - \
                2 * (stats['oxf_mean'] - stats['obs_mean']*stats['fcst_mean']))

    OmA_stdv = np.sqrt(stats['obs_variance'] + stats['ana_variance'] - \
                2 * (stats['oxa_mean'] - stats['obs_mean']*stats['ana_mean']))

    # *****************************************************************************************
    # The time series mean and std-dev of the *normalized* OmF computed here are APPROXIMATED!
    # *****************************************************************************************
    # Here, we first compute the stats of the OmF time series and then normalize using
    # the time-avg "obsvar" and "fcstvar" values.
    # Since "fcstvar" changes with time, the OmF values should be normalized at each time
    # step (as in the older matlab scripts), and then the time series stats can be computed.
    # To compute the exact stats with this python package, the sum and sum-of-squares of
    # the normalized OmF values would need to be added into the sums files.
    #
    OmF_norm_mean = OmF_mean / np.sqrt(stats['obsvar_mean'] + stats['fcstvar_mean'])    # APPROXIMATED stat!
    OmF_norm_stdv = np.sqrt(OmF_stdv**2 / (stats['obsvar_mean'] + stats['fcstvar_mean']) )   # APPROXIMATED stat!

    # Mask out data points with insufficient observations using the Nmin threshold.
    # Do NOT apply to N_data (it is zeroed instead, so the weighted averages below
    # simply give those points zero weight).
    OmF_mean[ N_data < Nmin] = np.nan
    OmF_stdv[ N_data < Nmin] = np.nan
    OmF_norm_mean[N_data < Nmin] = np.nan
    OmF_norm_stdv[N_data < Nmin] = np.nan
    OmA_mean[ N_data < Nmin] = np.nan
    OmA_stdv[ N_data < Nmin] = np.nan
    N_data[ N_data < Nmin] = 0

    # Compute Nobs-weighted avg of each metric across all species (axis=1).
    # Typically used for SMAP Tb_h/h from asc and desc overpasses,
    # or ASCAT soil moisture from Metop-A/B/C.
    # DOES NOT MAKE SENSE IF, SAY, SPECIES HAVE DIFFERENT UNITS!
    # NOTE(review): where N_data is all-zero for a tile, the division yields
    # nan/inf; warnings are suppressed at module level.
    Nobs_data = np.nansum( N_data, axis=1)
    OmF_mean = np.nansum(OmF_mean *N_data, axis=1)/Nobs_data
    OmF_stdv = np.nansum(OmF_stdv *N_data, axis=1)/Nobs_data
    OmF_norm_mean = np.nansum(OmF_norm_mean*N_data, axis=1)/Nobs_data
    OmF_norm_stdv = np.nansum(OmF_norm_stdv*N_data, axis=1)/Nobs_data
    OmA_mean = np.nansum(OmA_mean *N_data, axis=1)/Nobs_data
    OmA_stdv = np.nansum(OmA_stdv *N_data, axis=1)/Nobs_data

    # Plotting: 2x2 panel, one metric per subplot.
    exptag = postproc_obj.exptag

    fig, axes = plt.subplots(2,2, figsize=(18,10))
    plt.rcParams.update({'font.size':14})

    for i in np.arange(2):
        for j in np.arange(2):
            # Default units label; overridden to '[-]' for the Nobs panel.
            # NOTE(review): '[k]' is presumably meant to be '[K]' (Kelvin,
            # for Tb metrics) -- confirm and fix upstream if so.
            units = '[k]'
            if i == 0 and j == 0:
                # Panel (0,0): number of observations.
                tile_data = Nobs_data
                # crange is [cmin, cmax]; cmax scales with the length of the period.
                crange =[0, np.ceil((end_time-start_time).days/150)*300]
                colormap = plt.get_cmap('jet',20)
                title_txt = exptag + ' Tb Nobs '+ start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
                units = '[-]'
            if i == 0 and j ==1:
                # Panel (0,1): O-F mean (bias).
                tile_data = OmF_mean
                crange =[-3, 3]
                colormap = plt.get_cmap('bwr', 15)
                title_txt = exptag + ' Tb O-F mean '+ start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
            if i == 1 and j == 0:
                # Panel (1,0): O-F std-dev.
                tile_data = OmF_stdv
                crange =[0, 15]
                colormap = plt.get_cmap ('jet',15)
                title_txt = exptag + ' Tb O-F stdv '+ start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
            if i == 1 and j == 1:
                # Panel (1,1): normalized O-F std-dev (approximated stat).
                tile_data = OmF_norm_stdv
                crange =[0, 15]
                colormap = plt.get_cmap ('jet',15)
                title_txt = exptag + ' Tb normalized O-F stdv (approx!) '+ start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d')

            colormap.set_bad(color='0.9')       # NaN cells plot as light grey (0=black, 1=white)

            # Regrid 1d tile_data to 2d grid_data for map plots.
            if '_M09_' in domain:   # special case: EASEv2 M09 global grid (1624x3856 cells)
                grid_data_M09 = np.zeros((1624, 3856)) + np.nan
                grid_data_M09[tc['j_indg'],tc['i_indg']] = tile_data

                # Reshape the data into 4x4 blocks (each M36 cell = 4x4 M09 cells).
                reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4)

                # Combine each 4x4 M09 block into a M36 grid
                #if i==0 and j==0:
                #    grid_data = np.sum(reshaped,axis=(1, 3))
                #else:
                #    grid_data = np.nanmean(reshaped,axis=(1, 3))

                # Instead of block-averaging (commented out above), pick one
                # representative M09 cell per 4x4 block.
                grid_data = grid_data_M09[1::4, 2::4]

                # NOT area weighted
                wmean = np.nanmean(grid_data)
                wabsmean = np.nanmean(np.abs(grid_data))
                if 'normalized' in title_txt:
                    # For normalized stdv, deviation from the ideal value 1.
                    wabsmean = np.nanmean(np.abs(grid_data-1.))

                # M36 global grid is 406x964 cells.
                lat_M36, lon_M36 = EASEv2_ind2latlon(np.arange(406), np.arange(964),'M36')
                lon_2d,lat_2d = np.meshgrid(lon_M36,lat_M36)
            else:
                # Generic case: remap tiles onto a regular lat/lon grid.
                grid_data, uy, ux = remap_1d_to_2d(tile_data, lat_1d = tc['com_lat'], lon_1d = tc['com_lon'])
                lon_2d,lat_2d = np.meshgrid(ux, uy)

                # Area-weighted mean and mean(abs), weighted by tile area.
                wmean = np.nansum( tile_data * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
                wabsmean = np.nansum(np.abs(tile_data) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])
                if 'normalized' in title_txt:
                    wabsmean = np.nansum(np.abs(tile_data-1.) * tc['area'])/np.nansum(~np.isnan(tile_data)*tc['area'])

            # Append the summary numbers to the panel title.
            if 'normalized' in title_txt:
                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (wmean, wabsmean)+' '+units
            elif 'mean' in title_txt:
                title_txt = title_txt + '\n' + "avg=%.3f, avg(abs)=%.3f" % (wmean, wabsmean)+' '+units
            else:
                title_txt = title_txt + '\n' + "avg=%.2f" % (wmean) +' '+units

            # Normalized stdv is displayed on a log10 scale with its own color range.
            if 'normalized' in title_txt:
                grid_data = np.log10(grid_data)
                crange = [-0.6, 0.45]

            mm, cs = plotMap(grid_data, ax =axes[i,j], lat=lat_2d, lon=lon_2d, cRange=crange, \
                    title=title_txt, cmap=colormap, bounding=[-60, 80, -180,180])

    plt.tight_layout()
    # Save figure to file (period label uses the last day *included*, hence -1 day).
    fig.savefig(fig_path+'Map_OmF_'+ exptag +'_'+start_time.strftime('%Y%m')+'_'+\
                (end_time+timedelta(days=-1)).strftime('%Y%m')+'.png')
    #plt.show()
    plt.close(fig)

# -----------------------------------------------------------------------------------------------
173+
if __name__ == '__main__':

    # Standalone entry point: build the stats from saved monthly sum files
    # and plot the O-F maps, without going through Get_ObsFcstAna_stats.py.
    from postproc_ObsFcstAna import postproc_ObsFcstAna
    from user_config import get_config

    cfg = get_config()   # edit user-defined inputs in user_config.py

    experiments = cfg['exp_list']
    t_start     = cfg['start_time']
    t_end       = cfg['end_time']
    sums_dir    = cfg['sum_path']
    output_dir  = cfg['out_path']

    # ------------------------------------------------------------------------
    # Initialize the postprocessing object.
    engine = postproc_ObsFcstAna(experiments, t_start, t_end, sum_path=sums_dir)

    # Compute long-term temporal stats and plot maps.
    stats_file = (output_dir + 'temporal_stats_' + experiments[0]['exptag'] + '_'
                  + t_start.strftime('%Y%m%d') + '_'
                  + (t_end + timedelta(days=-1)).strftime('%Y%m%d') + '.nc4')

    # temporal_stats: dict of mean/variance fields for long-term O-F/O-A
    # stats; each field has dimension [N_tile, N_species].
    temporal_stats = engine.calc_temporal_stats_from_sums(write_to_nc=True, fout_stats=stats_file)

    plot_OmF_maps(engine, temporal_stats, fig_path=output_dir)

# ====================== EOF =========================================================

0 commit comments

Comments
 (0)