RESCUES/Extract_region_level.py at main · PREP-NexT/RESCUES · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#%% Import packages
import xarray as xr
import numpy as np
import pandas as pd

# Define file paths
mask_path = "./pr/data/country_mask_0.25deg.nc"

# Load the data into xarray datasets
mask = xr.open_dataset(mask_path)

# Define a function to calculate the area of a grid cell
def area(lon, lat, res):
    """Calculate the area of a grid cell.
    Args:
        lon (float): longitude of the cell
        lat (float): latitude of the cell
        res (float): resolution of the grid cell in degrees

    Returns:
        float: the area of the cell in square kilometers
    """
    # Get the coordinates of the four corners of the cell
    lon1, lat1, lon2, lat2 = lon - res/2, lat - res/2, lon + res/2, lat + res/2

    # Convert longitude and latitude to radians
    lon1 = np.radians(lon1)
    lat1 = np.radians(lat1)
    lon2 = np.radians(lon2)
    lat2 = np.radians(lat2)

    # Calculate the area of the cell using the formula
    r = 6372
    area = abs(r**2 * (lon2 - lon1) * (np.sin(lat2) - np.sin(lat1)))

    # Return the calculated area of the cell
    return area

#%% Calculate the area of each grid cell and convert to xarray DataArray
lon, lat = np.meshgrid(mask["lon"], mask["lat"])

area_ = area(lon, lat, np.full_like(lon, 0.25))

mask["area"] = xr.DataArray(data=area_,
                    dims=['lat', 'lon'],
                    coords={'lat': mask.lat,
                    'lon': mask.lon})

#%% Calculate the total area by country and calculate area weight for each cell
area_by_country = mask.to_dataframe().groupby("COWCODE").sum()["area"]
countries = area_by_country.index
weight = mask["COWCODE"].to_numpy().copy()
cell_area = mask["area"].to_numpy()
for cowcode in countries:
    # Create a boolean array for the current country
    mask_country = (mask["COWCODE"] == cowcode).to_numpy()

    # Extract the total area for the current country
    total_area = area_by_country.loc[cowcode]

    # Calculate the weight for each grid cell in the current country
    weight[mask_country] = cell_area[mask_country]/total_area

# Convert weight to xarray DataArray and add it to the mask dataset
mask["weight"] = xr.DataArray(data=weight,
                            dims=['lat', 'lon'],
                            coords={'lat': mask.lat,
                            'lon': mask.lon})

cshape = pd.read_csv("./pr/data/cshape_country.csv")

country_name = [cshape[cshape["COWCODE"]==COWCODE]["CNTRY_NAME"].values[0]
                for COWCODE in countries]

#%%
folder = "./pr/CMIP6/"
models = ['ACCESS-ESM1-5', 'ACCESS-CM2', 'BCC-CSM2-MR', 'CAMS-CSM1-0', 'CanESM5',
          'CMCC-ESM2', 'MRI-ESM2-0', 'NESM3', 'TaiESM1','CAS-ESM2-0', 'FIO-ESM-2-0', 'MIROC6']

SSPs = ['ssp126', 'ssp585']

for mode in models:
    for ssp in SSPs:
        path = folder + "pr_Amon_" + mode + "_" + ssp + "_r1i1p1f1_gn_201501-210012_0.25.nc"

        # Open netCDF file with xarray
        fldare = xr.open_dataset(path)

        # Create empty DataFrame with countries as index and yearly periods as columns
        fldare_df = pd.DataFrame(index=countries, columns=pd.date_range(start="2015-01-01", freq="M", periods=1032)) # 2015-01-16, CAMS-CSM1-0: 1020
        # others: start="2015-01-01", freq="M", periods=1032

        # Extract the precipitation data from the xarray dataset
        var = fldare["pr"].values  # Assuming "pr" is the variable name for precipitation

        var = var * 3600 * 24 * 30.5  # Convert to millimeters per month

        # Iterate over each country to calculate the total annual precipitation
        for COWCODE in countries:
            # Create a boolean mask for the current country
            mask_country = (mask["COWCODE"] == COWCODE).values

            # Calculate the annual total precipitation by country [mm]
            # using a boolean mask and summing over the two spatial dimensions
            fldare_df.loc[COWCODE] = np.array([np.sum(np.nan_to_num(var[i, :, :][mask_country], nan=0) * mask["weight"].values[mask_country])
                                               for i in range(1032)]) # other is 1032, CAMS-CSM1-0: 1020

        # Remove rows where all values are zero
        fldare_df = fldare_df.replace(0, np.nan)

        # Convert country codes to country names and use them as the new index
        country_name = [cshape[cshape["COWCODE"] == COWCODE]["CNTRY_NAME"].values[0] for COWCODE in countries]
        fldare_df.index = country_name

        dates = pd.date_range(start='2015-01', end='2101-01', freq='M').strftime('%Y-%m') # or end='2101-01', CAMS-CSM1-0: end='2100-01'
        fldare_df.columns = list(dates)

        # Save the DataFrame to a CSV file
        fldare_df.to_csv(folder + "pr_Amon_" + mode + "_" + ssp + "_iso.csv")