-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathExtract_region_level.py
More file actions
121 lines (90 loc) · 4.62 KB
/
Extract_region_level.py
File metadata and controls
121 lines (90 loc) · 4.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#%% Import packages
import xarray as xr
import numpy as np
import pandas as pd
# Define file paths
mask_path = "./pr/data/country_mask_0.25deg.nc"
# Load the data into xarray datasets
mask = xr.open_dataset(mask_path)
# Define a function to calculate the area of a grid cell
def area(lon, lat, res):
"""Calculate the area of a grid cell.
Args:
lon (float): longitude of the cell
lat (float): latitude of the cell
res (float): resolution of the grid cell in degrees
Returns:
float: the area of the cell in square kilometers
"""
# Get the coordinates of the four corners of the cell
lon1, lat1, lon2, lat2 = lon - res/2, lat - res/2, lon + res/2, lat + res/2
# Convert longitude and latitude to radians
lon1 = np.radians(lon1)
lat1 = np.radians(lat1)
lon2 = np.radians(lon2)
lat2 = np.radians(lat2)
# Calculate the area of the cell using the formula
r = 6372
area = abs(r**2 * (lon2 - lon1) * (np.sin(lat2) - np.sin(lat1)))
# Return the calculated area of the cell
return area
#%% Calculate the area of each grid cell and convert to xarray DataArray
lon, lat = np.meshgrid(mask["lon"], mask["lat"])
area_ = area(lon, lat, np.full_like(lon, 0.25))
mask["area"] = xr.DataArray(data=area_,
dims=['lat', 'lon'],
coords={'lat': mask.lat,
'lon': mask.lon})
#%% Calculate the total area by country and calculate area weight for each cell
area_by_country = mask.to_dataframe().groupby("COWCODE").sum()["area"]
countries = area_by_country.index
weight = mask["COWCODE"].to_numpy().copy()
cell_area = mask["area"].to_numpy()
for cowcode in countries:
# Create a boolean array for the current country
mask_country = (mask["COWCODE"] == cowcode).to_numpy()
# Extract the total area for the current country
total_area = area_by_country.loc[cowcode]
# Calculate the weight for each grid cell in the current country
weight[mask_country] = cell_area[mask_country]/total_area
# Convert weight to xarray DataArray and add it to the mask dataset
mask["weight"] = xr.DataArray(data=weight,
dims=['lat', 'lon'],
coords={'lat': mask.lat,
'lon': mask.lon})
cshape = pd.read_csv("./pr/data/cshape_country.csv")
country_name = [cshape[cshape["COWCODE"]==COWCODE]["CNTRY_NAME"].values[0]
for COWCODE in countries]
#%%
folder = "./pr/CMIP6/"
models = ['ACCESS-ESM1-5', 'ACCESS-CM2', 'BCC-CSM2-MR', 'CAMS-CSM1-0', 'CanESM5',
'CMCC-ESM2', 'MRI-ESM2-0', 'NESM3', 'TaiESM1','CAS-ESM2-0', 'FIO-ESM-2-0', 'MIROC6']
SSPs = ['ssp126', 'ssp585']
for mode in models:
for ssp in SSPs:
path = folder + "pr_Amon_" + mode + "_" + ssp + "_r1i1p1f1_gn_201501-210012_0.25.nc"
# Open netCDF file with xarray
fldare = xr.open_dataset(path)
# Create empty DataFrame with countries as index and yearly periods as columns
fldare_df = pd.DataFrame(index=countries, columns=pd.date_range(start="2015-01-01", freq="M", periods=1032)) # 2015-01-16, CAMS-CSM1-0: 1020
# others: start="2015-01-01", freq="M", periods=1032
# Extract the precipitation data from the xarray dataset
var = fldare["pr"].values # Assuming "pr" is the variable name for precipitation
var = var * 3600 * 24 * 30.5 # Convert to millimeters per month
# Iterate over each country to calculate the total annual precipitation
for COWCODE in countries:
# Create a boolean mask for the current country
mask_country = (mask["COWCODE"] == COWCODE).values
# Calculate the annual total precipitation by country [mm]
# using a boolean mask and summing over the two spatial dimensions
fldare_df.loc[COWCODE] = np.array([np.sum(np.nan_to_num(var[i, :, :][mask_country], nan=0) * mask["weight"].values[mask_country])
for i in range(1032)]) # other is 1032, CAMS-CSM1-0: 1020
# Remove rows where all values are zero
fldare_df = fldare_df.replace(0, np.nan)
# Convert country codes to country names and use them as the new index
country_name = [cshape[cshape["COWCODE"] == COWCODE]["CNTRY_NAME"].values[0] for COWCODE in countries]
fldare_df.index = country_name
dates = pd.date_range(start='2015-01', end='2101-01', freq='M').strftime('%Y-%m') # or end='2101-01', CAMS-CSM1-0: end='2100-01'
fldare_df.columns = list(dates)
# Save the DataFrame to a CSV file
fldare_df.to_csv(folder + "pr_Amon_" + mode + "_" + ssp + "_iso.csv")