Skip to content

Commit 7de882c

Browse files
authored
Merge pull request #158 from c-hydro/dev
Dev
2 parents a728ec0 + 8f5e43f commit 7de882c

File tree

7 files changed

+200
-23
lines changed

7 files changed

+200
-23
lines changed

pyproject.toml

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "door"
7-
version = "2.3.10"
7+
version = "2.3.11"
88
description = "A package for operational retrieval of raster data from different sources"
99
authors = [
1010
{ name = "Andrea Libertino", email = "andrea.libertino@cimafoundation.org" },
@@ -28,17 +28,18 @@ classifiers = [
2828
# mandatory dependencies for basic functionality
2929
dependencies = [
3030
"d3tools@git+https://github.com/c-hydro/d3tools",
31-
"numpy>=1.21.5",
32-
"rioxarray>=0.15.5",
33-
"xarray>=2024.3.0",
34-
"requests>=2.32.3",
35-
"cfgrib>=0.9.11.0",
36-
"netcdf4>=1.5.8",
37-
"h5py>=3.6.0", #only for viirs_downloader
38-
"pyhdf>=0.10.2", #only for modis_downloader
39-
"cdsapi>=0.7.4", #only for cds_downloader
40-
"ecmwf-opendata >= 0.2.0", #only for ecmwf_opendata_downloader
41-
"paramiko>=2.9.3"
31+
"numpy~=2.3.5",
32+
"rioxarray~=0.20.0",
33+
"xarray~=2025.11.0",
34+
"requests~=2.32.5",
35+
"cfgrib~=0.9.15.0",
36+
"netcdf4~=1.7.3",
37+
"h5netcdf~=1.7.3",
38+
"h5py~=3.15.1", #only for viirs_downloader
39+
"pyhdf~=0.11.6", #only for modis_downloader
40+
"cdsapi~=0.7.7", #only for cds_downloader
41+
"ecmwf-opendata~=0.3.24", #only for ecmwf_opendata_downloader
42+
"paramiko~=4.0.0"
4243
]
4344

4445
[project.urls]
@@ -48,7 +49,7 @@ Repository = "https://github.com/c-hydro/door"
4849
# dependencies for drops data
4950
drops = [
5051
"drops2@git+https://github.com/CIMAFoundation/drops2@v0.7.0",
51-
"pandas>=1.3.5"
52+
"pandas~=2.3.3"
5253
]
5354

5455
# dependencies for creating thumbnails
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .viirsmodis_downloader import VIIRSDownloader, MODISDownloader
2-
from .grace_downloader import GRACEDownloader
2+
from .grace_downloader import GRACEDownloader
3+
from .merra2_downloader import MERRA2Downloader

src/door/data_sources/earthdata/cmr_downloader.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,6 @@ class CMRDownloader(DOORDownloader):
3232

3333
cmr_page_size = 200 #TODO: check if true
3434

35-
default_options = {
36-
'variables': None,
37-
'make_mosaic': True,
38-
'crop_to_bounds': True,
39-
'keep_tiles_naming': False,
40-
'selected_tiles' : None
41-
}
42-
4335
file_ext = ['.hdf', '.h5']
4436

4537
def __init__(self, product: str) -> None:
@@ -211,7 +203,8 @@ def cmr_search(self, time: ts.TimeRange, space_bounds: BoundingBox) -> dict:
211203

212204
search_page = response.read()
213205
search_page = json.loads(search_page.decode('utf-8'))
214-
valid_results = cmr_filter_urls(search_page, extensions=self.file_ext, selected_tiles=self.selected_tiles)
206+
selected_tiles = self.selected_tiles if hasattr(self, 'selected_tiles') else None
207+
valid_results = cmr_filter_urls(search_page, extensions=self.file_ext, selected_tiles=selected_tiles)
215208

216209
urls += valid_results
217210

src/door/data_sources/earthdata/grace_downloader.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ class GRACEDownloader(CMRDownloader):
3030
}
3131
}
3232

33+
default_options = {
34+
'variables': ['TWS']
35+
}
36+
3337
file_ext = ['.tif', '.tiff']
3438

3539
def __init__(self, product: str):
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import os
2+
import rioxarray as rxr
3+
import xarray as xr
4+
from typing import Generator
5+
from affine import Affine
6+
7+
import datetime as dt
8+
9+
from .cmr_downloader import CMRDownloader
10+
from d3tools.spatial import BoundingBox, crop_to_bb
11+
12+
from d3tools.timestepping.timestep import TimeStep
13+
14+
class MERRA2Downloader(CMRDownloader):
    """
    Downloader for MERRA2 products located through the CMR search.

    Each requested variable lives in its own collection (`product_id`), is
    read from a single file per timestep, cropped to the requested bounds,
    converted to convenient units and finally aggregated along the `time`
    dimension with one or more aggregation methods.
    """

    source = 'MERRA2'
    name = 'MERRA2_downloader'

    available_products = {
        'tavg1_2d' : { # time-averaged (hourly), single level
            'provider' : 'GES_DISC',
            'freq'     : 'daily', # files are daily, data is hourly
            'version'  : '5.12.4'
        }
    }

    available_variables = {
        'tavg1_2d' : {
            'precipitation' : {'product_id' : 'M2T1NXFLX', 'varname' : 'PRECTOT', 'agg_method' : 'sum'},
            'temperature'   : {'product_id' : 'M2T1NXSLV', 'varname' : 'T2M',     'agg_method' : 'mean'}
        }
    }

    # aggregation methods that can be applied along the time dimension
    available_agg_methods = ['mean', 'max', 'min', 'sum']

    default_options = {
        'variables'  : ['precipitation'],
        'agg_method' : ['sum']
    }

    file_ext = ['.nc4']

    @property
    def start(self):
        """First date for which this downloader considers data available."""
        return dt.datetime(1980,1,1)

    def set_variables(self, variables: str|list[str]) -> None:
        """
        Set the variables to download and attach their aggregation methods.

        `self.agg_method` is paired positionally with `variables`: the i-th
        entry (a single method name or a list of names) applies to the i-th
        variable.

        Raises:
            ValueError: if the number of aggregation entries differs from the
                number of variables, or if a method is not available.
        """
        if isinstance(variables, str):
            variables = [variables]
        super().set_variables(variables)

        agg_options = self.agg_method
        if not isinstance(agg_options, list):
            agg_options = [agg_options]

        if len(agg_options) != len(variables):
            msg = 'The number of aggregation methods must be the same as the number of variables'
            self.log.error(msg)
            raise ValueError(msg)

        for agg, var in zip(agg_options, variables):
            agg = self.check_agg(agg)
            # NOTE(review): assumes the parent set_variables filled self.variables[var] - confirm
            self.variables[var].update({'agg_method': agg})

    def check_agg(self, agg):
        """
        Validate one aggregation entry and return it as a list of method names.

        Raises:
            ValueError: if any method is not in `available_agg_methods`.
        """
        if not isinstance(agg, list):
            agg = [agg]
        for a in agg:
            if a not in self.available_agg_methods:
                msg = f'Aggregation method {a} not available'
                self.log.error(msg)
                raise ValueError(msg)
        return agg

    def _get_data_ts(self,
                     timestep: TimeStep,
                     space_bounds: BoundingBox,
                     tmp_path: str) -> Generator[tuple[xr.DataArray, dict], None, None]:
        """
        Get the data for one timestep from the CMR.

        For every selected variable, yields one `(data, tags)` pair per
        aggregation method, where `tags` holds the keys 'variable' and
        'agg_method'.

        Raises:
            ValueError: if an aggregation method is not recognized.
        """

        for var, varopts in self.variables.items():
            # cmr_search is called right after setting the product id of this variable
            self.product_id = varopts['product_id']

            # Check the data from the CMR
            url_list = self.cmr_search(timestep, space_bounds)

            if not url_list:
                # Each variable comes from its own product: a missing granule
                # for this one must not stop the remaining variables.
                continue

            # download the data (only one file)
            file = os.path.join(tmp_path, os.path.basename(url_list[0]))
            if not os.path.exists(file):
                # NOTE(review): the return value is unused, the file is read from
                # its expected path below; the subscript is kept as in the original.
                self.download(url_list, tmp_path)[0]

            # open the file with xarray; the context manager closes it once
            # every aggregation of this variable has been yielded
            with xr.open_dataset(file, engine = 'h5netcdf') as all_data:

                # ensure the latitude is descending
                sorted_data = all_data.sortby('lat', ascending=False)

                # pick the single variable we need
                data = sorted_data[varopts['varname']]

                # set spatial reference
                data = data.rio.write_crs("EPSG:4326")
                data = data.rio.set_spatial_dims(x_dim="lon", y_dim="lat")

                # crop to the bounding box
                cropped_data = crop_to_bb(data, space_bounds)

                # set the missing value
                cropped_data = cropped_data.where(cropped_data < 9.9e14, other = float('nan'))
                attrs = {'_FillValue': float('nan')}

                # set and convert the unit if needed
                if var == 'precipitation':
                    # from kg/m2/s to mm (1 kg/m2 = 1 mm of water; multiply by 3600 to get hourly total)
                    cropped_data = cropped_data * 3600.0
                    attrs['units'] = 'mm'
                elif var == 'temperature':
                    # from K to °C
                    cropped_data = cropped_data - 273.15
                    attrs['units'] = '°C'

                # Attributes are assigned after the arithmetic so that they do not
                # depend on whether xarray propagates attributes through it.
                cropped_data.attrs = attrs

                # Aggregate if needed
                for agg_method in varopts['agg_method']:
                    if agg_method not in self.available_agg_methods:
                        msg = f'Aggregation method {agg_method} not recognized'
                        self.log.error(msg)
                        raise ValueError(msg)

                    if agg_method == 'sum':
                        # min_count=1 keeps NaN where every time step is missing,
                        # instead of reporting a total of 0
                        agg_data = cropped_data.sum(dim='time', min_count=1)
                    else:
                        # 'mean', 'max' and 'min' are DataArray reductions of the same name
                        agg_data = getattr(cropped_data, agg_method)(dim='time')

                    yield agg_data, {'variable': var, 'agg_method': agg_method}
146+

src/door/data_sources/earthdata/viirsmodis_downloader.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ class VIIRSMODISDownloader(CMRDownloader):
3131

3232
file_ext = ['.hdf', '.h5']
3333

34+
default_options = {
35+
'variables': None,
36+
'make_mosaic': True,
37+
'crop_to_bounds': True,
38+
'keep_tiles_naming': False,
39+
'selected_tiles' : None
40+
}
41+
3442
# we need to add the version=2.0 to the URL to get the correct response for the snow product (for FAPAR it doesn't matter)
3543
cmr_url='https://cmr.earthdata.nasa.gov/search/granules.json?version=2.0'
3644

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"TAGS":{
3+
"source" : "MERRA2",
4+
"product" : "tavg1_2d",
5+
"prod_name": "{source}-{product}"
6+
},
7+
"DATASETS":{
8+
"destination" : {
9+
"type" : "local",
10+
"path" : "output/{source}/{variable}/%Y",
11+
"filename" : "{variable}_{agg_method}_sample_%Y%m%d.tif"
12+
}
13+
},
14+
"DOOR_DOWNLOADER" : {
15+
"source" : "{source}",
16+
"product" : "{product}",
17+
"destination" : "{DATASETS.destination}",
18+
"bounds" : [-180, -90, 180, 90],
19+
"options" : {
20+
"variables" : ["precipitation", "temperature"],
21+
"agg_method": [["sum"], ["mean", "max", "min"]]
22+
}
23+
}
24+
}

0 commit comments

Comments
 (0)