Skip to content

Commit a69ce0f

Browse files
committed
Init Datacube extension addition
1 parent ab4f83b commit a69ce0f

File tree

8 files changed

+481
-37
lines changed

8 files changed

+481
-37
lines changed

notebooks/omega_cubes.py

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,8 @@ def _(box, example_sav, np):
263263

264264

265265
@app.cell
266-
def _(xr):
267-
ex_nc_ds = xr.open_dataset("./data/raw/downloads/cube_omega/3644_4.nc")
266+
def _(ex_ids, xr):
267+
ex_nc_ds = xr.open_dataset(f"./data/raw/downloads/cube_omega/{ex_ids[0]}.nc")
268268
ex_nc_ds
269269
return (ex_nc_ds,)
270270

@@ -310,11 +310,41 @@ def _(mo):
310310

311311
@app.cell
312312
def _(ex_nc_ds):
313-
for _k in ex_nc_ds.keys():
313+
print("[DIMS]\n", ex_nc_ds.dims)
314+
print("[DATA VARIABLES]", ex_nc_ds.data_vars)
315+
print("[COORDINATES]", ex_nc_ds.coords)
316+
print("[ATTRIBUTES]", ex_nc_ds.attrs)
317+
return
318+
319+
320+
@app.cell
321+
def _(ex_nc_ds):
322+
ex_nc_ds.dims
323+
return
324+
325+
326+
@app.cell
327+
def _(ex_nc_ds):
328+
for _k in ex_nc_ds.data_vars.keys():
314329
print(_k)
330+
print("------")
331+
# print(ex_nc_ds.data_vars[_k])
332+
print(ex_nc_ds.data_vars[_k].data)
333+
print(ex_nc_ds.data_vars[_k].attrs)
334+
print("======\n")
335+
return
336+
315337

316-
print(ex_nc_ds["solar_longitude"].encoding)
317-
print(ex_nc_ds.encoding)
338+
@app.cell
339+
def _(ex_nc_ds):
340+
for _k in ex_nc_ds.coords.keys():
341+
print(_k)
342+
print("------")
343+
# print(ex_nc_ds.coords[_k])
344+
print(ex_nc_ds.coords[_k].data)
345+
print(ex_nc_ds.coords[_k].coords)
346+
print(ex_nc_ds.coords[_k].attrs)
347+
print("==========================")
318348
return
319349

320350

@@ -589,6 +619,46 @@ def _(mo):
589619
return
590620

591621

622+
@app.cell
623+
def _(ex_nc_ds_l2):
624+
print("[DIMS]\n", ex_nc_ds_l2.dims)
625+
print("[DATA VARIABLES]", ex_nc_ds_l2.data_vars)
626+
print("[COORDINATES]", ex_nc_ds_l2.coords)
627+
print("[ATTRIBUTES]", ex_nc_ds_l2.attrs)
628+
return
629+
630+
631+
@app.cell
632+
def _(ex_nc_ds_l2):
633+
ex_nc_ds_l2.dims
634+
return
635+
636+
637+
@app.cell
638+
def _(ex_nc_ds_l2):
639+
for _k in ex_nc_ds_l2.data_vars.keys():
640+
print(_k)
641+
print("------")
642+
# print(ex_nc_ds_l2.data_vars[_k])
643+
print(ex_nc_ds_l2.data_vars[_k].data)
644+
print(ex_nc_ds_l2.data_vars[_k].attrs)
645+
print("======")
646+
return
647+
648+
649+
@app.cell
650+
def _(ex_nc_ds_l2):
651+
for _k in ex_nc_ds_l2.coords.keys():
652+
print(_k)
653+
print("------")
654+
# print(ex_nc_ds_l2.coords[_k])
655+
print(ex_nc_ds_l2.coords[_k].data)
656+
print(ex_nc_ds_l2.coords[_k].coords)
657+
print(ex_nc_ds_l2.coords[_k].attrs)
658+
print("==========================")
659+
return
660+
661+
592662
@app.cell
593663
def _(ex_nc_ds_l2):
594664
ex_nc_ds_l2.attrs

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ dependencies = [
1111
"astropy>=7.1.0",
1212
"beautifulsoup4>=4.13.5",
1313
"fsspec>=2025.9.0",
14+
"geojson-pydantic>=2.1.0",
1415
"geopandas[all]>=1.1.1",
1516
"httpx>=0.28.1",
1617
"httpx-retries>=0.4.3",

src/psup_stac_converter/extensions.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,40 @@ def apply_sci(
6767
# pass
6868

6969

70+
# TODO: Datacube for OMEGA
71+
# def apply_datacube(stac_item: pystac.Item) -> pystac.Item:
72+
# dc_ext = DatacubeExtension.ext(stac_item, add_if_missing=True)
73+
74+
# dc_ext.apply(
75+
# dimensions={
76+
# "mydim": Dimension(
77+
# properties={
78+
# "type": "",
79+
# "axis": "",
80+
# "description": "",
81+
# "extent": [1, 2, 3, 4],
82+
# "values": [4, 5, 8, 7, 74, 1265, 894],
83+
# "step": None, # use number instead
84+
# "reference_system": "IAU:1354:584",
85+
# }
86+
# )
87+
# },
88+
# variables={
89+
# "myvariable": Variable(
90+
# properties={
91+
# "dimensions": ["e", "f"],
92+
# "type": "data|auxiliary",
93+
# "description": """Insert description here""",
94+
# "extent": [1, 4, 5],
95+
# "values": [4, 8, 9],
96+
# "unit": "km",
97+
# }
98+
# )
99+
# },
100+
# )
101+
# return stac_item
102+
103+
70104
def apply_eo(stac_instance: StacInstance, bands: list[Band]) -> StacInstance:
71105
if isinstance(stac_instance, pystac.Item) or isinstance(
72106
stac_instance, pystac.Asset

src/psup_stac_converter/omega/_base.py

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def __init__(
5757
self,
5858
psup_io_handler: PsupIoHandler,
5959
data_type: Literal["data_cubes_slice", "c_channel_slice"],
60+
processing_level: Literal["L2", "L3"],
6061
collection_id: str = "",
6162
license_name: str = "CC-BY-4.0",
6263
collection_description: str = "",
@@ -65,6 +66,7 @@ def __init__(
6566
):
6667
self.io_handler = psup_io_handler
6768
self.data_type = data_type
69+
self.processing_level = processing_level
6870
self._omega_data = self._get_omega_data(data_type)
6971
self.collection_id = collection_id
7072
self.license_name = license_name
@@ -146,16 +148,18 @@ def open_file(
146148
"""Opens an asset of a particular OMEGA cube. Allows IDL .sav, NetCDF and text.
147149
148150
Args:
149-
orbit_cube_idx (str): _description_
150-
file_extension (Literal["sav", "nc", "txt"]): _description_
151+
orbit_cube_idx (str): OMEGA data ID of the item
152+
file_extension (Literal["sav", "nc", "txt"]): The extension of the file.
153+
Can be a `.txt` text (only available with OMEGA C Channel data), an IDL `.sav` file or a NetCDF `.nc` file.
151154
on_disk (bool, optional): _description_. Defaults to True.
152-
text_raw (bool, optional): _description_. Defaults to False.
155+
text_raw (bool, optional): Either returns the text `.txt` file as it is (str) if left to True, or as an
156+
interpreted object if left to False. Defaults to False.
153157
154158
Raises:
155-
ValueError: _description_
159+
FileExtensionError: If the extension isn't recognized
156160
157161
Returns:
158-
_type_: _description_
162+
OmegaDataTextItem | dict[str, Any] | str | xr.Dataset: The opened file. The type depends on the extension.
159163
"""
160164
if file_extension == "sav":
161165
return self.open_sav_dataset(orbit_cube_idx, on_disk=on_disk)
@@ -169,13 +173,15 @@ def open_file(
169173
def open_sav_dataset(
170174
self, orbit_cube_idx: str, on_disk: bool = True
171175
) -> dict[str, Any]:
172-
"""Opens an IDL .sav file
176+
"""Opens an IDL .sav file as a dict of attributes
173177
174178
Args:
175-
file_href (str): _description_
179+
orbit_cube_idx (str): OMEGA data ID of the item
180+
on_disk (bool, optional): Whether the file should be downloaded on the local
181+
disk or not. Defaults to True.
176182
177183
Returns:
178-
dict[str, Any]: _description_
184+
dict[str, Any]: An IDL AttrDict of the .sav file
179185
"""
180186
sav_ds = None
181187
oc_info = self.find_info_by_orbit_cube(orbit_cube_idx, file_extension="sav")
@@ -191,14 +197,16 @@ def open_sav_dataset(
191197

192198
return sav_ds
193199

194-
def open_nc_dataset(self, orbit_cube_idx: str, on_disk: bool = True) -> Any:
195-
"""Opens NetCDF4 dataset
200+
def open_nc_dataset(self, orbit_cube_idx: str, on_disk: bool = True) -> xr.Dataset:
201+
"""Opens NetCDF4 dataset using the XArray package
196202
197203
Args:
198-
file_href (str): _description_
204+
orbit_cube_idx (str): OMEGA data ID of the item
205+
on_disk (bool, optional): Whether the file should be downloaded on the local
206+
disk or not. Defaults to True.
199207
200208
Returns:
201-
Any: _description_
209+
xr.Dataset: The NetCDF dataset under an xarray dataset.
202210
"""
203211
nc_dataset = None
204212
oc_info = self.find_info_by_orbit_cube(orbit_cube_idx, file_extension="nc")
@@ -223,9 +231,11 @@ def open_txt_metadata(
223231
available for Data Cubes.
224232
225233
Args:
226-
orbit_cube_idx (str): _description_
227-
on_disk (bool, optional): _description_. Defaults to True.
228-
raw (bool, optional): _description_. Defaults to False.
234+
orbit_cube_idx (str): the ID of the OMEGA data cube
235+
on_disk (bool, optional): Whether the file should be saved locally or not. Useful for
236+
saved states. Defaults to True.
237+
raw (bool, optional): Whether the file should be returned as str or directly interpreted
238+
as an object if left to `False`. Defaults to False.
229239
230240
Returns:
231241
str | OmegaDataTextItem: The result. Either the raw text if `raw=True` or
@@ -265,9 +275,20 @@ def open_txt_metadata(
265275
return OmegaDataTextItem.model_validate_json(json.dumps(text_obj))
266276

267277
def find_spatial_extent(self) -> pystac.SpatialExtent:
278+
"""Assesses the spatial extent of the collection (the whole planet by default)
279+
280+
Returns:
281+
pystac.SpatialExtent: A SpatialExtent object for the collection
282+
"""
268283
return pystac.SpatialExtent(bboxes=[[-180.0, -90.0, 180.0, 90.0]])
269284

270285
def find_temporal_extent(self) -> pystac.TemporalExtent:
286+
"""Assesses the spatial extent of the collection (the extrema defined
287+
by `pandas.Timestamp` by default)
288+
289+
Returns:
290+
pystac.TemporalExtent: A TemporalExtent object for the collection
291+
"""
271292
return pystac.TemporalExtent(
272293
intervals=[
273294
[
@@ -278,6 +299,11 @@ def find_temporal_extent(self) -> pystac.TemporalExtent:
278299
)
279300

280301
def create_collection(self) -> pystac.Collection:
302+
"""Creates a STAC collection based over the OMEGA data series.
303+
304+
Returns:
305+
pystac.Collection: The corresponding STAC collection
306+
"""
281307
spatial_extent = self.find_spatial_extent()
282308
temporal_extent = self.find_temporal_extent()
283309
collection_extent = pystac.Extent(
@@ -296,6 +322,8 @@ def create_collection(self) -> pystac.Collection:
296322
collection = cast(
297323
pystac.Collection, apply_sci(collection, publications=self.publications)
298324
)
325+
# TODO: make a pystac extension for processing
326+
# collection.extra_fields["processing:level"] = self.processing_level
299327

300328
for omega_data_idx in tqdm(self.omega_data_ids, total=self.n_elements):
301329
try:
@@ -311,13 +339,13 @@ def create_collection(self) -> pystac.Collection:
311339
return collection
312340

313341
def create_stac_item(self, orbit_cube_idx: str, **kwargs) -> pystac.Item:
314-
"""Creates a STAC item based on the common properties of OMEGA cubes
342+
"""Creates a STAC item based on the common properties of OMEGA cubes.
315343
316344
Args:
317-
orbit_cube_idx (str): _description_
345+
orbit_cube_idx (str): The ID of the data cube
318346
319347
Returns:
320-
pystac.Item: _description_
348+
pystac.Item: The corresponding item of the orbit-cube ID
321349
"""
322350

323351
footprint = kwargs.get(
@@ -338,7 +366,6 @@ def create_stac_item(self, orbit_cube_idx: str, **kwargs) -> pystac.Item:
338366
)
339367

340368
# assets
341-
342369
# NetCDF4 data
343370
try:
344371
self.log.debug(f"Creating NetCDF asset for # {orbit_cube_idx}")
@@ -351,6 +378,7 @@ def create_stac_item(self, orbit_cube_idx: str, **kwargs) -> pystac.Item:
351378
extra_fields={"size": nc_info["h_total_size"].item()},
352379
)
353380
pystac_item.add_asset("nc", nc_asset)
381+
self.log.debug(f"{nc_asset} successfully added: {nc_asset.to_dict()}")
354382
except OmegaCubeDataMissingError:
355383
self.log.warning(f"NetCDF file not found for {orbit_cube_idx}. Skipping.")
356384

@@ -362,12 +390,13 @@ def create_stac_item(self, orbit_cube_idx: str, **kwargs) -> pystac.Item:
362390
)
363391
sav_asset = pystac.Asset(
364392
href=sav_info["href"].item(),
365-
media_type=pystac.MediaType.TEXT,
393+
media_type=pystac.MediaType.GEOTIFF,
366394
roles=["data"],
367395
description="IDL .sav data",
368396
extra_fields={"size": sav_info["h_total_size"].item()},
369397
)
370398
pystac_item.add_asset("sav", sav_asset)
399+
self.log.debug(f"{sav_asset} successfully added: {sav_asset.to_dict()}")
371400
except OmegaCubeDataMissingError:
372401
self.log.warning(f"IDL.sav not found for {orbit_cube_idx}. Skipping.")
373402

0 commit comments

Comments
 (0)