Skip to content

Commit 77e467f

Browse files
committed
fixing download issues
1 parent 7d71f65 commit 77e467f

File tree

4 files changed

+96
-61
lines changed

4 files changed

+96
-61
lines changed

src/psup_stac_converter/omega/_base.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -559,27 +559,29 @@ def create_stac_item(self, orbit_cube_idx: str, **kwargs) -> pystac.Item:
559559
self.log.debug("Applying DatacubeExtension")
560560
cubedata = self.retrieve_nc_info_from_saved_state(orbit_cube_idx=orbit_cube_idx)
561561
self.log.debug(f"Loading: {cubedata}")
562-
dc_ext = DatacubeExtension.ext(pystac_item, add_if_missing=True)
563-
564-
# This operation prevents the key from finding itself attached to "Variables" and "Dimensions"
565-
dc_dimensions: dict[str, Dimension] = {
566-
k: Dimension.from_dict(v.to_dict()[k])
567-
for k, v in cubedata["dimensions"].items()
568-
}
569-
dc_variables: dict[str, Variable] = {
570-
k: Variable.from_dict(v.to_dict()[k])
571-
for k, v in cubedata["variables"].items()
572-
}
573-
574-
dc_ext.apply(dimensions=dc_dimensions, variables=dc_variables)
575-
576-
for extra_name, extra_value in cubedata["extras"].items():
577-
pystac_item.assets["nc"].extra_fields[extra_name] = extra_value
562+
if cubedata:
563+
dc_ext = DatacubeExtension.ext(pystac_item, add_if_missing=True)
564+
565+
# This operation prevents the key from finding itself attached to "Variables" and "Dimensions"
566+
dc_dimensions: dict[str, Dimension] = {
567+
k: Dimension.from_dict(v.to_dict()[k])
568+
for k, v in cubedata["dimensions"].items()
569+
}
570+
dc_variables: dict[str, Variable] = {
571+
k: Variable.from_dict(v.to_dict()[k])
572+
for k, v in cubedata["variables"].items()
573+
}
574+
575+
dc_ext.apply(dimensions=dc_dimensions, variables=dc_variables)
576+
577+
for extra_name, extra_value in cubedata["extras"].items():
578+
pystac_item.assets["nc"].extra_fields[extra_name] = extra_value
579+
else:
580+
self.log.warning(f"Cubedata for {orbit_cube_idx} appears to be empty.")
578581

579582
# common metadata
580583
pystac_item.common_metadata.mission = "mex"
581584
pystac_item.common_metadata.instruments = ["omega"]
582-
583585
self.log.debug(f"Created item from base method {pystac_item}")
584586

585587
return pystac_item
@@ -725,6 +727,12 @@ def retrieve_nc_info_from_saved_state(self, orbit_cube_idx: str) -> dict[str, An
725727
nc_info = json.load(nc_md)
726728
nc_info = reformat_nc_info(nc_info)
727729
self.log.debug(f"nc_info loaded with {nc_info}")
730+
if not nc_info:
731+
nc_md_state.unlink()
732+
return self.retrieve_nc_info_from_saved_state(
733+
orbit_cube_idx=orbit_cube_idx
734+
)
735+
728736
else:
729737
self.log.debug(
730738
f"{nc_md_state} not found. Creating it from # {orbit_cube_idx}"
@@ -736,14 +744,12 @@ def retrieve_nc_info_from_saved_state(self, orbit_cube_idx: str) -> dict[str, An
736744
self.log.debug(f"{nc_md_state} with {nc_info} created!")
737745
except Exception as e:
738746
self.log.warning(
739-
f"Couldn't save .sav information for # {orbit_cube_idx} because of the following: {e}"
747+
f"Couldn't save .nc information for # {orbit_cube_idx} because of the following: {e}"
740748
)
741749
nc_info = {}
742750

743751
return nc_info
744752

745-
# TODO: revise strategy when this function is called
746-
# You need to pick specific reflectance data for that
747753
def make_thumbnail(
748754
self,
749755
orbit_cube_idx: str,

src/psup_stac_converter/omega/c_channel_proj.py

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -97,30 +97,9 @@ def extract_sav_metadata(self, orbit_cube_idx: str, **kwargs) -> dict[str, Any]:
9797

9898
return {}
9999

100-
def create_stac_item(self, orbit_cube_idx: str) -> pystac.Item:
101-
text_data = cast(
102-
OmegaDataTextItem, self.open_file(orbit_cube_idx, "txt", on_disk=True)
103-
)
104-
# TODO: need some contour function for footprint
105-
footprint = json.loads(to_geojson(self.get_contour_data(orbit_cube_idx)))
106-
bbox = bounds(text_data.bbox).tolist()
107-
108-
pystac_item = super().create_stac_item(
109-
orbit_cube_idx,
110-
timestamp=text_data.start_time,
111-
start_datetime=text_data.start_time,
112-
end_datetime=text_data.stop_time,
113-
footprint=footprint,
114-
bbox=bbox,
115-
item_properties={
116-
"solar_longitude": text_data.solar_longitude,
117-
"orbit_number": text_data.orbit_number,
118-
"cube_number": text_data.cube_number,
119-
# Would fit more in processing extension
120-
"data_quality_id": text_data.data_quality_id,
121-
},
122-
)
123-
100+
def retrieve_sav_info_from_saved_state(
101+
self, orbit_cube_idx: str, **kwargs
102+
) -> dict[str, Any]:
124103
sav_md_state = self.sav_metadata_folder / f"sav_{orbit_cube_idx}.json"
125104
if sav_md_state.exists():
126105
self.log.debug(f"{sav_md_state} found! Opening...")
@@ -131,15 +110,18 @@ def create_stac_item(self, orbit_cube_idx: str) -> pystac.Item:
131110
) as sav_md:
132111
sav_info = json.load(sav_md)
133112
self.log.debug(f"sav_info loaded with {sav_info}")
113+
if not sav_info:
114+
self.log.warning(
115+
f"Cube {orbit_cube_idx} happens to not have info. Redownloading..."
116+
)
117+
sav_md_state.unlink()
118+
return self.retrieve_sav_info_from_saved_state(orbit_cube_idx, **kwargs)
134119
else:
135120
self.log.debug(
136121
f"{sav_md_state} Not found. Creating from # {orbit_cube_idx}"
137122
)
138123
try:
139-
sav_info = self.extract_sav_metadata(
140-
orbit_cube_idx,
141-
sav_size=pystac_item.assets["sav"].extra_fields["size"],
142-
)
124+
sav_info = self.extract_sav_metadata(orbit_cube_idx, **kwargs)
143125
with open(sav_md_state, "w", encoding="utf-8") as sav_md:
144126
json.dump(sav_info, sav_md)
145127
self.log.debug(f"{sav_md_state} with {sav_info} created!")
@@ -148,6 +130,35 @@ def create_stac_item(self, orbit_cube_idx: str) -> pystac.Item:
148130
f"Couldn't save .sav information for # {orbit_cube_idx} because of the following: {e}"
149131
)
150132
sav_info = {}
133+
return sav_info
134+
135+
def create_stac_item(self, orbit_cube_idx: str) -> pystac.Item:
136+
text_data = cast(
137+
OmegaDataTextItem, self.open_file(orbit_cube_idx, "txt", on_disk=True)
138+
)
139+
140+
footprint = json.loads(to_geojson(self.get_contour_data(orbit_cube_idx)))
141+
bbox = bounds(text_data.bbox).tolist()
142+
143+
pystac_item = super().create_stac_item(
144+
orbit_cube_idx,
145+
timestamp=text_data.start_time,
146+
start_datetime=text_data.start_time,
147+
end_datetime=text_data.stop_time,
148+
footprint=footprint,
149+
bbox=bbox,
150+
item_properties={
151+
"solar_longitude": text_data.solar_longitude,
152+
"orbit_number": text_data.orbit_number,
153+
"cube_number": text_data.cube_number,
154+
# Would fit more in processing extension
155+
"data_quality_id": text_data.data_quality_id,
156+
},
157+
)
158+
159+
sav_info = self.retrieve_sav_info_from_saved_state(
160+
orbit_cube_idx, sav_size=pystac_item.assets["sav"].extra_fields["size"]
161+
)
151162

152163
pystac_item.assets["sav"].extra_fields["map_dimensions"] = sav_info.get("dims")
153164
self.log.debug(f"Item created: {pystac_item.to_dict()}")

src/psup_stac_converter/omega/data_cubes.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,13 +187,21 @@ def find_extra_nc_data(self, nc_data: xr.Dataset) -> dict[str, Any]:
187187

188188
return extras
189189

190-
def create_stac_item(self, orbit_cube_idx: str) -> pystac.Item:
190+
def retrieve_sav_info_from_saved_state(
191+
self, orbit_cube_idx: str, **kwargs
192+
) -> dict[str, Any]:
191193
sav_md_state = self.sav_metadata_folder / f"sav_{orbit_cube_idx}.json"
192194
self.log.debug(f"Opening {sav_md_state}")
193195
if sav_md_state.exists():
194196
with open(sav_md_state, "r", encoding="utf-8") as sav_md:
195197
sav_info = json.load(sav_md)
196198
self.log.debug(f"sav_info loaded with {sav_info}")
199+
if not sav_info:
200+
self.log.warning(
201+
f"Cube {orbit_cube_idx} happens to not have info. Redownloading..."
202+
)
203+
sav_md_state.unlink()
204+
return self.retrieve_sav_info_from_saved_state(orbit_cube_idx, **kwargs)
197205
else:
198206
self.log.debug(
199207
f"{sav_md_state} not found. Creating it from # {orbit_cube_idx}"
@@ -202,12 +210,18 @@ def create_stac_item(self, orbit_cube_idx: str) -> pystac.Item:
202210
sav_info = self.extract_sav_info(orbit_cube_idx)
203211
with open(sav_md_state, "w", encoding="utf-8") as sav_md:
204212
json.dump(sav_info, sav_md)
205-
self.log.debug(f"{sav_md_state} with {sav_info} created!")
213+
self.log.debug(f"{sav_md_state} with {sav_info} created!")
206214
except Exception as e:
207215
self.log.warning(
208216
f"Couldn't save .sav information for # {orbit_cube_idx} because of the following: {e}"
209217
)
210218
sav_info = {}
219+
return sav_info
220+
221+
def create_stac_item(self, orbit_cube_idx: str) -> pystac.Item:
222+
# TODO: regroup that in a single function
223+
224+
sav_info = self.retrieve_sav_info_from_saved_state(orbit_cube_idx)
211225

212226
# This one is given by the data description
213227
default_end_datetime = dt.datetime(2016, 4, 11, 0, 0)

src/psup_stac_converter/utils/downloader.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ def open_resource(self, file_href: str):
287287
log.debug(f"A total of {total_bytes} bytes will be downloaded")
288288

289289
with (
290-
tempfile.NamedTemporaryFile() as tmp_f,
290+
tempfile.NamedTemporaryFile(delete_on_close=False) as tmp_f,
291291
tqdm(
292292
total=total_bytes,
293293
unit="B",
@@ -296,17 +296,21 @@ def open_resource(self, file_href: str):
296296
unit_divisor=1024,
297297
) as pbar,
298298
):
299-
num_bytes_downloaded = response.num_bytes_downloaded
300-
for chunk in response.iter_bytes():
301-
tmp_f.write(chunk)
302-
pbar.update(
303-
response.num_bytes_downloaded - num_bytes_downloaded
304-
)
299+
try:
305300
num_bytes_downloaded = response.num_bytes_downloaded
306-
log.info(f"Saved {file_href} temporarily")
307-
yield tmp_f
308-
log.debug(f"{tmp_f.name} ready to use")
309-
log.debug(f"{file_href} disposed")
301+
for chunk in response.iter_bytes():
302+
tmp_f.write(chunk)
303+
pbar.update(
304+
response.num_bytes_downloaded - num_bytes_downloaded
305+
)
306+
num_bytes_downloaded = response.num_bytes_downloaded
307+
tmp_f.close()
308+
log.info(f"Saved {file_href} temporarily on {tmp_f.name}")
309+
yield tmp_f
310+
log.debug(f"{tmp_f.name} ready to use")
311+
finally:
312+
Path(tmp_f.name).unlink()
313+
log.debug(f"{file_href} disposed ({tmp_f.name})")
310314

311315
def save_slice_on_disk(
312316
self,

0 commit comments

Comments
 (0)