Skip to content

Commit 8854ceb

Browse files
authored
Make XML metadata optional, extract from HDF if XML not found (#97)
1 parent b12d628 commit 8854ceb

File tree

4 files changed

+55
-2
lines changed

4 files changed

+55
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
99
### Changed
1010

1111
- Item IDs no longer contain the production datetime ([#88](https://github.com/stactools-packages/modis/pull/88))
12+
- Make XML metadata optional - extract metadata from HDF file if XML is not available ([#97](https://github.com/stactools-packages/modis/pull/97))
1213

1314
### Fixed
1415

src/stactools/modis/builder.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,13 @@ def add_hdf_or_xml_href(
110110
xml_href = f"{href}.xml"
111111
else:
112112
raise ValueError(f"Invalid HDF or XML href: {href}")
113-
self.add_xml_asset(xml_href)
113+
114+
# Add XML asset if it exists, otherwise extract metadata from HDF
115+
if os.path.exists(xml_href):
116+
self.add_xml_asset(xml_href)
117+
else:
118+
self.metadata = Metadata.from_hdf_href(hdf_href, self.read_href_modifier)
119+
114120
self.add_hdf_asset(
115121
hdf_href, cog_directory=cog_directory, create_cogs=create_cogs
116122
)

src/stactools/modis/metadata.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import fsspec
77
import numpy as np
8+
import rasterio
89
from lxml import etree
910
from rasterio import Affine
1011
from rasterio.crs import CRS
@@ -232,6 +233,8 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata":
232233
geometry, bbox = cls._geometry_and_bbox(
233234
collection, horizontal_tile, vertical_tile
234235
)
236+
qa_percent = cog_tags.get("QAPERCENTNOTPRODUCEDCLOUD")
237+
qa_percent_not_produced_cloud = int(qa_percent) if qa_percent else None
235238
return Metadata(
236239
id=os.path.splitext(cog_tags["LOCALGRANULEID"])[0],
237240
product=product,
@@ -242,7 +245,7 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata":
242245
end_datetime=end_datetime,
243246
created=None,
244247
updated=None,
245-
qa_percent_not_produced_cloud=int(cog_tags["QAPERCENTNOTPRODUCEDCLOUD"]),
248+
qa_percent_not_produced_cloud=qa_percent_not_produced_cloud,
246249
qa_percent_cloud_cover=None,
247250
horizontal_tile=horizontal_tile,
248251
vertical_tile=vertical_tile,
@@ -252,6 +255,30 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata":
252255
collection=collection,
253256
)
254257

258+
@classmethod
259+
def from_hdf_href(
260+
cls, href: str, read_href_modifier: Optional[ReadHrefModifier] = None
261+
) -> "Metadata":
262+
"""Reads metadata from an HDF file when XML is not available.
263+
264+
Args:
265+
href (str): The href of the HDF file
266+
read_href_modifier (Optional[Callable[[str], str]]): Optional
267+
function to modify the read href
268+
269+
Returns:
270+
Metadata: Information that will map to Item attributes.
271+
"""
272+
if read_href_modifier:
273+
read_href = read_href_modifier(href)
274+
else:
275+
read_href = href
276+
277+
with rasterio.open(read_href) as dataset:
278+
hdf_tags = dataset.tags()
279+
280+
return cls.from_cog_tags(hdf_tags)
281+
255282
@property
256283
def datetime(self) -> Optional[datetime.datetime]:
257284
"""Returns a single nominal datetime for this metadata file.

tests/test_stac.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,25 @@ def test_raster_footprint_geometry() -> None:
207207
item.validate()
208208

209209

210+
def test_create_item_from_hdf_without_xml(tmp_path: Path) -> None:
211+
hdf_file = "MOD10A2.A2022033.h09v05.061.2022042050729.hdf"
212+
source_hdf_path = test_data.get_path(f"data-files/{hdf_file}")
213+
214+
temp_hdf_path = tmp_path / hdf_file
215+
shutil.copyfile(source_hdf_path, temp_hdf_path)
216+
217+
temp_xml_path = tmp_path / f"{hdf_file}.xml"
218+
assert not temp_xml_path.exists()
219+
220+
item = stactools.modis.stac.create_item(str(temp_hdf_path))
221+
222+
assert item is not None
223+
assert item.id.startswith("MOD10A2.A2022033.h09v05")
224+
assert "hdf" in item.assets
225+
assert "metadata" not in item.assets
226+
item.validate()
227+
228+
210229
@pytest.mark.parametrize("file_name", PROJECTION_EDGE_FILES)
211230
def test_raster_footprint_at_projection_edge(file_name: str) -> None:
212231
path = test_data.get_path(file_name)

0 commit comments

Comments
 (0)