Skip to content

Commit 0ba75d2

Browse files
authored
Chadp/stactools s1 upgrade (#331)
* upgrade sentinel-grd-1 to 0.8.1; downgrade pystac to 1.9 for compatibility * s1grd/rtc updates; dedupe items in batches before attempting the pgstac upsert, to avoid errors; don't fail the entire batch job when encountering one error, log and continue. * enable args dict for workflow yaml in template_workflow_dict; add unit tests for template.py funcs; s1rtc updates * s1grd only needed requirements changes; use 1.10.1 pystac for pctasks * update pctasks toml for planetarycomputer==1.0.0 and pystac-client==0.6.1; compatibility * update pctasks toml for planetarycomputer==1.0.0 and pystac-client==0.6.1; compatibility * format and fix types for unique_items * remove docstrings, add fixtures for yaml content, fix bad comma in s1grd collection test json * add some testing for the deduping; improve memory usage for dedupe * mypy * mypy * isort * fix deployment Dockerfile to add icu dependency and upgrade azure funcs to 4.1.0
1 parent 25bd193 commit 0ba75d2

File tree

18 files changed

+640
-42
lines changed

18 files changed

+640
-42
lines changed

.gitignore

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,11 @@ docs/reference/generated/
131131
# deployment
132132
deployment/deploy-values.yaml
133133
deployment/terraform/*/tf_output.json
134-
deployment/tf_output.json
134+
deployment/tf_output.json
135+
136+
# azurite
137+
# Azurite
138+
__blobstorage__
139+
__queuestorage__
140+
__tablestorage__
141+
__azurite_db*.json

datasets/sentinel-1-grd/Dockerfile

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
2121
# See https://github.com/mapbox/rasterio/issues/1289
2222
ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
2323

24-
# Install Python 3.8
25-
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
26-
&& bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \
27-
&& rm -rf "Mambaforge-$(uname)-$(uname -m).sh"
24+
# Install Python 3.10
25+
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" \
26+
&& bash "Miniforge3-$(uname)-$(uname -m).sh" -b -p /opt/conda \
27+
&& rm -rf "Miniforge3-$(uname)-$(uname -m).sh"
2828

29-
ENV PATH /opt/conda/bin:$PATH
30-
ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH
29+
ENV PATH=/opt/conda/bin:$PATH
30+
ENV LD_LIBRARY_PATH=/opt/conda/lib/:$LD_LIBRARY_PATH
3131

32-
RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5
32+
RUN mamba install -y -c conda-forge python=3.10 gdal libgdal-netcdf pip setuptools cython numpy
3333

3434
RUN python -m pip install --upgrade pip
3535

@@ -66,7 +66,10 @@ RUN cd /opt/src/pctasks/dataset && \
6666
pip install .
6767

6868
COPY ./datasets/sentinel-1-grd/requirements.txt /opt/src/datasets/sentinel-1-grd/requirements.txt
69-
RUN python3 -m pip install -r /opt/src/datasets/sentinel-1-grd/requirements.txt
69+
70+
# stactools-sentinel1 pins pystac==1.9 (and needs stactools>=0.4.5), but pctasks needs pystac 1.10.1, so we reinstall 1.10.1 after the requirements to override that pin
71+
RUN python3 -m pip install -r /opt/src/datasets/sentinel-1-grd/requirements.txt && \
72+
python3 -m pip install pystac==1.10.1
7073

7174
# Setup Python Path to allow import of test modules
7275
ENV PYTHONPATH=/opt/src:$PYTHONPATH

datasets/sentinel-1-grd/collection/template.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@
101101
],
102102
"platform": [
103103
"SENTINEL-1A",
104-
"SENTINEL-1B"
104+
"SENTINEL-1B",
105+
"SENTINEL-1C"
105106
],
106107
"sar:frequency_band": [
107108
"C"

datasets/sentinel-1-grd/dataset.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
id: sentinel-1-grd
2-
image: ${{ args.registry }}/pctasks-sentinel-1-grd:20230629.1
2+
image: ${{ args.registry }}/pctasks-sentinel-1-grd:20250708.1
33

44
args:
55
- registry
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
stactools-sentinel1==0.5.0
1+
stactools-sentinel1==0.8.1

datasets/sentinel-1-grd/s1grd.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import urllib3
1010
from stactools.core.utils.antimeridian import Strategy, fix_item
1111
from pctasks.core.storage.base import Storage
12-
from stactools.sentinel1.grd import Format
12+
from stactools.sentinel1.formats import Format
1313
from stactools.sentinel1.grd.stac import create_item
1414

1515
from pctasks.core.models.task import WaitTaskResult
@@ -80,7 +80,9 @@ def backoff_throttle_check(e: Exception) -> bool:
8080
)
8181

8282

83-
def get_item_storage(asset_uri: str, storage_factory: StorageFactory) -> Tuple[Storage, str]:
83+
def get_item_storage(
84+
asset_uri: str, storage_factory: StorageFactory
85+
) -> Tuple[Storage, str]:
8486
is_blob_storage = asset_uri.startswith("blob://")
8587
# We also write the individual STAC items to a storage container
8688
# for another processing stream.
@@ -138,10 +140,17 @@ def create_item(
138140
is_throttle=backoff_throttle_check,
139141
)
140142

141-
item: pystac.Item = create_item(
142-
temp_archive_dir, archive_format=Format.COG
143-
)
144-
item = rewrite_asset_hrefs(item, archive_storage, temp_archive_dir)
143+
try:
144+
item: pystac.Item = create_item(
145+
temp_archive_dir, archive_format=Format.COG
146+
)
147+
item = rewrite_asset_hrefs(item, archive_storage, temp_archive_dir)
148+
except FileNotFoundError as e:
149+
logger.error(f"Failed to create STAC item for {archive}: {str(e)}")
150+
return []
151+
except Exception as e:
152+
logger.error(f"Unexpected error processing {archive}: {str(e)}")
153+
return []
145154

146155
# Remove checksum from id
147156
item.id = "_".join(item.id.split("_")[0:-1])
@@ -184,7 +193,9 @@ def create_item(
184193
return [item]
185194

186195

187-
def rewrite_asset_hrefs(item: pystac.Item, storage: Storage, relative_to: str) -> pystac.Item:
196+
def rewrite_asset_hrefs(
197+
item: pystac.Item, storage: Storage, relative_to: str
198+
) -> pystac.Item:
188199
"""
189200
Rewrite the item's assets to link to Blob Storage instead of local paths.
190201

datasets/sentinel-1-grd/test_s1grd.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,75 @@
11
import pathlib
22
import pystac
33

4+
import logging
45
import s1grd
6+
from stactools.sentinel1.metadata_links import MetadataLinks
57
from pctasks.core.storage import StorageFactory
8+
import pytest
69

710

811
HERE = pathlib.Path(__file__).parent
12+
logging.basicConfig(level=logging.INFO)
13+
logger = logging.getLogger(__name__)
14+
logger.setLevel(logging.INFO)
15+
if not logger.hasHandlers():
16+
handler = logging.StreamHandler()
17+
handler.setFormatter(logging.Formatter("[%(levelname)s]:%(asctime)s: %(message)s"))
18+
logger.addHandler(handler)
19+
20+
21+
@pytest.mark.parametrize(
22+
"item_id, annotation_name, expected_key",
23+
[
24+
(
25+
"S1A_IW_GRDH_1SDV_20230628T210705_20230628T210730_049191_05EA4D_21D1",
26+
"s1a-iw-grd-vh-20230628t210705-20230628t210730-049191-05ea4d-002",
27+
"vh",
28+
),
29+
(
30+
"S1C_IW_GRDH_1SDV_20250708T025935_20250708T030005_003123_00655C_BA99",
31+
"s1c-iw-grd-vh-20250708t025935-20250708t030005-003123-00655c-002",
32+
"vh",
33+
),
34+
],
35+
)
36+
def test_metadata_links_annotation_pattern_parametrized(
37+
tmp_path, item_id: str, annotation_name: str, expected_key: str
38+
):
39+
# Set up: create the annotation file, then a minimal manifest.safe (dataObjectSection + fileLocation) that references it
40+
archive_dir = tmp_path / item_id
41+
annotation_filename = f"{annotation_name}.xml"
42+
annotation_dir = archive_dir / "annotation"
43+
annotation_dir.mkdir(parents=True)
44+
annotation_file = annotation_dir / annotation_filename
45+
annotation_file.write_text("<xml></xml>")
46+
47+
# The manifest must reference the annotation file
48+
manifest_content = f"""
49+
<manifest>
50+
<dataObjectSection>
51+
<dataObject>
52+
<byteStream>
53+
<fileLocation href="annotation/{annotation_filename}"/>
54+
</byteStream>
55+
</dataObject>
56+
</dataObjectSection>
57+
</manifest>
58+
"""
59+
manifest_file = archive_dir / "manifest.safe"
60+
manifest_file.write_text(manifest_content)
61+
try:
62+
logger.info(f"Creating MetadataLinks for {archive_dir}")
63+
ml = MetadataLinks(str(archive_dir))
64+
annotation_hrefs = ml.annotation_hrefs
65+
logger.info(f"Annotation hrefs: {annotation_hrefs}")
66+
except Exception as e:
67+
assert False, f"MetadataLinks failed: {e}"
68+
69+
assert any(
70+
expected_key in key and annotation_file.name in href
71+
for key, href in annotation_hrefs
72+
)
973

1074

1175
def test_get_item_storage():

datasets/sentinel-1-rtc/Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
2222
ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
2323

2424
# Install Python 3.10
25-
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
26-
&& bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \
27-
&& rm -rf "Mambaforge-$(uname)-$(uname -m).sh"
25+
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" \
26+
&& bash "Miniforge3-$(uname)-$(uname -m).sh" -b -p /opt/conda \
27+
&& rm -rf "Miniforge3-$(uname)-$(uname -m).sh"
2828

2929
ENV PATH /opt/conda/bin:$PATH
3030
ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH
3131

32-
RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5
32+
RUN mamba install -y -c conda-forge python=3.10 gdal=3.3.3 pip setuptools cython numpy==1.21.5
3333

3434
RUN python -m pip install --upgrade pip
3535

datasets/sentinel-1-rtc/collection/template.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@
101101
],
102102
"platform": [
103103
"SENTINEL-1A",
104-
"SENTINEL-1B"
104+
"SENTINEL-1B",
105+
"SENTINEL-1C"
105106
],
106107
"sar:frequency_band": [
107108
"C"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
stactools==0.3.*
1+
stactools>=0.4.5

0 commit comments

Comments
 (0)