Skip to content
Open
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d1a2619
fix post-processing of item_id for s1grd; add unit test
chadp777 Aug 14, 2025
d69ef6c
format; test updates
chadp777 Aug 14, 2025
adcc640
format; test updates
chadp777 Aug 14, 2025
3a54485
format; test updates
chadp777 Aug 14, 2025
ba45006
fix unit test to ensure it can catch when logic issues pertaining to …
chadp777 Aug 14, 2025
d9a119b
fix unit test to ensure it can catch when logic issues pertaining to …
chadp777 Aug 14, 2025
b4ae281
Merge branch 'main' of https://github.com/microsoft/planetary-compute…
chadp777 Aug 19, 2025
c65f1f1
test dedupe; test s3 docker
chadp777 Aug 19, 2025
a4abb26
format
chadp777 Aug 19, 2025
d13dfc4
s5p;format
chadp777 Aug 19, 2025
1cbd932
readme update; fix test
chadp777 Aug 19, 2025
20a7a67
use proper mock loader; readme update
chadp777 Aug 19, 2025
0f9904e
format
chadp777 Aug 19, 2025
bd90764
prevent create_item failures from failing the entire ingestion; log a…
chadp777 Sep 9, 2025
519ac85
fix postgres timeouts
chadp777 Sep 9, 2025
8e26fc7
format
chadp777 Sep 9, 2025
b0c7301
update database version for cicd
chadp777 Sep 9, 2025
3c03709
use proper test items
chadp777 Sep 9, 2025
30dc842
use proper test items
chadp777 Sep 9, 2025
60d75b7
format
chadp777 Sep 10, 2025
1321ab5
update image s3/s5p
chadp777 Sep 10, 2025
7aa1a5c
updates for s3 and s5p
chadp777 Sep 24, 2025
53b1b3f
revert pgstac upgrade that check if tests still pass
chadp777 Sep 24, 2025
3c92a34
Merge branch 'main' of https://github.com/microsoft/planetary-compute…
chadp777 Sep 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datasets/sentinel-1-grd/dataset.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id: sentinel-1-grd
image: ${{ args.registry }}/pctasks-sentinel-1-grd:20250708.1
image: ${{ args.registry }}/pctasks-sentinel-1-grd:20250814.1

args:
- registry
Expand Down
10 changes: 5 additions & 5 deletions datasets/sentinel-3/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
# See https://github.com/mapbox/rasterio/issues/1289
ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt

# Install Python 3.8
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
&& bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \
&& rm -rf "Mambaforge-$(uname)-$(uname -m).sh"
# Install Python 3.10
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" \
&& bash "Miniforge3-$(uname)-$(uname -m).sh" -b -p /opt/conda \
&& rm -rf "Miniforge3-$(uname)-$(uname -m).sh"

ENV PATH /opt/conda/bin:$PATH
ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH

RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5
RUN mamba install -y -c conda-forge python=3.10 gdal libgdal-netcdf pip setuptools cython numpy

RUN python -m pip install --upgrade pip

Expand Down
7 changes: 4 additions & 3 deletions datasets/sentinel-3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@ $ PYTHONPATH=datasets/sentinel-3 python -m pytest datasets/sentinel-3/tests/

```console
$ ls datasets/sentinel-3/collection/ | xargs -I {} \
pctasks dataset process-items '${{ args.since }}' \
pctasks dataset process-items \
-d datasets/sentinel-3/dataset.yaml \
-c {} \
--workflow-id={}-update \
--is-update-workflow \
--upsert
--is-update-workflow {}-update \
-u \
-y
```

**Notes:**
Expand Down
2 changes: 1 addition & 1 deletion datasets/sentinel-3/dataset.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id: sentinel-3
image: ${{ args.registry }}/pctasks-sentinel-3:20230630.1
image: ${{ args.registry }}/pctasks-sentinel-3:20250922.1

args:
- registry
Expand Down
2 changes: 1 addition & 1 deletion datasets/sentinel-3/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
git+https://github.com/stactools-packages/sentinel3.git@36375cc63c053087380664ff931ceed5ad3b5f83
git+https://github.com/stactools-packages/sentinel3.git@93518a430556f290d5e55d3ae0fa1d76cec26197
3 changes: 1 addition & 2 deletions datasets/sentinel-3/sentinel_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import pctasks.dataset.collection
from pctasks.core.models.task import WaitTaskResult
from pctasks.core.storage import Storage, StorageFactory
from pctasks.core.storage import StorageFactory
from pctasks.core.utils.backoff import is_common_throttle_exception, with_backoff

handler = logging.StreamHandler()
Expand Down Expand Up @@ -240,7 +240,6 @@ class Sentinel3Collections(pctasks.dataset.collection.Collection):
def create_item(
cls, asset_uri: str, storage_factory: StorageFactory
) -> Union[List[pystac.Item], WaitTaskResult]:

# Only create Items for NT (Not Time critical) products
sen3_archive = os.path.dirname(asset_uri)
assert sen3_archive.endswith(".SEN3")
Expand Down
10 changes: 5 additions & 5 deletions datasets/sentinel-5p/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
# See https://github.com/mapbox/rasterio/issues/1289
ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt

# Install Python 3.8
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
&& bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \
&& rm -rf "Mambaforge-$(uname)-$(uname -m).sh"
# Install Python 3.10
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" \
&& bash "Miniforge3-$(uname)-$(uname -m).sh" -b -p /opt/conda \
&& rm -rf "Miniforge3-$(uname)-$(uname -m).sh"

ENV PATH /opt/conda/bin:$PATH
ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH

RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5
RUN mamba install -y -c conda-forge python=3.10 gdal libgdal-netcdf pip setuptools cython numpy

RUN python -m pip install --upgrade pip

Expand Down
7 changes: 4 additions & 3 deletions datasets/sentinel-5p/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@ az acr build -r {the registry} --subscription {the subscription} -t pctasks-sent
This collection is updated regularly.

```console
$ pctasks dataset process-items '${{ args.since }}' \
$ pctasks dataset process-items \
-d datasets/sentinel-5p/dataset.yaml \
-c sentinel-5p-l2-netcdf \
--workflow-id=sentinel-5p-l2-netcdf-update \
--is-update-workflow \
--upsert
--is-update-workflow sentinel-5p-l2-netcdf-update \
-u \
-y
```

**Notes:**
Expand Down
2 changes: 1 addition & 1 deletion datasets/sentinel-5p/dataset.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id: sentinel_5p
image: ${{ args.registry }}/pctasks-sentinel-5p:20230630.3
image: ${{ args.registry }}/pctasks-sentinel-5p:20250910.1

args:
- registry
Expand Down
1 change: 0 additions & 1 deletion datasets/sentinel-5p/sentinel_5p.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ class Sentinel5pNetCDFCollection(Collection):
def create_item(
cls, asset_uri: str, storage_factory: StorageFactory
) -> Union[List[pystac.Item], WaitTaskResult]:

storage, nc_path = storage_factory.get_storage_for_file(asset_uri)

with TemporaryDirectory() as tmp_dir:
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:

database:
container_name: pctasks-database
image: ghcr.io/stac-utils/pgstac:v0.7.3
image: ghcr.io/stac-utils/pgstac:v0.8.6
environment:
- POSTGRES_USER=username
- POSTGRES_PASSWORD=password
Expand Down
56 changes: 29 additions & 27 deletions pctasks/dataset/pctasks/dataset/items/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,21 +178,22 @@ def create_items(
try:
with traced_create_item(args.asset_uri, args.collection_id):
result = self._create_item(args.asset_uri, storage_factory)
if isinstance(result, WaitTaskResult):
return result
elif result is None:
logger.warning(f"No items created from {args.asset_uri}")
else:
results.extend(
validate_create_items_result(
result,
collection_id=args.collection_id,
skip_validation=args.options.skip_validation,
)
)
except Exception as e:
raise CreateItemsError(
f"Failed to create item from {args.asset_uri}"
) from e
if isinstance(result, WaitTaskResult):
return result
elif result is None:
logger.warning(f"No items created from {args.asset_uri}")
else:
results.extend(
validate_create_items_result(
result,
collection_id=args.collection_id,
skip_validation=args.options.skip_validation,
)
tb_str = traceback.format_exc()
logger.error(
f"Failed to create item from {args.asset_uri}: {type(e).__name__}: {str(e)}\n{tb_str}" # noqa: E501
)
elif args.asset_chunk_info:
chunk_storage, chunk_path = storage_factory.get_storage_for_file(
Expand All @@ -208,24 +209,25 @@ def create_items(
asset_uri, args.collection_id, i=i, asset_count=asset_count
):
result = self._create_item(asset_uri, storage_factory)
if isinstance(result, WaitTaskResult):
return result
else:
if not result:
logger.warning(f"No items created from {asset_uri}")
else:
results.extend(
validate_create_items_result(
result,
collection_id=args.collection_id,
skip_validation=args.options.skip_validation,
)
)
except Exception as e:
tb_str = traceback.format_exc()
logger.error(
f"Failed to create item from {asset_uri}: {type(e).__name__}: {str(e)}\n{tb_str}" # noqa: E501
)
if isinstance(result, WaitTaskResult):
return result
else:
if not result:
logger.warning(f"No items created from {asset_uri}")
else:
results.extend(
validate_create_items_result(
result,
collection_id=args.collection_id,
skip_validation=args.options.skip_validation,
)
)
continue

else:
# Should be prevented by validator
Expand Down
9 changes: 6 additions & 3 deletions pctasks/ingest_task/pctasks/ingest_task/pgstac.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,19 @@ def ingest_items(
mode: Methods = Methods.upsert,
insert_group_size: Optional[int] = None,
) -> None:
all_unique_items = list(
self.unique_items(items, lambda b: orjson.loads(b)["id"])
)
if insert_group_size:
groups = grouped(items, insert_group_size)
groups = grouped(all_unique_items, insert_group_size)
else:
groups = [items]
groups = [all_unique_items]

for i, group in enumerate(groups):
logger.info(f" ...Loading group {i + 1}")
self._with_connection_retry(
lambda: self.loader.load_items(
iter(self.unique_items(group, lambda b: orjson.loads(b)["id"])),
iter(group),
insert_mode=mode,
)
)
Expand Down
Loading