From baf8c301f63673532a1b92c79e1aa076895443f6 Mon Sep 17 00:00:00 2001 From: Wietze Date: Wed, 8 Oct 2025 23:02:56 -0400 Subject: [PATCH] feat: Sentinel-1 GRD pipeline support Extend pipeline to support Sentinel-1 GRD collections: - S1 GRD workflow configuration and test payloads - Collection detection logic (get_crs.py extended for S1) - Staging namespace deployment (rbac-staging.yaml) - S1-specific STAC registration handling - End-to-end S1 test suite - v20-v22 image iterations with S1 support Enables multi-mission pipeline supporting both S2 L2A and S1 GRD products. --- .gitignore | 63 +++++++ README.md | 16 +- docker/Dockerfile | 9 +- scripts/augment_stac_item.py | 93 +++++++++- scripts/get_zarr_url.py | 30 +++ scripts/test_s1_e2e.sh | 172 ++++++++++++++++++ scripts/watch-staging-workflows.sh | 27 +++ tests/unit/test_augment_stac_item.py | 89 +++++++++ uv.lock | 81 +++++++++ workflows/amqp-publish-once.yaml | 87 +++++++++ workflows/amqp-publish-s1-test.yaml | 92 ++++++++++ workflows/eventsource.yaml | 2 +- workflows/examples/payload-s1.json | 5 + workflows/examples/run-s1-test.yaml | 22 +++ .../examples/sentinel-1-l1-grd-dp-test.json | 161 ++++++++++++++++ workflows/rbac-staging.yaml | 61 +++++++ workflows/rbac.yaml | 6 +- workflows/sensor.yaml | 11 +- workflows/template.yaml | 57 ++++-- 19 files changed, 1042 insertions(+), 42 deletions(-) create mode 100644 .gitignore create mode 100755 scripts/get_zarr_url.py create mode 100755 scripts/test_s1_e2e.sh create mode 100644 scripts/watch-staging-workflows.sh create mode 100644 workflows/amqp-publish-once.yaml create mode 100644 workflows/amqp-publish-s1-test.yaml create mode 100644 workflows/examples/payload-s1.json create mode 100644 workflows/examples/run-s1-test.yaml create mode 100644 workflows/examples/sentinel-1-l1-grd-dp-test.json create mode 100644 workflows/rbac-staging.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..403f764 --- /dev/null +++ b/.gitignore @@ -0,0 +1,63 @@ +# 
Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +*.egg + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# Type checking +.mypy_cache/ +.dmypy.json +dmypy.json + +# Linting +.ruff_cache/ + +# Environment +.env +.env.local +.env.*.local +.venv +env/ +venv/ +ENV/ +*.local.yaml + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# Jupyter +.ipynb_checkpoints/ + +# Kubernetes +.work/ +kubeconfig* +.kube/ + +# Temporary files +*.tmp +*.log +runs/ +generated/ +.archive/ + +# OS +.DS_Store +Thumbs.db + +# Project-specific +*.zarr +out/ diff --git a/README.md b/README.md index 4fdaade..42c8bda 100644 --- a/README.md +++ b/README.md @@ -23,18 +23,24 @@ kubectl get wf -n devseed -w [![Python](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/) [![Tests](https://github.com/EOPF-Explorer/data-pipeline/workflows/Tests/badge.svg)](https://github.com/EOPF-Explorer/data-pipeline/actions) +- **Multi-sensor support**: Sentinel-1 GRD and Sentinel-2 L2A - STAC item registration with retry logic -- GeoZarr format conversion -- Cloud-native workflows +- GeoZarr format conversion with cloud-optimized overviews +- Cloud-native workflows with Argo +- Interactive visualization with TiTiler ## What It Does -Transforms Sentinel-2 satellite data into web-ready visualizations: +Transforms Sentinel satellite data into web-ready visualizations: **Input:** STAC item URL → **Output:** Interactive web map (~5-10 min) **Pipeline:** Convert (5 min) → Register (30 sec) → Augment (10 sec) +**Supported sensors:** +- **Sentinel-1** L1 GRD: SAR backscatter (VH/VV polarizations) +- **Sentinel-2** L2A: Multispectral reflectance (10m/20m/60m) + ## Quick Start 📖 **New to the project?** See [GETTING_STARTED.md](GETTING_STARTED.md) for complete setup (15 min). @@ -262,10 +268,12 @@ pytest -v -k e2e # End-to-end tests only 1. **Edit workflow:** `workflows/template.yaml` 2. **Update scripts:** `scripts/*.py` 3. 
**Test locally:** `pytest tests/ -v` -4. **Build image:** `docker build -t ghcr.io/eopf-explorer/data-pipeline:dev -f docker/Dockerfile .` +4. **Build image:** `docker buildx build --platform linux/amd64 -t ghcr.io/eopf-explorer/data-pipeline:dev -f docker/Dockerfile . --push` 5. **Deploy:** `kubectl apply -f workflows/template.yaml -n devseed` 6. **Monitor:** `kubectl get wf -n devseed -w` +⚠️ **Important:** Always use `--platform linux/amd64` when building images for Kubernetes clusters. + See [CONTRIBUTING.md](CONTRIBUTING.md) for coding standards and development workflow. ## License diff --git a/docker/Dockerfile b/docker/Dockerfile index 5f9e705..357093d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,3 +1,4 @@ +# Build for linux/amd64: docker buildx build --platform linux/amd64 -t . --push FROM python:3.11-slim # System dependencies (including GDAL for rasterio) @@ -18,13 +19,13 @@ RUN pip install -U pip uv # Cachebust for data-model installation (change timestamp to force fresh install) ARG CACHEBUST=2025-10-06T11:00:00Z -# Install eopf-geozarr from minimal fix branch -# Includes critical set_spatial_dims() fix before write_crs() calls +# Install eopf-geozarr from fix/s1-encoding-conflict branch (temporary until merged) RUN uv pip install --system --no-cache \ - git+https://github.com/EOPF-Explorer/data-model.git@fix/spatial-dims-minimal \ + git+https://github.com/EOPF-Explorer/data-model.git@fix/s1-encoding-conflict \ pystac>=1.10.0 \ httpx>=0.27.0 \ - boto3>=1.34.0 + boto3>=1.34.0 \ + tenacity>=8.0.0 # Force fresh copy of scripts (invalidate cache) ARG SCRIPTS_VERSION=2025-10-06T02:05:00Z diff --git a/scripts/augment_stac_item.py b/scripts/augment_stac_item.py index 5803c47..8d28d09 100644 --- a/scripts/augment_stac_item.py +++ b/scripts/augment_stac_item.py @@ -47,6 +47,69 @@ def _encode_quicklook_query() -> str: DEFAULT_QUICKLOOK_QUERY = _encode_quicklook_query() + +def _get_s1_polarization(item: Item) -> str: + """Extract first available 
polarization from S1 item assets. + + Args: + item: PySTAC Item with S1 assets + + Returns: + Uppercase polarization code (VH, VV, HH, or HV). Defaults to VH. + """ + for pol in _S1_POLARIZATIONS: + if pol in item.assets: + return pol.upper() + return "VH" + + +def _encode_s1_preview_query(item: Item) -> str: + """Generate S1 GRD preview query for TiTiler. + + S1 GRD structure in converted GeoZarr: + /S01SIWGRD_{timestamp}_{id}_VH/measurements with grd variable + + TiTiler needs the full path to the measurements group with the grd variable. + + Args: + item: PySTAC Item with S1 GRD data + + Returns: + Query string for TiTiler (variables, bidx, rescale) + """ + pol = _get_s1_polarization(item) + asset = item.assets.get(pol.lower()) + + if not asset or not asset.href: + # Fallback to simple path + pairs = [ + ("variables", "/measurements:grd"), + ("bidx", "1"), + ("rescale", "0,219"), + ] + return "&".join(f"{key}={urllib.parse.quote_plus(value)}" for key, value in pairs) + + # Extract group path from asset href + # Example: s3://.../S01SIWGRD_..._VH/measurements -> /S01SIWGRD_..._VH/measurements:grd + href = asset.href + if ".zarr/" in href: + # Extract path after .zarr/ + zarr_path = href.split(".zarr/")[1] + # zarr_path is like: S01SIWGRD_..._VH/measurements + # Build variable reference: /S01SIWGRD_..._VH/measurements:grd + variable_path = f"/{zarr_path}:grd" + else: + # Fallback + variable_path = "/measurements:grd" + + pairs = [ + ("variables", variable_path), + ("bidx", "1"), + ("rescale", "0,219"), # Typical S1 GRD range + ] + return "&".join(f"{key}={urllib.parse.quote_plus(value)}" for key, value in pairs) + + _ALLOWED_SCHEMES = {"http", "https"} _USER_AGENT = "augment-stac-item/1.0" _DEFAULT_TIMEOUT = float(os.getenv("HTTP_TIMEOUT", "30")) @@ -65,6 +128,14 @@ def _encode_quicklook_query() -> str: _S2_DATASET_KEYS = ("SR_10m", "SR_20m", "SR_60m") _S2_QUICKLOOK_KEYS = ("TCI_10m", "TCI", "TCI_20m") +_S1_COLLECTION_ID = "sentinel-1-l1-grd" +_S1_POLARIZATIONS = 
("vh", "vv", "hh", "hv") + + +def _is_s1_collection(collection_id: str) -> bool: + """Check if collection is Sentinel-1 GRD.""" + return collection_id.startswith("sentinel-1-l1-grd") + def _coerce_epsg(value: Any) -> int | None: if isinstance(value, bool): @@ -462,17 +533,27 @@ def add_visualization_links( item.links = [link for link in item.links if link.rel not in filtered_rels] item_id = item.id viewer_href = f"{base_raster_url}/collections/{coll}/items/{item_id}/viewer" - asset_key = _select_preview_asset(item) - preview_asset = item.assets.get(asset_key) if asset_key else None - is_quicklook = _is_quicklook_asset(preview_asset) - default_query = DEFAULT_QUICKLOOK_QUERY if is_quicklook else DEFAULT_TRUE_COLOR_QUERY + + # Determine preview query based on collection type + asset_key: str | None + if _is_s1_collection(coll): + # Sentinel-1: Use GRD polarization preview + default_query = _encode_s1_preview_query(item) + xyz_title = os.getenv("PREVIEW_XYZ_TITLE", f"GRD {_get_s1_polarization(item)}") + asset_key = _get_s1_polarization(item).lower() # vh or vv + else: + # Sentinel-2: Use quicklook or true color + asset_key = _select_preview_asset(item) + preview_asset = item.assets.get(asset_key) if asset_key else None + is_quicklook = _is_quicklook_asset(preview_asset) + default_query = DEFAULT_QUICKLOOK_QUERY if is_quicklook else DEFAULT_TRUE_COLOR_QUERY + xyz_title = os.getenv("PREVIEW_XYZ_TITLE", "True Color Image (10m)") + xyz_query = _resolve_preview_query( os.getenv("PREVIEW_XYZ_QUERY"), default_query=default_query, ) - xyz_title = os.getenv("PREVIEW_XYZ_TITLE", "True Color Image (10m)") - def _add_link(rel: str, target: str, media_type: str, title: str | None = None) -> None: item.add_link( Link( diff --git a/scripts/get_zarr_url.py b/scripts/get_zarr_url.py new file mode 100755 index 0000000..548026b --- /dev/null +++ b/scripts/get_zarr_url.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +import json +import sys +from urllib.request import urlopen + + +def 
get_zarr_url(stac_item_url: str) -> str: + with urlopen(stac_item_url) as response: + item = json.loads(response.read()) + + assets = item.get("assets", {}) + + # Priority: product, zarr, then any .zarr asset + for key in ["product", "zarr"]: + if key in assets: + href = assets[key].get("href") + if href: + return str(href) + + # Fallback + for asset in assets.values(): + href = asset.get("href", "") + if ".zarr" in href: + return str(href) + + raise RuntimeError("No Zarr asset found") + + +if __name__ == "__main__": + print(get_zarr_url(sys.argv[1])) diff --git a/scripts/test_s1_e2e.sh b/scripts/test_s1_e2e.sh new file mode 100755 index 0000000..3a53158 --- /dev/null +++ b/scripts/test_s1_e2e.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# Test S1 GRD end-to-end pipeline in devseed-staging namespace +# +# This script: +# 1. Applies the workflow template +# 2. Publishes an S1 test payload via AMQP +# 3. Waits for workflow completion +# 4. Shows logs and verifies STAC item was created + +set -euo pipefail + +# Set kubeconfig +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" +export KUBECONFIG="${KUBECONFIG:-$PROJECT_ROOT/.work/kubeconfig}" + +if [ ! -f "$KUBECONFIG" ]; then + echo "❌ Kubeconfig not found at: $KUBECONFIG" + echo "Please set KUBECONFIG environment variable or create .work/kubeconfig" + exit 1 +fi + +NAMESPACE="${NAMESPACE:-devseed-staging}" +PAYLOAD_FILE="${PAYLOAD_FILE:-workflows/examples/payload-s1.json}" +TIMEOUT="${TIMEOUT:-600}" # 10 minutes + +echo "==========================================" +echo "S1 GRD Pipeline E2E Test" +echo "==========================================" +echo "Kubeconfig: $KUBECONFIG" +echo "Namespace: $NAMESPACE" +echo "Payload: $PAYLOAD_FILE" +echo "Timeout: ${TIMEOUT}s" +echo "" + +# Step 1: Apply workflow template +echo "πŸ“ Applying workflow template..." 
+kubectl -n "$NAMESPACE" apply -f workflows/template.yaml +echo "βœ… Template applied" +echo "" + +# Step 2: Publish AMQP message +echo "πŸ“€ Publishing test payload..." +kubectl -n "$NAMESPACE" delete job amqp-publish-once --ignore-not-found=true +kubectl -n "$NAMESPACE" delete configmap amqp-payload --ignore-not-found=true +kubectl -n "$NAMESPACE" create configmap amqp-payload --from-file=body.json="$PAYLOAD_FILE" +kubectl -n "$NAMESPACE" apply -f workflows/amqp-publish-once.yaml +echo "⏳ Waiting for publish job..." +kubectl -n "$NAMESPACE" wait --for=condition=complete --timeout=120s job/amqp-publish-once +echo "βœ… Payload published" +echo "" + +# Step 3: Get latest workflow +echo "πŸ” Finding triggered workflow..." +sleep 3 # Give sensor time to create workflow +WORKFLOW=$(kubectl -n "$NAMESPACE" get wf --sort-by=.metadata.creationTimestamp -o jsonpath='{.items[-1:].metadata.name}' 2>/dev/null || true) +if [ -z "$WORKFLOW" ]; then + echo "❌ No workflow found!" + exit 1 +fi +echo "βœ… Workflow: $WORKFLOW" +echo "" + +# Step 4: Wait for completion +echo "⏳ Waiting for workflow completion (timeout: ${TIMEOUT}s)..." +START_TIME=$(date +%s) +while true; do + PHASE=$(kubectl -n "$NAMESPACE" get wf "$WORKFLOW" -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown") + ELAPSED=$(($(date +%s) - START_TIME)) + + echo " [${ELAPSED}s] Phase: $PHASE" + + case "$PHASE" in + Succeeded) + echo "βœ… Workflow succeeded!" + break + ;; + Failed|Error) + echo "❌ Workflow failed!" + break + ;; + Unknown) + echo "❌ Workflow disappeared!" + exit 1 + ;; + esac + + if [ $ELAPSED -ge $TIMEOUT ]; then + echo "⏰ Timeout reached!" 
+ break + fi + + sleep 5 +done +echo "" + +# Step 5: Show workflow details +echo "==========================================" +echo "Workflow Details" +echo "==========================================" +kubectl -n "$NAMESPACE" get wf "$WORKFLOW" -o jsonpath=' +Name: {.metadata.name} +Status: {.status.phase} +Started: {.status.startedAt} +Finished: {.status.finishedAt} +Duration: {.status.estimatedDuration} + +Parameters: + source_url: {.spec.arguments.parameters[?(@.name=="source_url")].value} + item_id: {.spec.arguments.parameters[?(@.name=="item_id")].value} + collection: {.spec.arguments.parameters[?(@.name=="register_collection")].value} +' +echo "" +echo "" + +# Step 6: Show pod logs +echo "==========================================" +echo "Pod Logs" +echo "==========================================" +PODS=$(kubectl -n "$NAMESPACE" get pods -l workflows.argoproj.io/workflow="$WORKFLOW" -o name 2>/dev/null || true) +if [ -z "$PODS" ]; then + echo "⚠️ No pods found" +else + for POD in $PODS; do + POD_NAME=$(basename "$POD") + TEMPLATE=$(kubectl -n "$NAMESPACE" get pod "$POD_NAME" -o jsonpath='{.metadata.labels.workflows\.argoproj\.io/template}' 2>/dev/null || echo "unknown") + echo "" + echo "--- $POD_NAME ($TEMPLATE) ---" + kubectl -n "$NAMESPACE" logs "$POD_NAME" --tail=100 -c main 2>/dev/null || echo "No logs available" + done +fi +echo "" + +# Step 7: Verify STAC item +echo "==========================================" +echo "STAC Item Verification" +echo "==========================================" +ITEM_ID=$(kubectl -n "$NAMESPACE" get wf "$WORKFLOW" -o jsonpath='{.spec.arguments.parameters[?(@.name=="item_id")].value}') +COLLECTION=$(kubectl -n "$NAMESPACE" get wf "$WORKFLOW" -o jsonpath='{.spec.arguments.parameters[?(@.name=="register_collection")].value}') +STAC_URL="https://api.explorer.eopf.copernicus.eu/stac/collections/$COLLECTION/items/$ITEM_ID" + +echo "Checking: $STAC_URL" +ITEM_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "$STAC_URL") +if [ 
"$ITEM_STATUS" = "200" ]; then + echo "βœ… STAC item exists!" + echo "" + curl -s "$STAC_URL" | jq '{ + id: .id, + collection: .collection, + geometry: .geometry.type, + assets: [.assets | keys[]], + links: [.links[] | select(.rel=="xyz" or .rel=="viewer" or .rel=="tilejson") | {rel, href}] + }' +else + echo "❌ STAC item not found (HTTP $ITEM_STATUS)" +fi +echo "" + +echo "==========================================" +echo "Test Summary" +echo "==========================================" +echo "Workflow: $WORKFLOW" +echo "Status: $PHASE" +echo "STAC Item: $ITEM_STATUS" +echo "" +if [ "$PHASE" = "Succeeded" ] && [ "$ITEM_STATUS" = "200" ]; then + echo "πŸŽ‰ END-TO-END TEST PASSED!" + exit 0 +else + echo "❌ END-TO-END TEST FAILED" + exit 1 +fi diff --git a/scripts/watch-staging-workflows.sh b/scripts/watch-staging-workflows.sh new file mode 100644 index 0000000..5c8012c --- /dev/null +++ b/scripts/watch-staging-workflows.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Helper script to monitor devseed-staging workflows +# Usage: ./watch-staging-workflows.sh [workflow-name] + +set -e + +NAMESPACE="devseed-staging" + +if [ $# -eq 0 ]; then + echo "πŸ“‹ Listing all workflows in $NAMESPACE..." 
+ argo list -n "$NAMESPACE" + echo "" + echo "πŸ’‘ Usage:" + echo " $0 # List all workflows" + echo " $0 # Watch specific workflow" + echo " $0 logs # View workflow logs" + echo " $0 get # Get workflow details" +elif [ "$1" = "logs" ]; then + shift + argo logs "$@" -n "$NAMESPACE" +elif [ "$1" = "get" ]; then + shift + argo get "$@" -n "$NAMESPACE" +else + echo "πŸ” Watching workflow: $1" + argo watch "$1" -n "$NAMESPACE" +fi diff --git a/tests/unit/test_augment_stac_item.py b/tests/unit/test_augment_stac_item.py index 76ba75a..be5c475 100644 --- a/tests/unit/test_augment_stac_item.py +++ b/tests/unit/test_augment_stac_item.py @@ -300,3 +300,92 @@ def test_normalize_asset_alternate_schemes_combined_transformations(): alternates = asset.extra_fields.get("alternate", {}) # Should be normalized from HTTPS AND resolved from preview assert alternates["s3"]["href"] == "s3://bucket/sentinel-2-l2a/data.zarr" + + +def test_get_s1_polarization_vh(): + """Test S1 polarization extraction when VH asset exists.""" + from datetime import datetime + + from pystac import Asset, Item + + from scripts.augment_stac_item import _get_s1_polarization + + item = Item( + id="test-s1", + geometry=None, + bbox=None, + datetime=datetime(2025, 10, 8), + properties={}, + ) + item.add_asset("vh", Asset(href="s3://bucket/data.zarr")) + item.add_asset("calibration", Asset(href="s3://bucket/cal.zarr")) + + result = _get_s1_polarization(item) + assert result == "VH" + + +def test_get_s1_polarization_vv(): + """Test S1 polarization extraction when only VV asset exists.""" + from datetime import datetime + + from pystac import Asset, Item + + from scripts.augment_stac_item import _get_s1_polarization + + item = Item( + id="test-s1", + geometry=None, + bbox=None, + datetime=datetime(2025, 10, 8), + properties={}, + ) + item.add_asset("vv", Asset(href="s3://bucket/data.zarr")) + + result = _get_s1_polarization(item) + assert result == "VV" + + +def test_get_s1_polarization_default(): + """Test S1 
polarization defaults to VH when no polarization assets exist.""" + from datetime import datetime + + from pystac import Asset, Item + + from scripts.augment_stac_item import _get_s1_polarization + + item = Item( + id="test-s1", + geometry=None, + bbox=None, + datetime=datetime(2025, 10, 8), + properties={}, + ) + item.add_asset("calibration", Asset(href="s3://bucket/cal.zarr")) + + result = _get_s1_polarization(item) + assert result == "VH" + + +def test_encode_s1_preview_query(): + """Test S1 GRD preview query encoding.""" + from datetime import datetime + + from pystac import Asset, Item + + from scripts.augment_stac_item import _encode_s1_preview_query + + item = Item( + id="test-s1", + geometry=None, + bbox=None, + datetime=datetime(2025, 10, 8), + properties={}, + ) + item.add_asset("vh", Asset(href="s3://bucket/data.zarr")) + + result = _encode_s1_preview_query(item) + + # Should include GRD measurement group (simple fallback without .zarr/ in href) + assert "variables=%2Fmeasurements%3Agrd" in result + assert "bidx=1" in result + assert "rescale=0%2C219" in result diff --git a/uv.lock b/uv.lock index 4e7859b..b82120d 100644 --- a/uv.lock +++ b/uv.lock @@ -204,6 +204,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, ] +[[package]] +name = "charset-normalizer" +version = "3.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371, upload-time = "2025-08-09T07:57:28.46Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7f/b5/991245018615474a60965a7c9cd2b4efbaabd16d582a5547c47ee1c7730b/charset_normalizer-3.4.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b256ee2e749283ef3ddcff51a675ff43798d92d746d1a6e4631bf8c707d22d0b", size = 204483, upload-time = "2025-08-09T07:55:53.12Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2a/ae245c41c06299ec18262825c1569c5d3298fc920e4ddf56ab011b417efd/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:13faeacfe61784e2559e690fc53fa4c5ae97c6fcedb8eb6fb8d0a15b475d2c64", size = 145520, upload-time = "2025-08-09T07:55:54.712Z" }, + { url = "https://files.pythonhosted.org/packages/3a/a4/b3b6c76e7a635748c4421d2b92c7b8f90a432f98bda5082049af37ffc8e3/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00237675befef519d9af72169d8604a067d92755e84fe76492fef5441db05b91", size = 158876, upload-time = "2025-08-09T07:55:56.024Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e6/63bb0e10f90a8243c5def74b5b105b3bbbfb3e7bb753915fe333fb0c11ea/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:585f3b2a80fbd26b048a0be90c5aae8f06605d3c92615911c3a2b03a8a3b796f", size = 156083, upload-time = "2025-08-09T07:55:57.582Z" }, + { url = "https://files.pythonhosted.org/packages/87/df/b7737ff046c974b183ea9aa111b74185ac8c3a326c6262d413bd5a1b8c69/charset_normalizer-3.4.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e78314bdc32fa80696f72fa16dc61168fda4d6a0c014e0380f9d02f0e5d8a07", size = 150295, upload-time = "2025-08-09T07:55:59.147Z" }, + { url = "https://files.pythonhosted.org/packages/61/f1/190d9977e0084d3f1dc169acd060d479bbbc71b90bf3e7bf7b9927dec3eb/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:96b2b3d1a83ad55310de8c7b4a2d04d9277d5591f40761274856635acc5fcb30", size = 148379, upload-time = "2025-08-09T07:56:00.364Z" }, + { url = "https://files.pythonhosted.org/packages/4c/92/27dbe365d34c68cfe0ca76f1edd70e8705d82b378cb54ebbaeabc2e3029d/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:939578d9d8fd4299220161fdd76e86c6a251987476f5243e8864a7844476ba14", size = 160018, upload-time = "2025-08-09T07:56:01.678Z" }, + { url = "https://files.pythonhosted.org/packages/99/04/baae2a1ea1893a01635d475b9261c889a18fd48393634b6270827869fa34/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:fd10de089bcdcd1be95a2f73dbe6254798ec1bda9f450d5828c96f93e2536b9c", size = 157430, upload-time = "2025-08-09T07:56:02.87Z" }, + { url = "https://files.pythonhosted.org/packages/2f/36/77da9c6a328c54d17b960c89eccacfab8271fdaaa228305330915b88afa9/charset_normalizer-3.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e8ac75d72fa3775e0b7cb7e4629cec13b7514d928d15ef8ea06bca03ef01cae", size = 151600, upload-time = "2025-08-09T07:56:04.089Z" }, + { url = "https://files.pythonhosted.org/packages/64/d4/9eb4ff2c167edbbf08cdd28e19078bf195762e9bd63371689cab5ecd3d0d/charset_normalizer-3.4.3-cp311-cp311-win32.whl", hash = "sha256:6cf8fd4c04756b6b60146d98cd8a77d0cdae0e1ca20329da2ac85eed779b6849", size = 99616, upload-time = "2025-08-09T07:56:05.658Z" }, + { url = "https://files.pythonhosted.org/packages/f4/9c/996a4a028222e7761a96634d1820de8a744ff4327a00ada9c8942033089b/charset_normalizer-3.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:31a9a6f775f9bcd865d88ee350f0ffb0e25936a7f930ca98995c05abf1faf21c", size = 107108, upload-time = "2025-08-09T07:56:07.176Z" }, + { url = "https://files.pythonhosted.org/packages/e9/5e/14c94999e418d9b87682734589404a25854d5f5d0408df68bc15b6ff54bb/charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1", size = 
205655, upload-time = "2025-08-09T07:56:08.475Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a8/c6ec5d389672521f644505a257f50544c074cf5fc292d5390331cd6fc9c3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884", size = 146223, upload-time = "2025-08-09T07:56:09.708Z" }, + { url = "https://files.pythonhosted.org/packages/fc/eb/a2ffb08547f4e1e5415fb69eb7db25932c52a52bed371429648db4d84fb1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018", size = 159366, upload-time = "2025-08-09T07:56:11.326Z" }, + { url = "https://files.pythonhosted.org/packages/82/10/0fd19f20c624b278dddaf83b8464dcddc2456cb4b02bb902a6da126b87a1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392", size = 157104, upload-time = "2025-08-09T07:56:13.014Z" }, + { url = "https://files.pythonhosted.org/packages/16/ab/0233c3231af734f5dfcf0844aa9582d5a1466c985bbed6cedab85af9bfe3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f", size = 151830, upload-time = "2025-08-09T07:56:14.428Z" }, + { url = "https://files.pythonhosted.org/packages/ae/02/e29e22b4e02839a0e4a06557b1999d0a47db3567e82989b5bb21f3fbbd9f/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154", size = 148854, upload-time = "2025-08-09T07:56:16.051Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/6b/e2539a0a4be302b481e8cafb5af8792da8093b486885a1ae4d15d452bcec/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491", size = 160670, upload-time = "2025-08-09T07:56:17.314Z" }, + { url = "https://files.pythonhosted.org/packages/31/e7/883ee5676a2ef217a40ce0bffcc3d0dfbf9e64cbcfbdf822c52981c3304b/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93", size = 158501, upload-time = "2025-08-09T07:56:18.641Z" }, + { url = "https://files.pythonhosted.org/packages/c1/35/6525b21aa0db614cf8b5792d232021dca3df7f90a1944db934efa5d20bb1/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f", size = 153173, upload-time = "2025-08-09T07:56:20.289Z" }, + { url = "https://files.pythonhosted.org/packages/50/ee/f4704bad8201de513fdc8aac1cabc87e38c5818c93857140e06e772b5892/charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37", size = 99822, upload-time = "2025-08-09T07:56:21.551Z" }, + { url = "https://files.pythonhosted.org/packages/39/f5/3b3836ca6064d0992c58c7561c6b6eee1b3892e9665d650c803bd5614522/charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc", size = 107543, upload-time = "2025-08-09T07:56:23.115Z" }, + { url = "https://files.pythonhosted.org/packages/65/ca/2135ac97709b400c7654b4b764daf5c5567c2da45a30cdd20f9eefe2d658/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe", size = 205326, upload-time = "2025-08-09T07:56:24.721Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/11/98a04c3c97dd34e49c7d247083af03645ca3730809a5509443f3c37f7c99/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8", size = 146008, upload-time = "2025-08-09T07:56:26.004Z" }, + { url = "https://files.pythonhosted.org/packages/60/f5/4659a4cb3c4ec146bec80c32d8bb16033752574c20b1252ee842a95d1a1e/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9", size = 159196, upload-time = "2025-08-09T07:56:27.25Z" }, + { url = "https://files.pythonhosted.org/packages/86/9e/f552f7a00611f168b9a5865a1414179b2c6de8235a4fa40189f6f79a1753/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31", size = 156819, upload-time = "2025-08-09T07:56:28.515Z" }, + { url = "https://files.pythonhosted.org/packages/7e/95/42aa2156235cbc8fa61208aded06ef46111c4d3f0de233107b3f38631803/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f", size = 151350, upload-time = "2025-08-09T07:56:29.716Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a9/3865b02c56f300a6f94fc631ef54f0a8a29da74fb45a773dfd3dcd380af7/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927", size = 148644, upload-time = "2025-08-09T07:56:30.984Z" }, + { url = "https://files.pythonhosted.org/packages/77/d9/cbcf1a2a5c7d7856f11e7ac2d782aec12bdfea60d104e60e0aa1c97849dc/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9", size = 160468, upload-time = "2025-08-09T07:56:32.252Z" }, + { url = "https://files.pythonhosted.org/packages/f6/42/6f45efee8697b89fda4d50580f292b8f7f9306cb2971d4b53f8914e4d890/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5", size = 158187, upload-time = "2025-08-09T07:56:33.481Z" }, + { url = "https://files.pythonhosted.org/packages/70/99/f1c3bdcfaa9c45b3ce96f70b14f070411366fa19549c1d4832c935d8e2c3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc", size = 152699, upload-time = "2025-08-09T07:56:34.739Z" }, + { url = "https://files.pythonhosted.org/packages/a3/ad/b0081f2f99a4b194bcbb1934ef3b12aa4d9702ced80a37026b7607c72e58/charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce", size = 99580, upload-time = "2025-08-09T07:56:35.981Z" }, + { url = "https://files.pythonhosted.org/packages/9a/8f/ae790790c7b64f925e5c953b924aaa42a243fb778fed9e41f147b2a5715a/charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef", size = 107366, upload-time = "2025-08-09T07:56:37.339Z" }, + { url = "https://files.pythonhosted.org/packages/8e/91/b5a06ad970ddc7a0e513112d40113e834638f4ca1120eb727a249fb2715e/charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15", size = 204342, upload-time = "2025-08-09T07:56:38.687Z" }, + { url = "https://files.pythonhosted.org/packages/ce/ec/1edc30a377f0a02689342f214455c3f6c2fbedd896a1d2f856c002fc3062/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db", size = 145995, upload-time = "2025-08-09T07:56:40.048Z" }, + { url = "https://files.pythonhosted.org/packages/17/e5/5e67ab85e6d22b04641acb5399c8684f4d37caf7558a53859f0283a650e9/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d", size = 158640, upload-time = "2025-08-09T07:56:41.311Z" }, + { url = "https://files.pythonhosted.org/packages/f1/e5/38421987f6c697ee3722981289d554957c4be652f963d71c5e46a262e135/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096", size = 156636, upload-time = "2025-08-09T07:56:43.195Z" }, + { url = "https://files.pythonhosted.org/packages/a0/e4/5a075de8daa3ec0745a9a3b54467e0c2967daaaf2cec04c845f73493e9a1/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa", size = 150939, upload-time = "2025-08-09T07:56:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/02/f7/3611b32318b30974131db62b4043f335861d4d9b49adc6d57c1149cc49d4/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049", size = 148580, upload-time = "2025-08-09T07:56:46.684Z" }, + { url = "https://files.pythonhosted.org/packages/7e/61/19b36f4bd67f2793ab6a99b979b4e4f3d8fc754cbdffb805335df4337126/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0", size = 159870, upload-time = "2025-08-09T07:56:47.941Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/57/84722eefdd338c04cf3030ada66889298eaedf3e7a30a624201e0cbe424a/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92", size = 157797, upload-time = "2025-08-09T07:56:49.756Z" }, + { url = "https://files.pythonhosted.org/packages/72/2a/aff5dd112b2f14bcc3462c312dce5445806bfc8ab3a7328555da95330e4b/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16", size = 152224, upload-time = "2025-08-09T07:56:51.369Z" }, + { url = "https://files.pythonhosted.org/packages/b7/8c/9839225320046ed279c6e839d51f028342eb77c91c89b8ef2549f951f3ec/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce", size = 100086, upload-time = "2025-08-09T07:56:52.722Z" }, + { url = "https://files.pythonhosted.org/packages/ee/7a/36fbcf646e41f710ce0a563c1c9a343c6edf9be80786edeb15b6f62e17db/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c", size = 107400, upload-time = "2025-08-09T07:56:55.172Z" }, + { url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" }, +] + [[package]] name = "click" version = "8.3.0" @@ -379,7 +432,9 @@ dependencies = [ { name = "httpx" }, { name = "pika" }, { name = "pystac" }, + { name = "requests" }, { name = "s3fs" }, + { name = "tenacity" }, { name = "xarray" }, { name = "zarr" }, ] @@ -407,8 +462,10 @@ requires-dist = [ { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.1.0" }, 
{ name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12.0" }, + { name = "requests", specifier = ">=2.31.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8.0" }, { name = "s3fs", specifier = ">=2024.0.0" }, + { name = "tenacity", specifier = ">=8.0.0" }, { name = "types-boto3", marker = "extra == 'dev'", specifier = ">=1.0.2" }, { name = "xarray", specifier = ">=2024.0.0" }, { name = "zarr", specifier = ">=2.18.0" }, @@ -1271,6 +1328,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + [[package]] name = "ruff" version = "0.13.3" @@ -1341,6 +1413,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] 
+name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, +] + [[package]] name = "tomli" version = "2.2.1" diff --git a/workflows/amqp-publish-once.yaml b/workflows/amqp-publish-once.yaml new file mode 100644 index 0000000..7a1760e --- /dev/null +++ b/workflows/amqp-publish-once.yaml @@ -0,0 +1,87 @@ +--- +# Generic AMQP publish job +# Publishes payload from 'amqp-payload' configmap to RabbitMQ +# +# Usage: +# 1. Create configmap: kubectl create configmap amqp-payload --from-file=body.json=PATH_TO_PAYLOAD.json +# 2. Apply this job: kubectl apply -f amqp-publish-once.yaml +# 3. 
Wait: kubectl wait --for=condition=complete job/amqp-publish-once +# +apiVersion: batch/v1 +kind: Job +metadata: + name: amqp-publish-once + namespace: devseed-staging +spec: + ttlSecondsAfterFinished: 300 + template: + spec: + restartPolicy: Never + containers: + - name: publish + image: python:3.11-slim + command: + - /bin/bash + - -c + - | + set -e + pip install -q pika + cat <<'PUBLISH_SCRIPT' > /tmp/publish.py + import json + import os + import pika + + with open('/payload/body.json') as f: + payload = json.load(f) + + credentials = pika.PlainCredentials( + os.environ['RABBITMQ_USERNAME'], + os.environ['RABBITMQ_PASSWORD'] + ) + parameters = pika.ConnectionParameters( + host='rabbitmq.core.svc.cluster.local', + port=5672, + credentials=credentials + ) + + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + routing_key = f"eopf.item.found.{payload['collection']}" + + channel.basic_publish( + exchange='eopf_items', + routing_key=routing_key, + body=json.dumps(payload), + properties=pika.BasicProperties( + content_type='application/json', + delivery_mode=2 # persistent + ) + ) + + print(f"✅ Published to exchange=eopf_items, routing_key={routing_key}") + print(f"📦 Payload: {json.dumps(payload, indent=2)}") + + connection.close() + PUBLISH_SCRIPT + + python /tmp/publish.py + env: + - name: RABBITMQ_USERNAME + valueFrom: + secretKeyRef: + name: rabbitmq-credentials + key: username + - name: RABBITMQ_PASSWORD + valueFrom: + secretKeyRef: + name: rabbitmq-credentials + key: password + volumeMounts: + - name: payload + mountPath: /payload + readOnly: true + volumes: + - name: payload + configMap: + name: amqp-payload diff --git a/workflows/amqp-publish-s1-test.yaml b/workflows/amqp-publish-s1-test.yaml new file mode 100644 index 0000000..90da74b --- /dev/null +++ b/workflows/amqp-publish-s1-test.yaml @@ -0,0 +1,92 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: amqp-payload-s1-test + namespace: devseed-staging +data: + 
body.json: | + { + "source_url": "https://stac.core.eopf.eodc.eu/collections/sentinel-1-l1-grd/items/S1C_IW_GRDH_1SDV_20251008T163126_20251008T163151_004473_008DBA_9AB4", + "item_id": "S1C_IW_GRDH_20251008_test", + "collection": "sentinel-1-l1-grd" + } +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: amqp-publish-s1-test + namespace: devseed-staging +spec: + ttlSecondsAfterFinished: 300 + template: + spec: + restartPolicy: Never + containers: + - name: publish + image: python:3.11-slim + command: + - /bin/bash + - -c + - | + set -e + pip install -q pika + cat <<'PUBLISH_SCRIPT' > /tmp/publish.py + import json + import os + import pika + + with open('/payload/body.json') as f: + payload = json.load(f) + + credentials = pika.PlainCredentials( + os.environ['RABBITMQ_USERNAME'], + os.environ['RABBITMQ_PASSWORD'] + ) + parameters = pika.ConnectionParameters( + host='rabbitmq.core.svc.cluster.local', + port=5672, + credentials=credentials + ) + + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + routing_key = f"eopf.items.{payload['collection']}" + + channel.basic_publish( + exchange='geozarr', + routing_key=routing_key, + body=json.dumps(payload), + properties=pika.BasicProperties( + content_type='application/json', + delivery_mode=2 # persistent + ) + ) + + print(f"✅ Published to exchange=geozarr, routing_key={routing_key}") + print(f"📦 Payload: {json.dumps(payload, indent=2)}") + + connection.close() + PUBLISH_SCRIPT + + python /tmp/publish.py + env: + - name: RABBITMQ_USERNAME + valueFrom: + secretKeyRef: + name: rabbitmq-credentials + key: username + - name: RABBITMQ_PASSWORD + valueFrom: + secretKeyRef: + name: rabbitmq-credentials + key: password + volumeMounts: + - name: payload + mountPath: /payload + readOnly: true + volumes: + - name: payload + configMap: + name: amqp-payload-s1-test diff --git a/workflows/eventsource.yaml b/workflows/eventsource.yaml index 6710232..ef276e7 100644 --- 
a/workflows/eventsource.yaml +++ b/workflows/eventsource.yaml @@ -2,7 +2,7 @@ apiVersion: argoproj.io/v1alpha1 kind: EventSource metadata: name: rabbitmq-geozarr - namespace: devseed + namespace: devseed-staging spec: amqp: geozarr-events: diff --git a/workflows/examples/payload-s1.json b/workflows/examples/payload-s1.json new file mode 100644 index 0000000..9e6752e --- /dev/null +++ b/workflows/examples/payload-s1.json @@ -0,0 +1,5 @@ +{ + "source_url": "https://stac.core.eopf.eodc.eu/collections/sentinel-1-l1-grd/items/S1C_IW_GRDH_1SDV_20251008T163126_20251008T163151_004473_008DBA_9AB4", + "item_id": "S1C_IW_GRDH_20251008_test", + "collection": "sentinel-1-l1-grd-dp-test" +} diff --git a/workflows/examples/run-s1-test.yaml b/workflows/examples/run-s1-test.yaml new file mode 100644 index 0000000..b682428 --- /dev/null +++ b/workflows/examples/run-s1-test.yaml @@ -0,0 +1,22 @@ +--- +# Direct workflow run for S1 GRD test +# Bypasses AMQP/EventSource to test workflow template directly +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: geozarr-s1-test- + namespace: devseed-staging + labels: + app: geozarr-pipeline + test: s1-grd-direct +spec: + workflowTemplateRef: + name: geozarr-pipeline + arguments: + parameters: + - name: source_url + value: "https://stac.core.eopf.eodc.eu/collections/sentinel-1-l1-grd/items/S1C_IW_GRDH_1SDV_20251008T163126_20251008T163151_004473_008DBA_9AB4" + - name: item_id + value: "S1C_IW_GRDH_20251008_test" + - name: register_collection + value: "sentinel-1-l1-grd-dp-test" diff --git a/workflows/examples/sentinel-1-l1-grd-dp-test.json b/workflows/examples/sentinel-1-l1-grd-dp-test.json new file mode 100644 index 0000000..899255e --- /dev/null +++ b/workflows/examples/sentinel-1-l1-grd-dp-test.json @@ -0,0 +1,161 @@ +{ + "type": "Collection", + "id": "sentinel-1-l1-grd-dp-test", + "title": "Sentinel-1 Level-1 GRD [Data Pipeline Test]", + "description": "Sentinel-1 Level-1 Ground Range Detected (GRD) products consist 
of focused SAR data that has been detected, multi-looked and projected to ground range using an Earth ellipsoid model. GRD products are available in three resolutions: Full Resolution (FR), High Resolution (HR) and Medium Resolution (MR). This test collection is used for validating the data pipeline conversion and registration workflow.", + "keywords": [ + "Copernicus", + "Sentinel", + "EU", + "ESA", + "Satellite", + "SAR", + "C-band", + "Backscatter" + ], + "license": "proprietary", + "extent": { + "spatial": { + "bbox": [ + [ + -180, + -90, + 180, + 90 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2014-10-03T00:00:00Z", + null + ] + ] + } + }, + "summaries": { + "gsd": [ + 10, + 25, + 40 + ], + "platform": [ + "Sentinel-1A", + "Sentinel-1B", + "Sentinel-1C" + ], + "instruments": [ + "c-sar" + ], + "constellation": [ + "sentinel-1" + ], + "sar:frequency_band": [ + "C" + ], + "sar:instrument_mode": [ + "IW", + "EW", + "SM" + ], + "sar:polarizations": [ + "VV", + "VH", + "HH", + "HV" + ], + "sar:product_type": [ + "GRD" + ], + "processing:level": [ + "L1" + ], + "sat:platform_international_designator": [ + "2014-016A", + "2016-025A", + "2024-087A" + ] + }, + "item_assets": { + "vh": { + "type": "application/vnd+zarr", + "roles": [ + "data", + "amplitude", + "dataset" + ], + "title": "VH Polarization", + "description": "Vertical transmit, Horizontal receive backscatter amplitude" + }, + "vv": { + "type": "application/vnd+zarr", + "roles": [ + "data", + "amplitude", + "dataset" + ], + "title": "VV Polarization", + "description": "Vertical transmit, Vertical receive backscatter amplitude" + }, + "hh": { + "type": "application/vnd+zarr", + "roles": [ + "data", + "amplitude", + "dataset" + ], + "title": "HH Polarization", + "description": "Horizontal transmit, Horizontal receive backscatter amplitude" + }, + "hv": { + "type": "application/vnd+zarr", + "roles": [ + "data", + "amplitude", + "dataset" + ], + "title": "HV Polarization", + "description": "Horizontal 
transmit, Vertical receive backscatter amplitude" + }, + "product": { + "type": "application/vnd+zarr", + "roles": [ + "data", + "metadata" + ], + "title": "EOPF Product", + "description": "The full Zarr hierarchy of the EOPF product" + }, + "product_metadata": { + "type": "application/json", + "roles": [ + "metadata" + ], + "title": "Consolidated Metadata", + "description": "Consolidated metadata of the EOPF product" + } + }, + "links": [ + { + "rel": "self", + "type": "application/json", + "href": "https://api.explorer.eopf.copernicus.eu/stac/collections/sentinel-1-l1-grd-dp-test" + }, + { + "rel": "items", + "type": "application/geo+json", + "href": "https://api.explorer.eopf.copernicus.eu/stac/collections/sentinel-1-l1-grd-dp-test/items" + }, + { + "rel": "root", + "type": "application/json", + "href": "https://api.explorer.eopf.copernicus.eu/stac" + }, + { + "rel": "parent", + "type": "application/json", + "href": "https://api.explorer.eopf.copernicus.eu/stac" + } + ] +} diff --git a/workflows/rbac-staging.yaml b/workflows/rbac-staging.yaml new file mode 100644 index 0000000..6579867 --- /dev/null +++ b/workflows/rbac-staging.yaml @@ -0,0 +1,61 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: operate-workflow-sa + namespace: devseed-staging +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: workflow-executor + namespace: devseed-staging +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - watch + - patch + - apiGroups: + - "" + resources: + - pods/log + verbs: + - get + - watch + - apiGroups: + - "" + resources: + - pods/exec + verbs: + - create + - apiGroups: + - argoproj.io + resources: + - workflowtaskresults + verbs: + - create + - patch + - apiGroups: + - "" + resources: + - secrets + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: workflow-executor-binding + namespace: devseed-staging +subjects: + - kind: ServiceAccount + name: 
operate-workflow-sa + namespace: devseed-staging +roleRef: + kind: Role + name: workflow-executor + apiGroup: rbac.authorization.k8s.io diff --git a/workflows/rbac.yaml b/workflows/rbac.yaml index 399ac1b..4f96d64 100644 --- a/workflows/rbac.yaml +++ b/workflows/rbac.yaml @@ -2,13 +2,13 @@ apiVersion: v1 kind: ServiceAccount metadata: name: argo-workflow - namespace: devseed + namespace: devseed-staging --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: argo-executor - namespace: devseed + namespace: devseed-staging rules: - apiGroups: - argoproj.io @@ -21,7 +21,7 @@ rules: kind: RoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - namespace: devseed + namespace: devseed-staging name: argo-workflow-executor subjects: - kind: ServiceAccount diff --git a/workflows/sensor.yaml b/workflows/sensor.yaml index f89837c..5daf0cc 100644 --- a/workflows/sensor.yaml +++ b/workflows/sensor.yaml @@ -2,14 +2,14 @@ apiVersion: argoproj.io/v1alpha1 kind: Sensor metadata: name: geozarr-sensor - namespace: devseed + namespace: devseed-staging spec: template: serviceAccountName: operate-workflow-sa dependencies: - name: geozarr-event - eventSourceName: rabbitmq-geozarr - eventName: geozarr-events + eventSourceName: amqp + eventName: eopf-items-convert triggers: - template: @@ -22,10 +22,10 @@ spec: kind: Workflow metadata: generateName: geozarr- - namespace: devseed + namespace: devseed-staging labels: app: geozarr-pipeline - owner: devseed + owner: devseed-staging spec: workflowTemplateRef: name: geozarr-pipeline @@ -34,7 +34,6 @@ spec: - name: source_url - name: item_id - name: register_collection - value: "sentinel-2-l2a" parameters: - src: dependencyName: geozarr-event diff --git a/workflows/template.yaml b/workflows/template.yaml index 6ea51b4..3d0df82 100644 --- a/workflows/template.yaml +++ b/workflows/template.yaml @@ -2,7 +2,7 @@ apiVersion: argoproj.io/v1alpha1 kind: WorkflowTemplate metadata: name: geozarr-pipeline - namespace: devseed + 
namespace: devseed-staging spec: # Service account with S3 and STAC API permissions serviceAccountName: operate-workflow-sa @@ -10,9 +10,9 @@ spec: # Clean up completed workflows after 24 hours ttlStrategy: secondsAfterCompletion: 86400 # 24 hours - # Also clean up pods + # Keep pods on failure for debugging podGC: - strategy: OnWorkflowCompletion + strategy: OnWorkflowSuccess arguments: parameters: - name: source_url @@ -37,14 +37,15 @@ spec: activeDeadlineSeconds: 3600 # 1 hour timeout script: # Use data-pipeline image with scripts and latest eopf-geozarr - image: ghcr.io/eopf-explorer/data-pipeline:v15-refactored + image: ghcr.io/eopf-explorer/data-pipeline:v21 imagePullPolicy: Always command: [bash] source: | set -euo pipefail SOURCE_URL="{{workflow.parameters.source_url}}" - OUTPUT_PATH="s3://esa-zarr-sentinel-explorer-fra/tests-output/{{workflow.parameters.register_collection}}/{{workflow.parameters.item_id}}.zarr" + COLLECTION="{{workflow.parameters.register_collection}}" + OUTPUT_PATH="s3://esa-zarr-sentinel-explorer-fra/tests-output/$COLLECTION/{{workflow.parameters.item_id}}.zarr" echo "🔍 Resolving source..." # Check if source is STAC item or direct zarr @@ -57,13 +58,33 @@ spec: echo "✅ Direct Zarr URL: $ZARR_URL" fi - echo "🚀 Starting conversion..." - eopf-geozarr convert \ - "$ZARR_URL" \ - "$OUTPUT_PATH" \ - --groups /quality/l2a_quicklook/r10m \ - --crs-groups /quality/l2a_quicklook/r10m \ - --spatial-chunk 4096 \ + echo "🚀 Starting GeoZarr conversion" + echo "Source: $ZARR_URL" + echo "Destination: $OUTPUT_PATH" + echo "Collection: $COLLECTION" + + # Clean up any partial output from previous failed runs + echo "🧹 Cleaning up any existing output..." 
+ python3 /app/scripts/cleanup_s3_path.py "$OUTPUT_PATH" + + # S1 requires different parameters (both prod and test collections) + if [[ "$COLLECTION" == sentinel-1-l1-grd* ]]; then + ZARR_GROUPS="/measurements" + EXTRA_FLAGS="--gcp-group /conditions/gcp" + CHUNK=2048 + echo "📡 S1 GRD mode: groups=$ZARR_GROUPS, chunk=$CHUNK" + else + ZARR_GROUPS="/quality/l2a_quicklook/r10m" + EXTRA_FLAGS="--crs-groups /quality/l2a_quicklook/r10m" + CHUNK=4096 + echo "🗺️ S2 L2A mode: groups=$ZARR_GROUPS, chunk=$CHUNK" + fi + + # Build conversion command + eopf-geozarr convert "$ZARR_URL" "$OUTPUT_PATH" \ + --groups "$ZARR_GROUPS" \ + $EXTRA_FLAGS \ + --spatial-chunk $CHUNK \ + --tile-width 512 \ --verbose env: @@ -83,17 +104,17 @@ spec: value: "https://s3.de.io.cloud.ovh.net" resources: requests: - memory: "2Gi" - cpu: "500m" + memory: "8Gi" + cpu: "1" limits: - memory: "4Gi" - cpu: "2" + memory: "16Gi" + cpu: "4" - name: register-stac activeDeadlineSeconds: 300 # 5 min timeout container: # Use data-pipeline image for Python scripts (register, augment) - image: ghcr.io/eopf-explorer/data-pipeline:v15-refactored + image: ghcr.io/eopf-explorer/data-pipeline:v21 imagePullPolicy: Always command: [python] args: @@ -120,7 +141,7 @@ spec: activeDeadlineSeconds: 300 # 5 min timeout container: # Use data-pipeline image for Python scripts (register, augment) - image: ghcr.io/eopf-explorer/data-pipeline:v15-refactored + image: ghcr.io/eopf-explorer/data-pipeline:v21 imagePullPolicy: Always command: [python] args: