diff --git a/datasets/hls2/collection/hls2-l30/template.json b/datasets/hls2/collection/hls2-l30/template.json index 60236347..f32b0863 100644 --- a/datasets/hls2/collection/hls2-l30/template.json +++ b/datasets/hls2/collection/hls2-l30/template.json @@ -4,8 +4,14 @@ "id": "hls2-l30", "title": "Harmonized Landsat Sentinel-2 (HLS) Version 2.0, Landsat Data", "description": "{{ collection.description }}", - "license": "Data Citation Guidance: https://lpdaac.usgs.gov/data/data-citations-and-guidelines", - "links": [], + "license": "proprietary", + "links": [ + { + "rel": "license", + "href": "https://lpdaac.usgs.gov/data/data-citation-and-policies/", + "title": "LP DAAC - Data Citation and Policies" + } + ], "stac_extensions": [ "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", "https://stac-extensions.github.io/table/v1.2.0/schema.json", @@ -49,6 +55,22 @@ "type": "image/webp", "href": "https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/hls2-l30.webp", "title": "HLS2 Landsat Collection Thumbnail" + }, + "geoparquet-items": { + "href": "abfs://items/hls2-l30.parquet", + "type": "application/x-parquet", + "roles": [ + "stac-items" + ], + "title": "GeoParquet STAC items", + "description": "Snapshot of the collection's STAC items exported to GeoParquet format.", + "msft:partition_info": { + "is_partitioned": true, + "partition_frequency": "W-MON" + }, + "table:storage_options": { + "account_name": "pcstacitems" + } } }, "summaries": { diff --git a/datasets/hls2/collection/hls2-s30/template.json b/datasets/hls2/collection/hls2-s30/template.json index 2536df25..04f78bee 100644 --- a/datasets/hls2/collection/hls2-s30/template.json +++ b/datasets/hls2/collection/hls2-s30/template.json @@ -4,8 +4,14 @@ "id": "hls2-s30", "title": "Harmonized Landsat Sentinel-2 (HLS) Version 2.0, Sentinel-2 Data", "description": "{{ collection.description }}", - "license": "Data Citation Guidance: https://lpdaac.usgs.gov/data/data-citations-and-guidelines", - "links": [], + "license": "proprietary", + "links": [ + { + "rel": "license", + "href": "https://lpdaac.usgs.gov/data/data-citation-and-policies/", + "title": "LP DAAC - Data Citation and Policies" + } + ], "stac_extensions": [ "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json", "https://stac-extensions.github.io/table/v1.2.0/schema.json", @@ -56,6 +62,22 @@ "type": "image/webp", "href": "https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/hls2-s30.webp", "title": "HLS2 Sentinel Collection Thumbnail" + }, + "geoparquet-items": { + "href": "abfs://items/hls2-s30.parquet", + "type": "application/x-parquet", + "roles": [ + "stac-items" + ], + "title": "GeoParquet STAC items", + "description": "Snapshot of the collection's STAC items exported to GeoParquet format.", + "msft:partition_info": { + "is_partitioned": true, + "partition_frequency": "W-MON" + }, + "table:storage_options": { + "account_name": "pcstacitems" + } } }, "summaries": { diff --git a/datasets/stac-geoparquet/Dockerfile b/datasets/stac-geoparquet/Dockerfile index 7fe782fb..f68deb0c 100644 --- a/datasets/stac-geoparquet/Dockerfile +++ b/datasets/stac-geoparquet/Dockerfile @@ -1,41 +1,23 @@ -FROM ubuntu:20.04 +FROM mcr.microsoft.com/azurelinux/base/python:3.12 # Setup timezone info ENV TZ=UTC ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 +ENV UV_SYSTEM_PYTHON=TRUE RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN apt-get update && apt-get install -y software-properties-common +RUN tdnf install 
build-essential jq unzip ca-certificates awk wget curl git azure-cli -y \
+    && tdnf clean all
 
-RUN add-apt-repository ppa:ubuntugis/ppa && \
-    apt-get update && \
-    apt-get install -y build-essential python3-dev python3-pip \
-    jq unzip ca-certificates wget curl git && \
-    apt-get autoremove && apt-get autoclean && apt-get clean
-
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
-
-# See https://github.com/mapbox/rasterio/issues/1289
-ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
-
-# Install Python 3.11
-RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
-    && bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \
-    && rm -rf "Mambaforge-$(uname)-$(uname -m).sh"
-
-ENV PATH /opt/conda/bin:$PATH
-ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH
-
-RUN mamba install -y -c conda-forge python=3.11 gdal pip setuptools cython numpy
-
-RUN python -m pip install --upgrade pip
+# RUN python3 -m pip install --upgrade pip
+RUN pip install --upgrade uv
 
 # Install common packages
 COPY requirements-task-base.txt /tmp/requirements.txt
-RUN python -m pip install --no-build-isolation -r /tmp/requirements.txt
+RUN uv pip install --no-build-isolation -r /tmp/requirements.txt
 
 #
 # Copy and install packages
@@ -43,30 +25,22 @@ RUN python -m pip install --no-build-isolation -r /tmp/requirements.txt
 
 COPY pctasks/core /opt/src/pctasks/core
 RUN cd /opt/src/pctasks/core && \
-    pip install .
+    uv pip install .
 
 COPY pctasks/cli /opt/src/pctasks/cli
 RUN cd /opt/src/pctasks/cli && \
-    pip install .
+    uv pip install .
 
 COPY pctasks/task /opt/src/pctasks/task
 RUN cd /opt/src/pctasks/task && \
-    pip install .
+    uv pip install .
 
 COPY pctasks/client /opt/src/pctasks/client
 RUN cd /opt/src/pctasks/client && \
-    pip install .
-
-# COPY pctasks/ingest /opt/src/pctasks/ingest
-# RUN cd /opt/src/pctasks/ingest && \
-#     pip install .
-
-# COPY pctasks/dataset /opt/src/pctasks/dataset
-# RUN cd /opt/src/pctasks/dataset && \
-#     pip install .
+    uv pip install .
 
 COPY datasets/stac-geoparquet /opt/src/datasets/stac-geoparquet
-RUN python3 -m pip install -r /opt/src/datasets/stac-geoparquet/requirements.txt
+RUN uv pip install -r /opt/src/datasets/stac-geoparquet/requirements.txt
 
 # Setup Python Path to allow import of test modules
 ENV PYTHONPATH=/opt/src:$PYTHONPATH
diff --git a/datasets/stac-geoparquet/README.md b/datasets/stac-geoparquet/README.md
index ef293a83..7f385719 100644
--- a/datasets/stac-geoparquet/README.md
+++ b/datasets/stac-geoparquet/README.md
@@ -4,20 +4,29 @@ Generates the `stac-geoparquet` collection-level assets for the [Planetary Compu
 
 ## Container Images
 
+Test the build with:
 ```shell
-$ az acr build -r pccomponents -t pctasks-stac-geoparquet:latest -t pctasks-stac-geoparquet:2023.7.10.0 -f datasets/stac-geoparquet/Dockerfile .
+docker build -t stac-geoparquet -f datasets/stac-geoparquet/Dockerfile .
+```
+
+Then publish to the ACR with:
+
+```shell
+az acr build -r pccomponents -t pctasks-stac-geoparquet:latest -t pctasks-stac-geoparquet:2023.7.10.0 -f datasets/stac-geoparquet/Dockerfile .
 ```
 
 ## Permissions
 
 This requires the following permissions:
 
-* Storage Data Table Reader on the config tables (`pcapi/bluecollectoinconfig`, `pcapi/greencollectionconfig`)
+* Storage Data Table Reader on the config tables (`pcapi/bluecollectionconfig`, `pcapi/greencollectionconfig`)
 * Storage Blob Data Contributor on the `items` container in the `pcstacitems` storage account.
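+
+With those roles active you can sanity check an exported asset directly. The
+snippet below is a sketch (assuming `geopandas`, `adlfs`, and `azure-identity`
+are installed locally) that reads back the `hls2-l30` snapshot declared in the
+collection template above:
+
+```python
+import geopandas
+from azure.identity import DefaultAzureCredential
+
+# Read the collection-level GeoParquet asset from the `items` container.
+df = geopandas.read_parquet(
+    "abfs://items/hls2-l30.parquet",
+    storage_options={
+        "account_name": "pcstacitems",
+        "credential": DefaultAzureCredential(),
+    },
+)
+print(len(df), "items")
+```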
## Arguments
+
By default, this workflow will generate geoparquet assets for all collections.
If you want to select a subset of collections, you can use either:
+
1. `extra_skip`: This will skip certain collections.
1. `collections`: This will only generate geoparquet for the specified collection(s).

@@ -25,6 +34,28 @@ If you want to select a subset of collections, you can use either:
 
 The workflow used for updates was registered with:
 
+```shell
+pctasks workflow update datasets/stac-geoparquet/workflow.yaml
+```
+
+It can be manually invoked with:
+
+```shell
+pctasks workflow submit stac-geoparquet
 ```
-pctasks workflow update datasets/workflows/stac-geoparquet.yaml
-```
\ No newline at end of file
+
+## Run Locally
+
+You can debug the geoparquet export locally like this:
+
+```shell
+export STAC_GEOPARQUET_CONNECTION_INFO="secret"
+export STAC_GEOPARQUET_TABLE_NAME="greencollectionconfig"
+export STAC_GEOPARQUET_TABLE_ACCOUNT_URL="https://pcapi.table.core.windows.net"
+export STAC_GEOPARQUET_STORAGE_OPTIONS_ACCOUNT_NAME="pcstacitems"
+
+python3 pc_stac_geoparquet.py --collection hls2-l30
+```
+
+Apart from the Postgres connection string, you will need PIM activations for
+`Storage Blob Data Contributor` to be able to write to the production storage account.
diff --git a/datasets/stac-geoparquet/pc_stac_geoparquet.py b/datasets/stac-geoparquet/pc_stac_geoparquet.py
index 46d25ba8..39991cb0 100644
--- a/datasets/stac-geoparquet/pc_stac_geoparquet.py
+++ b/datasets/stac-geoparquet/pc_stac_geoparquet.py
@@ -1,27 +1,580 @@
 from __future__ import annotations
 
 import argparse
+import collections.abc
+import dataclasses
+import datetime
+import hashlib
+import itertools
+import json
 import logging
 import os
-from typing import Union, Set
+import time
+import urllib.parse
+from typing import Any, Set, Union
 
 import azure.core.credentials
 import azure.data.tables
-from stac_geoparquet import pc_runner
+import azure.identity
+
+import dateutil.tz
+import fsspec
+import pandas as pd
+import pystac
+import requests
+from stac_geoparquet.arrow import to_parquet
+from stac_geoparquet.pgstac_reader import (
+    get_pgstac_partitions,
+    Partition,
+    pgstac_to_arrow,
+    pgstac_to_iter,
+)
 
-from pctasks.task.task import Task
 from pctasks.core.models.base import PCBaseModel
 from pctasks.core.models.task import FailedTaskResult, WaitTaskResult
 from pctasks.task.context import TaskContext
+from pctasks.task.task import Task
+import tqdm.auto
+import tempfile
 
 from pctasks.core.utils.credential import get_credential
 
 handler = logging.StreamHandler()
 handler.setFormatter(logging.Formatter("[%(levelname)s]:%(asctime)s: %(message)s"))
-handler.setLevel(logging.INFO)
+handler.setLevel(logging.DEBUG)
 
 logger = logging.getLogger(__name__)
 logger.addHandler(handler)
-logger.setLevel(logging.INFO)
+logger.setLevel(logging.DEBUG)
+
+CHUNK_SIZE = 8192
+
+PARTITION_FREQUENCIES = {
+    "3dep-lidar-classification": "YS",
+    "3dep-lidar-copc": "YS",
+    "3dep-lidar-dsm": "YS",
+    "3dep-lidar-dtm": "YS",
+    "3dep-lidar-dtm-native": "YS",
+    "3dep-lidar-hag": "YS",
+    "3dep-lidar-intensity": "YS",
+    "3dep-lidar-pointsourceid": "YS",
+    "3dep-lidar-returns": "YS",
+    "3dep-seamless": None,
+    "alos-dem": None,
+    "alos-fnf-mosaic": "YS",
+    "alos-palsar-mosaic": "YS",
+    "aster-l1t": "YS",
+    "chloris-biomass": None,
+    "cil-gdpcir-cc-by": None,
+    "cil-gdpcir-cc-by-sa": None,
+    "cil-gdpcir-cc0": None,
+    "cop-dem-glo-30": None,
+    "cop-dem-glo-90": None,
+    "eclipse": None,
+    "ecmwf-forecast": "MS",
+    "era5-pds": None,
+    "esa-worldcover": None,
+    "fia": None,
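+    # The values in this mapping are pandas offset aliases: "YS" = yearly
+    # (year start), "MS" = monthly (month start), "W-MON" = weekly anchored on
+    # Mondays. None means the collection is exported as a single,
+    # unpartitioned parquet file.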
"gap": None, + "gbif": None, + "gnatsgo-rasters": None, + "gnatsgo-tables": None, + "goes-cmi": "W-MON", + "hrea": None, + "io-lulc": None, + "io-lulc-9-class": None, + "jrc-gsw": None, + "landsat-c2-l1": "MS", + "landsat-c2-l2": "MS", + "mobi": None, + "modis-09A1-061": "MS", + "modis-09Q1-061": "MS", + "modis-10A1-061": "MS", + "modis-10A2-061": "MS", + "modis-11A1-061": "MS", + "modis-11A2-061": "MS", + "modis-13A1-061": "MS", + "modis-13Q1-061": "MS", + "modis-14A1-061": "MS", + "modis-14A2-061": "MS", + "modis-15A2H-061": "MS", + "modis-15A3H-061": "MS", + "modis-16A3GF-061": "MS", + "modis-17A2H-061": "MS", + "modis-17A2HGF-061": "MS", + "modis-17A3HGF-061": "MS", + "modis-21A2-061": "MS", + "modis-43A4-061": "MS", + "modis-64A1-061": "MS", + "mtbs": None, + "naip": "YS", + "nasa-nex-gddp-cmip6": None, + "nasadem": None, + "noaa-c-cap": None, + "nrcan-landcover": None, + "planet-nicfi-analytic": "YS", + "planet-nicfi-visual": "YS", + "sentinel-1-grd": "MS", + "sentinel-1-rtc": "MS", + "sentinel-2-l2a": "W-MON", + "us-census": None, +} + +SKIP = { + "daymet-daily-na", + "daymet-daily-pr", + "daymet-daily-hi", + "daymet-monthly-na", + "daymet-monthly-pr", + "daymet-monthly-hi", + "daymet-annual-na", + "daymet-annual-pr", + "daymet-annual-hi", + "terraclimate", + "gridmet", + "landsat-8-c2-l2", + "gpm-imerg-hhr", + "deltares-floods", + "goes-mcmip", + # errors + "cil-gdpcir-cc0", + "3dep-lidar-intensity", + "cil-gdpcir-cc-by", + "ecmwf-forecast", + "3dep-lidar-copc", + "era5-pds", + "3dep-lidar-classification", + "3dep-lidar-dtm-native", + "cil-gdpcir-cc-by-sa", +} + +def _pairwise( + iterable: collections.abc.Iterable, +) -> Any: + # pairwise('ABCDEFG') --> AB BC CD DE EF FG + a, b = itertools.tee(iterable) + next(b, None) + return zip(a, b) + +def _build_output_path( + base_output_path: str, + part_number: int | None, + total: int | None, + start_datetime: datetime.datetime, + end_datetime: datetime.datetime, +) -> str: + a, b = start_datetime, end_datetime + base_output_path = base_output_path.rstrip("/") + + if part_number is not None and total is not None: + output_path = ( + f"{base_output_path}/part-{part_number:0{len(str(total * 10))}}_" + f"{a.isoformat()}_{b.isoformat()}.parquet" + ) + else: + token = hashlib.md5( + "".join([a.isoformat(), b.isoformat()]).encode() + ).hexdigest() + output_path = ( + f"{base_output_path}/part-{token}_{a.isoformat()}_{b.isoformat()}.parquet" + ) + return output_path + +def inject_links(item: dict[str, Any]) -> dict[str, Any]: + item["links"] = [ + { + "rel": "collection", + "type": "application/json", + "href": f"https://planetarycomputer.microsoft.com/api/stac/v1/collections/{item['collection']}", # noqa: E501 + }, + { + "rel": "parent", + "type": "application/json", + "href": f"https://planetarycomputer.microsoft.com/api/stac/v1/collections/{item['collection']}", # noqa: E501 + }, + { + "rel": "root", + "type": "application/json", + "href": "https://planetarycomputer.microsoft.com/api/stac/v1/", + }, + { + "rel": "self", + "type": "application/geo+json", + "href": f"https://planetarycomputer.microsoft.com/api/stac/v1/collections/{item['collection']}/items/{item['id']}", # noqa: E501 + }, + { + "rel": "preview", + "href": f"https://planetarycomputer.microsoft.com/api/data/v1/item/map?collection={item['collection']}&item={item['id']}", # noqa: E501 + "title": "Map of item", + "type": "text/html", + }, + ] + return item + + +def inject_assets(item: dict[str, Any], render_config: str | None) -> dict[str, Any]: + item["assets"]["tilejson"] = { + 
"href": ( + "https://planetarycomputer.microsoft.com/api/data/v1/item/tilejson.json?" + f"collection={item['collection']}" + f"&item={item['id']}&{render_config}" + ), + "roles": ["tiles"], + "title": "TileJSON with default rendering", + "type": "application/json", + } + item["assets"]["rendered_preview"] = { + "href": ( + "https://planetarycomputer.microsoft.com/api/data/v1/item/preview.png?" + f"collection={item['collection']}" + f"&item={item['id']}&{render_config}" + ), + "rel": "preview", + "roles": ["overview"], + "title": "Rendered preview", + "type": "image/png", + } + return item + +def naip_year_to_int(item: dict[str, Any]) -> dict[str, Any]: + """Convert the year to an integer.""" + if "naip:year" in item["properties"] and isinstance(item["properties"]["naip:year"], str): + item["properties"]["naip:year"] = int(item["properties"]["naip:year"]) + return item + +def clean_item(item: dict[str, Any], render_config: str | None) -> dict[str, Any]: + """Clean items by making sure that naip:year is an int and injecting links and assets.""" + item = inject_links(inject_assets(item, render_config)) + + if "proj:epsg" in item["properties"] and not item["properties"]["proj:epsg"]: + # This cannot be null + item["properties"]["proj:epsg"] = "" + + if item["collection"] == "naip": + item = naip_year_to_int(item) + return item + +@dataclasses.dataclass +class CollectionConfig: + """ + Additional collection-based configuration to inject, matching the + dynamic properties from the API. + """ + + collection_id: str + partition_frequency: str | None = None + stac_api: str = "https://planetarycomputer.microsoft.com/api/stac/v1" + should_inject_dynamic_properties: bool = True + render_config: str | None = None + + def __post_init__(self) -> None: + self._collection: pystac.Collection | None = None + + @property + def collection(self) -> pystac.Collection: + if self._collection is None: + self._collection = pystac.read_file( + f"{self.stac_api}/collections/{self.collection_id}" + ) # type: ignore + assert self._collection is not None + return self._collection + + def generate_endpoints( + self, since: datetime.datetime | None = None + ) -> list[tuple[datetime.datetime, datetime.datetime]]: + if self.partition_frequency is None: + raise ValueError("Set partition_frequency") + + start_datetime, end_datetime = self.collection.extent.temporal.intervals[0] + + # https://github.com/dateutil/dateutil/issues/349 + if start_datetime and start_datetime.tzinfo == dateutil.tz.tz.tzlocal(): + start_datetime = start_datetime.astimezone(datetime.timezone.utc) + + if end_datetime and end_datetime.tzinfo == dateutil.tz.tz.tzlocal(): + end_datetime = end_datetime.astimezone(datetime.timezone.utc) + + if end_datetime is None: + end_datetime = pd.Timestamp.utcnow() + + # we need to ensure that the `end_datetime` is past the end of the last partition + # to avoid missing out on the last partition of data. 
+ offset = pd.tseries.frequencies.to_offset(self.partition_frequency) + + if not offset.is_on_offset(start_datetime): + start_datetime = start_datetime - offset + + if not offset.is_on_offset(end_datetime): + end_datetime = end_datetime + offset + + idx = pd.date_range(start_datetime, end_datetime, freq=self.partition_frequency) + + if since: + idx = idx[idx >= since] + + pairs = _pairwise(idx) + return list(pairs) + + def export_partition( + self, + conninfo: str, + output_protocol: str, + output_path: str, + start_datetime: datetime.datetime | None = None, + end_datetime: datetime.datetime | None = None, + storage_options: dict[str, Any] | None = None, + rewrite: bool = False, + ) -> str | None: + # pass + fs = fsspec.filesystem(output_protocol, **storage_options) # type: ignore + if fs.exists(output_path) and not rewrite: + logger.debug("Path %s already exists.", output_path) + return output_path + + def _row_func(item: dict[str, Any]) -> dict[str, Any]: + return clean_item(item, self.render_config) + if any( + pgstac_to_iter( + conninfo=conninfo, + collection=self.collection_id, + start_datetime=start_datetime, + end_datetime=end_datetime, + row_func=_row_func, + ) + ): + logger.info(f"Running parquet export with chunk size of {CHUNK_SIZE}") + with tempfile.TemporaryDirectory() as tmpdir: + arrow = pgstac_to_arrow( + conninfo=conninfo, + collection=self.collection_id, + start_datetime=start_datetime, + end_datetime=end_datetime, + row_func=_row_func, + schema="ChunksToDisk", + tmpdir=tmpdir, + chunk_size=CHUNK_SIZE + ) + + to_parquet( + arrow, + output_path, + filesystem=fs) + return output_path + + def export_partition_for_endpoints( + self, + endpoints: tuple[datetime.datetime, datetime.datetime], + conninfo: str, + output_protocol: str, + output_path: str, + storage_options: dict[str, Any], + part_number: int | None = None, + total: int | None = None, + rewrite: bool = False, + skip_empty_partitions: bool = False, + ) -> str | None: + """ + Export results for a pair of endpoints. 
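+
+        For example, part_number=3 and total=120 with endpoints of
+        (2020-01-01, 2020-02-01) writes to
+        <output_path>/part-0003_2020-01-01T00:00:00_2020-02-01T00:00:00.parquet.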
+        """
+        start, end = endpoints
+        partition_path = _build_output_path(output_path, part_number, total, start, end)
+        return self.export_partition(
+            conninfo,
+            output_protocol,
+            partition_path,
+            start_datetime=start,
+            end_datetime=end,
+            storage_options=storage_options,
+            rewrite=rewrite,
+        )
+
+    def export_exists(
+        self,
+        output_protocol: str,
+        output_path: str,
+        storage_options: dict[str, Any],
+    ) -> bool:
+        fs = fsspec.filesystem(output_protocol, **storage_options)
+        if output_protocol:
+            output_path = f"{output_protocol}://{output_path}"
+        return fs.exists(output_path)
+
+    def _partition_needs_to_be_rewritten(
+        self,
+        output_protocol: str,
+        output_path: str,
+        storage_options: dict[str, Any],
+        partition: Partition,
+    ) -> bool:
+        fs = fsspec.filesystem(output_protocol, **storage_options)
+        if output_protocol:
+            output_path = f"{output_protocol}://{output_path}"
+        if not fs.exists(output_path):
+            return True
+        file_info = fs.info(output_path)
+
+        # Handle case where last_modified is already a datetime object or a timestamp
+        last_modified = file_info["last_modified"]
+        if isinstance(last_modified, datetime.datetime):
+            file_modified_time = last_modified
+        else:
+            # Assume it's a timestamp (int/float)
+            file_modified_time = datetime.datetime.fromtimestamp(last_modified)
+
+        partition_modified_time = partition.last_updated
+        return file_modified_time < partition_modified_time
+
+    def export_collection(
+        self,
+        conninfo: str,
+        output_protocol: str,
+        output_path: str,
+        storage_options: dict[str, Any],
+        pgstac_partitions: dict[str, list[Partition]],
+        rewrite: bool = False,
+        skip_empty_partitions: bool = False,
+    ) -> list[str | None]:
+
+        if not self.partition_frequency:
+            logger.info("Exporting single-partition collection %s", self.collection_id)
+
+            results = [
+                self.export_partition(
+                    conninfo,
+                    output_protocol,
+                    output_path,
+                    storage_options=storage_options)
+            ]
+
+        elif self.partition_frequency and len(pgstac_partitions[self.collection_id]) == 1:
+            endpoints = self.generate_endpoints()
+            total = len(endpoints)
+            logger.info(
+                "Exporting %d partitions for collection %s with frequency %s", total, self.collection_id, self.partition_frequency
+            )
+
+            results = []
+            for i, endpoint in tqdm.auto.tqdm(enumerate(endpoints), total=total):
+                results.append(
+                    self.export_partition_for_endpoints(
+                        endpoints=endpoint,
+                        conninfo=conninfo,
+                        output_protocol=output_protocol,
+                        output_path=output_path,
+                        storage_options=storage_options,
+                        rewrite=rewrite,
+                        skip_empty_partitions=skip_empty_partitions,
+                        part_number=i,
+                        total=total,
+                    )
+                )
+        else:
+            partitions = pgstac_partitions[self.collection_id]
+            total = len(partitions)
+            # Some collections are partitioned in pgstac and some are not.
+            # If a collection is not partitioned in pgstac, then we apply the
+            # partitioning scheme of the STAC collection. In pgstac, you
+            # always have to opt into a partitioning scheme, either
+            # None/Monthly/Yearly in the collections table. The ideal size is
+            # 10M to 20M rows per partition, but that is dataset dependent.
+ logger.info( + "Exporting %d partitions for collection %s using pgstac partitions", total, self.collection_id + ) + + results = [] + for i, partition in tqdm.auto.tqdm(enumerate(partitions), total=total): + partition_path = _build_output_path(output_path, i, total, partition.start, partition.end) + if self._partition_needs_to_be_rewritten( + output_protocol=output_protocol, + output_path=partition_path, + storage_options=storage_options, + partition=partition, + ): + results.append( + self.export_partition( + conninfo=conninfo, + output_protocol=output_protocol, + output_path=partition_path, + start_datetime=partition.start, + end_datetime=partition.end, + storage_options=storage_options, + rewrite=rewrite + ) + ) + else: + logger.info( + "Partition %s already exists and was last updated at %s, skipping", + partition_path, + partition.last_updated, + ) + results.append(partition_path) + + return results + +def build_render_config(render_params: dict[str, Any], assets: dict[str, Any]) -> str: + flat = [] + if assets: + for asset in assets: + flat.append(("assets", asset)) + + for k, v in render_params.items(): + if isinstance(v, list): + flat.extend([(k, v2) for v2 in v]) + else: + flat.append((k, v)) + return urllib.parse.urlencode(flat) + + +def generate_configs_from_storage_table( + table_client: azure.data.tables.TableClient, +) -> dict[str, CollectionConfig]: + configs = {} + for entity in table_client.list_entities(): + collection_id = entity["RowKey"] + data = json.loads(entity["Data"]) + + render_params = data["render_config"]["render_params"] + assets = data["render_config"]["assets"] + render_config = build_render_config(render_params, assets) + configs[collection_id] = CollectionConfig( + collection_id, render_config=render_config + ) + + return configs + + +def generate_configs_from_api(url: str) -> dict[str, CollectionConfig]: + configs = {} + r = requests.get(url) + r.raise_for_status() + + for collection in r.json()["collections"]: + partition_frequency = ( + collection["assets"] + .get("geoparquet-items", {}) + .get("msft:partition_info", {}) + .get("partition_frequency", None) + ) + + configs[collection["id"]] = CollectionConfig( + collection["id"], partition_frequency=partition_frequency + ) + + return configs + + +def merge_configs( + table_configs: dict[str, CollectionConfig], api_configs: dict[str, CollectionConfig] +) -> dict[str, CollectionConfig]: + # what a mess. Get partitioning config from the API, render from the table. 
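+    # Precedence: keep the table config (it carries render_config) when both
+    # exist, but always take partition_frequency from the API config.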
+ configs = {} + for k in table_configs.keys() | api_configs.keys(): + table_config = table_configs.get(k) + api_config = api_configs.get(k) + config = table_config or api_config + assert config + if api_config: + config.partition_frequency = api_config.partition_frequency + configs[k] = config + return configs class StacGeoparquetTaskInput(PCBaseModel): @@ -40,7 +593,6 @@ class StacGeoparquetTaskInput(PCBaseModel): class StacGeoparquetTaskOutput(PCBaseModel): n_failures: int - class StacGeoparquetTask(Task[StacGeoparquetTaskInput, StacGeoparquetTaskOutput]): _input_model = StacGeoparquetTaskInput _output_model = StacGeoparquetTaskOutput @@ -51,6 +603,7 @@ def get_required_environment_variables(self) -> list[str]: def run( self, input: StacGeoparquetTaskInput, context: TaskContext ) -> Union[StacGeoparquetTaskOutput, WaitTaskResult, FailedTaskResult]: + result = run( output_protocol=input.output_protocol, connection_info=input.connection_info, @@ -65,37 +618,7 @@ def run( return StacGeoparquetTaskOutput(n_failures=result) -SKIP = { - "daymet-daily-na", - "daymet-daily-pr", - "daymet-daily-hi", - "daymet-monthly-na", - "daymet-monthly-pr", - "daymet-monthly-hi", - "daymet-annual-na", - "daymet-annual-pr", - "daymet-annual-hi", - "terraclimate", - "gridmet", - "landsat-8-c2-l2", - "gpm-imerg-hhr", - "deltares-floods", - "goes-mcmip", - # errors - "cil-gdpcir-cc0", - "3dep-lidar-intensity", - "cil-gdpcir-cc-by", - "ecmwf-forecast", - "3dep-lidar-copc", - "era5-pds", - "3dep-lidar-classification", - "3dep-lidar-dtm-native", - "cil-gdpcir-cc-by-sa", -} - - -def run( - output_protocol: str = "abfs", +def list_planetary_computer_collection_configs( connection_info: str | None = None, table_credential: ( str @@ -111,8 +634,7 @@ def run( ) = None, extra_skip: Set[str] | None = None, collections: str | Set[str] | None = None, -) -> int: - # handle the arguments +) -> dict[str, CollectionConfig]: try: connection_info = ( connection_info or os.environ["STAC_GEOPARQUET_CONNECTION_INFO"] @@ -149,7 +671,8 @@ def run( table_name, credential=table_credential, ) - configs = pc_runner.get_configs(table_client) + logger.info(f"Connecting to table {table_name} at {table_account_url}") + configs = get_configs(table_client) if collections is None: configs = {k: v for k, v in configs.items() if k not in skip} @@ -158,28 +681,101 @@ def run( elif isinstance(collections, set): configs = {k: v for k, v in configs.items() if k in collections} + return configs + +def get_configs(table_client: azure.data.tables.TableClient) -> dict[str, CollectionConfig]: + table_configs = generate_configs_from_storage_table(table_client) + api_configs = generate_configs_from_api( + "https://planetarycomputer.microsoft.com/api/stac/v1/collections" + ) + configs = merge_configs(table_configs, api_configs) + for k, v in configs.items(): + if v.partition_frequency is None: + v.partition_frequency = PARTITION_FREQUENCIES.get(k) + + return configs + +def run( + output_protocol: str = "abfs", + connection_info: str | None = None, + table_credential: ( + str + | None + | azure.core.credentials.TokenCredential + | azure.core.credentials.AzureSasCredential + ) = None, + table_name: str | None = None, + table_account_url: str | None = None, + storage_options_account_name: str | None = None, + storage_options_credential: ( + str | None | azure.core.credentials.TokenCredential + ) = None, + extra_skip: Set[str] | None = None, + collections: str | Set[str] | None = None, + configs: dict[str, CollectionConfig] | None = None, +) -> int: + if configs 
is None: + configs = list_planetary_computer_collection_configs( + connection_info=connection_info, + table_credential=table_credential, + table_name=table_name, + table_account_url=table_account_url, + storage_options_account_name=storage_options_account_name, + storage_options_credential=storage_options_credential, + extra_skip=extra_skip, + collections=collections, + ) + try: + connection_info = ( + connection_info or os.environ["STAC_GEOPARQUET_CONNECTION_INFO"] + ) + except KeyError as e: + raise KeyError( + "STAC_GEOPARQUET_CONNECTION_INFO must be set if not explicitly provided" + ) from e + table_credential = table_credential or os.environ.get( + "STAC_GEOPARQUET_TABLE_CREDENTIAL", azure.identity.DefaultAzureCredential() + ) + assert table_credential is not None + storage_options_account_name = ( + storage_options_account_name + or os.environ["STAC_GEOPARQUET_STORAGE_OPTIONS_ACCOUNT_NAME"] + ) + storage_options_credential = storage_options_credential or os.environ.get( + "STAC_GEOPARQUET_STORAGE_OPTIONS_CREDENTIAL", + azure.identity.DefaultAzureCredential(), + ) + storage_options = { "account_name": storage_options_account_name, "credential": storage_options_credential, } - def f(config: pc_runner.CollectionConfig) -> None: - config.export_collection( - connection_info, - output_protocol, - f"items/{config.collection_id}.parquet", - storage_options, - skip_empty_partitions=True, - ) - N = len(configs) success = [] failure = [] + collection_partitions = list(get_pgstac_partitions(conninfo=connection_info)) + recent_collection_updates: dict[str, list[Partition]] = {} + for partition in collection_partitions: + recent_collection_updates.setdefault(partition.collection, []).append(partition) + logger.info(f"Found {len(collection_partitions)} pgstac partitions") + for i, config in enumerate(configs.values(), 1): - logger.info(f"processing {config.collection_id} [{i}/{N}]") + output_path = f"items/{config.collection_id}.parquet" try: - f(config) + t0 = time.monotonic() + config.export_collection( + connection_info, + output_protocol, + output_path, + storage_options, + pgstac_partitions=recent_collection_updates, + skip_empty_partitions=True, + rewrite=True + ) + t1 = time.monotonic() + logger.info(f"Completed {config.collection_id} [{i}/{N}] in {t1-t0:.2f}s") except Exception as e: failure.append((config.collection_id, e)) logger.exception(f"Failed processing {config.collection_id}") @@ -187,3 +783,36 @@ def f(config: pc_runner.CollectionConfig) -> None: success.append(config.collection_id) return len(failure) + +if __name__ == "__main__": + # Remove all handlers associated with the root logger object. + for h in logging.root.handlers[:]: + logging.root.removeHandler(h) + # Set up logging only for this file and stac_geoparquet package + logging.basicConfig(handlers=[handler], level=logging.DEBUG, force=True) + logging.getLogger().setLevel(logging.WARNING) + logger.setLevel(logging.DEBUG) + logging.getLogger("stac_geoparquet").setLevel(logging.DEBUG) + parser = argparse.ArgumentParser(description="Export STAC collection to GeoParquet.") + parser.add_argument( + "--collection", + type=str, + required=False, + help="The collection ID to export." 
+    )
+    args = parser.parse_args()
+    configs = list_planetary_computer_collection_configs(
+        connection_info=os.environ["STAC_GEOPARQUET_CONNECTION_INFO"],
+        table_credential=azure.identity.DefaultAzureCredential(),
+        table_name=os.environ["STAC_GEOPARQUET_TABLE_NAME"],
+        table_account_url=os.environ["STAC_GEOPARQUET_TABLE_ACCOUNT_URL"],
+        storage_options_account_name=os.environ["STAC_GEOPARQUET_STORAGE_OPTIONS_ACCOUNT_NAME"],
+        storage_options_credential=azure.identity.DefaultAzureCredential(),
+        extra_skip=SKIP,
+        collections=args.collection,
+    )
+    n_failures = run(collections=args.collection, configs=configs)
+    if n_failures == 0:
+        logger.info("Export completed successfully.")
+    else:
+        logger.error(f"Export completed with {n_failures} failures.")
diff --git a/datasets/stac-geoparquet/requirements.txt b/datasets/stac-geoparquet/requirements.txt
index 1c53526b..3efc9534 100644
--- a/datasets/stac-geoparquet/requirements.txt
+++ b/datasets/stac-geoparquet/requirements.txt
@@ -1,4 +1,11 @@
-stac-geoparquet[pgstac,pc]==0.2.1
-psycopg[binary,pool]==3.1.8
-azure-data-tables==12.4.2
-pypgstac==0.7.4
\ No newline at end of file
+# stac-geoparquet[pgstac,pc]==0.6.0
+git+https://github.com/stac-utils/stac-geoparquet.git@c3b7c324ad0a2ef3c09d3d8c246817c943075fe1#egg=stac-geoparquet[pgstac,pc]
+psycopg[binary,pool]==3.2.6
+azure-data-tables==12.5.0
+pypgstac==0.8.6
+fsspec==2025.9.0
+tqdm==4.67.1
+adlfs==2025.8.0
+azure-datalake-store==0.0.53
+pyarrow==21.0.0
+psutil
diff --git a/datasets/stac-geoparquet/test.ipynb b/datasets/stac-geoparquet/test.ipynb
new file mode 100644
index 00000000..e409ce6d
--- /dev/null
+++ b/datasets/stac-geoparquet/test.ipynb
@@ -0,0 +1,1060 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "89454fb1",
+   "metadata": {},
+   "source": [
+    "# Test Notebook\n",
+    "Use this to simulate use of the exported GeoParquet assets by a consumer."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "1d4b6b68",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import geopandas\n",
+    "from azure.identity import DefaultAzureCredential\n",
+    "\n",
+    "account = \"pcstacitems\"\n",
+    "container = \"items\"\n",
+    "storage_options = {\n",
+    "    \"account_name\": account,\n",
+    "    \"credential\": DefaultAzureCredential()\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3404f46e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "[HTML table output omitted: pandas DataFrame previews of the exported assets read back with geopandas, one for the naip snapshot (235644 rows x 16 columns) and one for the chloris-biomass snapshot]"