Skip to content

Commit 395ad06

Browse files
committed
wip
1 parent 1364d2f commit 395ad06

File tree

10 files changed

+364
-105
lines changed

10 files changed

+364
-105
lines changed

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,14 @@ catalogs:
2929

3030
# Todo - auth configuration documentation.
3131

32-
See [.superstac.yml](./superstac/.superstac.yml) for an example configuration file.
32+
See [.superstac.yml](./superstac/.superstac.yml) for an example configuration file.
33+
34+
35+
# todo
36+
37+
- retries - https://pystac-client.readthedocs.io/en/stable/usage.html#configuring-retry-behavior
38+
- modifier - https://pystac-client.readthedocs.io/en/stable/usage.html#automatically-modifying-results
39+
- refresh
40+
- auth
41+
- use cases, e.g. when a catalog is offline - store latency?
42+
- when a catalog is specified and changed it still works - band matching

main.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,32 @@
1+
import time
2+
import asyncio
3+
4+
15
def main():
2-
print("Hello from superstac!")
6+
from superstac import get_catalog_registry, federated_search_async
7+
8+
cr = get_catalog_registry()
9+
cr.load_catalogs_from_config()
10+
11+
print("\nRunning asynchronous federated_search_async...")
12+
start_async = time.perf_counter()
13+
results_async = asyncio.run(
14+
federated_search_async(
15+
registry=cr,
16+
collections=["sentinel-2-l2a"],
17+
bbox=[6.0, 49.0, 7.0, 50.0],
18+
datetime="2024-01-01/2024-01-31",
19+
query={"eo:cloud_cover": {"lt": 20}},
20+
sortby=[{"field": "properties.datetime", "direction": "desc"}],
21+
)
22+
)
23+
end_async = time.perf_counter()
24+
print(
25+
f"Asynchronous search found {len(results_async)} items in {end_async - start_async:.2f} seconds."
26+
)
27+
28+
for x in results_async:
29+
print(x.self_href)
330

431

532
if __name__ == "__main__":

superstac/.superstac.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
catalogs:
22
Earth Search:
33
url: https://earth-search.aws.element84.com/v1
4-
summary: 'Element 84 STAC catalog'
54
is_private: False
65
Planetary Computer:
76
url: https://planetarycomputer.microsoft.com/api/stac/v1
8-
summary: 'Microsoft Planetary Computer STAC catalog'
97
is_private: False

superstac/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from superstac.catalog_registry import (
2-
register_catalog,
32
get_catalog_registry,
4-
load_catalogs_from_config,
53
)
4+
from superstac.search import federated_search, federated_search_async
65

7-
__all__ = ["register_catalog", "get_catalog_registry", "load_catalogs_from_config"]
6+
__all__ = ["get_catalog_registry", "federated_search", "federated_search_async"]

superstac/assets_mapper.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
planetary_mapping = {
2+
"B01": "coastal",
3+
"B02": "blue",
4+
"B03": "green",
5+
"B04": "red",
6+
"B05": "rededge1",
7+
"B06": "rededge2",
8+
"B07": "rededge3",
9+
"B08": "nir",
10+
"B8A": "nir08",
11+
"B09": "nir09",
12+
"B11": "swir16",
13+
"B12": "swir22",
14+
"AOT": "aot",
15+
"SCL": "scl",
16+
"WVP": "wvp",
17+
}

superstac/catalog.py

Lines changed: 86 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
from pathlib import Path
44
import attr
5-
from typing import Any, Dict, Optional, Union
5+
from typing import Any, Dict, List, Optional, Union
6+
7+
from pystac_client import Client
68

79
from superstac.enums import CatalogOutputFormat
810
from superstac.exceptions import (
@@ -29,7 +31,6 @@ def register_catalog(
2931
name: str,
3032
url: str,
3133
is_private: Optional[bool] = False,
32-
summary: Optional[str] = None,
3334
auth: Optional[AuthInfo] = None,
3435
) -> CatalogEntry:
3536
"""Register a single STAC catalog in state.
@@ -48,54 +49,120 @@ def register_catalog(
4849
CatalogEntry: The registered STAC catalog.
4950
"""
5051
logger.info(f"Registering catalog: {name}")
51-
logger.debug(
52-
f"Params - url: {url}, is_private: {is_private}, summary: {summary}, auth: {auth}"
53-
)
52+
logger.debug(f"Params - url: {url}, is_private: {is_private}, auth: {auth}")
5453
if is_private and auth is None:
5554
logger.error(
5655
f"Private catalog '{name}' requires authentication but none was provided."
5756
)
5857
raise InvalidCatalogSchemaError(
5958
f"Authentication parameters is required for private catalogs. If this is a mistake, you can set 'is_private' to False or provide the {AuthInfo.__annotations__} parameters."
6059
)
61-
62-
logger.info(f"Catalog '{name}' registered successfully.")
63-
self.catalogs[name] = CatalogEntry(
60+
client = None
61+
try:
62+
# todo - add auth parameters and config...
63+
client = Client.open(url)
64+
metadata = {
65+
"client": client,
66+
"catalog": client.get_root(),
67+
"is_available": True,
68+
}
69+
logger.info(f"Catalog '{name}' is reachable and valid.")
70+
except Exception as e:
71+
logger.warning(f"Catalog '{name}' could not be reached or parsed: {e}")
72+
metadata = {
73+
"client": None,
74+
"catalog": None,
75+
"is_available": False,
76+
}
77+
entry = CatalogEntry(
6478
name=name,
6579
url=url,
66-
summary=summary,
6780
is_private=is_private,
6881
auth=AuthInfo(**auth.__dict__) if auth and not is_private else None,
82+
**metadata,
6983
)
70-
return self.catalogs[name]
84+
self.catalogs[name] = entry
85+
logger.info(f"Catalog '{name}' registered successfully.")
86+
return entry
7187

72-
def get_available_catalogs(
73-
self, format: Union[str, CatalogOutputFormat] = CatalogOutputFormat.DICT
88+
def get_catalogs(
89+
self,
90+
format: Union[str, CatalogOutputFormat] = CatalogOutputFormat.DICT,
91+
available: bool = False,
7492
) -> list[Union[dict[str, Any], str]]:
75-
"""Get the available STAC catalogs.
93+
"""Get the STAC catalogs.
94+
95+
Args:
96+
format (Union[str, CatalogOutputFormat]): Output format, dict or json string.
97+
available (bool): If True, return only available catalogs; else return all.
7698
7799
Raises:
78100
ValueError: When an invalid format is provided.
79101
80102
Returns:
81-
list[CatalogEntry]: The list of all available STAC catalogs.
103+
list[CatalogEntry]: List of catalogs in the requested format.
82104
"""
83-
logger.info("Retrieving available catalogs.")
105+
logger.info(f"Retrieving {'available' if available else 'all'} catalogs.")
84106
if isinstance(format, str):
85107
try:
86108
format = CatalogOutputFormat(format.lower())
87109
except ValueError:
88110
logger.error(f"Invalid output format: {format}")
89111
raise ValueError(f"Invalid format: {format}")
90112

91-
available = [
113+
catalogs = [
92114
c.as_dict() if format == CatalogOutputFormat.DICT else c.as_json()
93115
for c in self.catalogs.values()
94-
if c.is_available
116+
if (c.is_available if available else True)
95117
]
96118

97-
logger.info(f"{len(available)} catalogs available in format '{format.value}'.")
98-
return available
119+
logger.info(f"{len(catalogs)} catalogs retrieved in format '{format.value}'.")
120+
return catalogs
121+
122+
def get_all_collections(self, available: bool = False) -> Dict[str, List[str]]:
123+
"""
124+
Returns a dictionary mapping catalog names to a list of collection IDs
125+
available in each catalog.
126+
127+
Args:
128+
available (bool): If True, only include catalogs that are available.
129+
130+
Returns:
131+
Dict[str, List[str]]: Catalog name -> list of collection IDs
132+
"""
133+
logger.info(f"Getting collections with available={available}")
134+
collections = {}
135+
for name, entry in self.catalogs.items():
136+
if (entry.is_available if available else True) and entry.catalog:
137+
logger.debug(
138+
f"Processing catalog '{name}' (available={entry.is_available})"
139+
)
140+
try:
141+
all_collections = entry.catalog.get_all_collections()
142+
if all_collections:
143+
collection_ids = []
144+
for c in all_collections:
145+
if hasattr(c, "id"):
146+
collection_ids.append(c.id)
147+
elif isinstance(c, dict) and "id" in c:
148+
collection_ids.append(c["id"])
149+
else:
150+
collection_ids.append(str(c))
151+
collections[name] = collection_ids
152+
logger.info(
153+
f"Found {len(collection_ids)} collections in catalog '{name}'"
154+
)
155+
else:
156+
logger.info(f"No collections found in catalog '{name}'")
157+
except Exception as e:
158+
logger.warning(
159+
f"Failed to get collections for catalog '{name}': {e}"
160+
)
161+
continue
162+
else:
163+
logger.debug(f"Skipping catalog '{name}' due to availability filter")
164+
logger.info(f"Total catalogs with collections returned: {len(collections)}")
165+
return collections
99166

100167
def load_catalogs_from_config(
101168
self, config: Union[str, Path, None] = None
@@ -155,7 +222,6 @@ def load_catalogs_from_config(
155222
name=name,
156223
url=spec.get("url"),
157224
is_private=spec.get("is_private", False),
158-
summary=spec.get("summary"),
159225
auth=AuthInfo(**spec["auth"]) if "auth" in spec else None,
160226
)
161227
except Exception as e:

superstac/catalog_registry.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,25 @@
11
"""SuperSTAC Catalog Registry"""
22

3-
from typing import Union
43
from superstac.catalog import CatalogManager
54

65

7-
_catalog_registry = CatalogManager()
6+
_catalog_registry = None
87

98

109
def get_catalog_registry() -> CatalogManager:
1110
"""
1211
Returns the singleton CatalogManager instance.
1312
"""
13+
global _catalog_registry
14+
if _catalog_registry is None:
15+
_catalog_registry = CatalogManager()
1416
return _catalog_registry
1517

1618

17-
def register_catalog(*args, **kwargs):
19+
def clear_registry() -> None:
1820
"""
19-
Shortcut to register a catalog globally.
21+
Reset the registry, mainly for testing.
2022
"""
21-
return _catalog_registry.register_catalog(*args, **kwargs)
22-
23-
24-
def load_catalogs_from_config(file: Union[str, None] = None):
25-
"""
26-
Loads catalogs from YAML into the global registry.
27-
"""
28-
return _catalog_registry.load_catalogs_from_config(file)
29-
30-
31-
def clear_registry():
32-
"""
33-
Optional: Reset the registry, mainly for testing.
34-
"""
35-
_catalog_registry.catalogs.clear()
23+
if _catalog_registry:
24+
_catalog_registry.catalogs.clear()
25+
return None

superstac/models.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22

33
from dataclasses import asdict, dataclass
44
import json
5-
from typing import Optional
5+
from typing import Optional, Union
66
from urllib.parse import urlparse
77

8+
9+
from pystac import Catalog
10+
from pystac_client import Client
11+
812
from superstac.enums import AuthType
913
from superstac.utils import compute_catalog_id
1014

@@ -32,18 +36,27 @@ class CatalogEntry:
3236
"""CatalogEntry"""
3337

3438
name: str
39+
3540
url: str
41+
"""URL to the STAC catalog."""
42+
3643
id: Optional[str] = None
3744
"""Internal unique ID for the catalog. Will be autogenerated post init."""
38-
"""URL to the STAC catalog."""
45+
46+
client: Union[Client, None] = None
47+
"""PySTAC Client"""
48+
49+
catalog: Union[Catalog, None] = None
50+
"""Root Catalog"""
51+
3952
is_private: Optional[bool] = False
4053
"""Indicates whether the catalog is a private catalog or not."""
41-
summary: Optional[str] = None
42-
"""Short description of the catalog."""
54+
4355
auth: Optional[AuthInfo] = None
4456
"""Authentication parameters."""
57+
4558
is_available: Optional[bool] = True
46-
"""Defaults to True on instantiation. It will be updated based on the status code of the catalog. If 200 True and False if otherwise."""
59+
"""Defaults to True on instantiation. It will be updated based on the status code of the catalog from pystac client."""
4760

4861
def __post_init__(self):
4962
# Validate the URL

0 commit comments

Comments
 (0)