Skip to content

Commit 8bb7f37

Browse files
changes from PR review
1 parent 4487ab7 commit 8bb7f37

File tree

4 files changed

+15
-29
lines changed

4 files changed

+15
-29
lines changed

docs/installation.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ For example:
9292
- ``rioxarray`` for GeoTIFF support in the assert helpers from ``openeo.testing.results``
9393
- ``geopandas`` for working with dataframes with geospatial support,
9494
(e.g. with :py:class:`~openeo.extra.job_management.MultiBackendJobManager`)
95+
- ``pystac_client`` for creating a STAC API Job Database (e.g. with :py:class:`~openeo.extra.job_management.stac_job_db.STACAPIJobDatabase`)
9596

9697

9798
Enabling additional features

openeo/extra/job_management/__init__.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -104,21 +104,6 @@ def get_by_status(self, statuses: List[str], max=None) -> pd.DataFrame:
104104
"""
105105
...
106106

107-
@abc.abstractmethod
108-
def initialize_from_df(self, df: pd.DataFrame, on_exists: str = "error") -> "JobDatabaseInterface":
109-
"""
110-
Initialize the job database from a given dataframe,
111-
112-
:param df: dataframe with some columns your ``start_job`` callable expects
113-
:param on_exists: what to do when the job database already exists:
114-
- "error": (default) raise an exception
115-
- "skip": work with existing database, ignore given dataframe and skip any initialization
116-
117-
:return: initialized job database.
118-
"""
119-
...
120-
121-
122107
def _start_job_default(row: pd.Series, connection: Connection, *args, **kwargs):
123108
raise NotImplementedError("No 'start_job' callable provided")
124109

openeo/extra/job_management/stac_job_db.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
1-
import concurrent
1+
import concurrent.futures
22
import datetime
33
import logging
4-
from typing import Iterable, List, Union
4+
from typing import Iterable, List
55

66
import geopandas as gpd
77
import numpy as np
88
import pandas as pd
99
import pystac
10+
import pystac_client
1011
import requests
11-
from pystac import Collection, Item
12-
from pystac_client import Client
13-
from requests.auth import HTTPBasicAuth
1412
from shapely.geometry import mapping, shape
1513

1614
from openeo.extra.job_management import JobDatabaseInterface, MultiBackendJobManager
@@ -45,7 +43,7 @@ def __init__(
4543
:param geometry_column: The name of the geometry column in the job metadata that implements __geo_interface__.
4644
"""
4745
self.collection_id = collection_id
48-
self.client = Client.open(stac_root_url)
46+
self.client = pystac_client.Client.open(stac_root_url)
4947

5048
self._auth = auth
5149
self.has_geometry = has_geometry
@@ -54,7 +52,7 @@ def __init__(
5452
self.bulk_size = 500
5553

5654
def exists(self) -> bool:
57-
return len([c.id for c in self.client.get_collections() if c.id == self.collection_id]) > 0
55+
return any(c.id == self.collection_id for c in self.client.get_collections())
5856

5957
def initialize_from_df(self, df: pd.DataFrame, *, on_exists: str = "error"):
6058
"""
@@ -108,7 +106,6 @@ def series_from(self, item: pystac.Item) -> pd.Series:
108106
item_dict = item.to_dict()
109107
item_id = item_dict["id"]
110108
dt = item_dict["properties"]["datetime"]
111-
item_dict["datetime"] = pystac.utils.str_to_datetime(dt)
112109

113110
return pd.Series(item_dict["properties"], name=item_id)
114111

@@ -151,6 +148,9 @@ def item_from(self, series: pd.Series) -> pystac.Item:
151148
return item
152149

153150
def count_by_status(self, statuses: Iterable[str] = ()) -> dict:
151+
if isinstance(statuses, str):
152+
statuses = {statuses}
153+
statuses = set(statuses)
154154
items = self.get_by_status(statuses, max=200)
155155
if items is None:
156156
return {k: 0 for k in statuses}
@@ -199,13 +199,13 @@ def handle_row(series):
199199

200200
self._upload_items_bulk(self.collection_id, all_items)
201201

202-
def _prepare_item(self, item: Item, collection_id: str):
202+
def _prepare_item(self, item: pystac.Item, collection_id: str):
203203
item.collection_id = collection_id
204204

205205
if not item.get_links(pystac.RelType.COLLECTION):
206206
item.add_link(pystac.Link(rel=pystac.RelType.COLLECTION, target=item.collection_id))
207207

208-
def _ingest_bulk(self, items: Iterable[Item]) -> dict:
208+
def _ingest_bulk(self, items: List[pystac.Item]) -> dict:
209209
collection_id = items[0].collection_id
210210
if not all(i.collection_id == collection_id for i in items):
211211
raise Exception("All collection IDs should be identical for bulk ingests")
@@ -219,7 +219,7 @@ def _ingest_bulk(self, items: Iterable[Item]) -> dict:
219219
_check_response_status(response, _EXPECTED_STATUS_POST)
220220
return response.json()
221221

222-
def _upload_items_bulk(self, collection_id: str, items: Iterable[Item]) -> None:
222+
def _upload_items_bulk(self, collection_id: str, items: List[pystac.Item]) -> None:
223223
chunk = []
224224
futures = []
225225

@@ -246,15 +246,15 @@ def join_url(self, url_path: str) -> str:
246246
"""
247247
return str(self.base_url + "/" + url_path)
248248

249-
def _create_collection(self, collection: Collection) -> dict:
249+
def _create_collection(self, collection: pystac.Collection) -> dict:
250250
"""Create a new collection.
251251
252252
:param collection: pystac.Collection object to create in the STAC API backend (or upload if you will)
253253
:raises TypeError: if collection is not a pystac.Collection.
254254
:return: dict that contains the JSON body of the HTTP response.
255255
"""
256256

257-
if not isinstance(collection, Collection):
257+
if not isinstance(collection, pystac.Collection):
258258
raise TypeError(
259259
f'Argument "collection" must be of type pystac.Collection, but its type is {type(collection)=}'
260260
)

tests/extra/job_management/test_stac_job_db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def mock_auth():
2121

2222
@pytest.fixture
2323
def mock_stac_api_job_database(mock_auth) -> STACAPIJobDatabase:
24-
return STACAPIJobDatabase(collection_id="test_id", stac_root_url="http://fake-stac-api", auth=mock_auth)
24+
return STACAPIJobDatabase(collection_id="test_id", stac_root_url="http://fake-stac-api.test", auth=mock_auth)
2525

2626

2727
@pytest.fixture

0 commit comments

Comments
 (0)