Skip to content

Commit 586756e

Browse files
committed
add bootstrapping script
1 parent 5efb860 commit 586756e

File tree

3 files changed

+91
-15
lines changed

3 files changed

+91
-15
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,4 @@ cython_debug/
161161
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
162162
#.idea/
163163
.pgdata
164+
naip.parquet

docker-compose.yaml

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,18 @@
11
services:
2+
database:
3+
image: ghcr.io/stac-utils/pgstac:v0.9.5
4+
environment:
5+
POSTGRES_USER: username
6+
POSTGRES_PASSWORD: password
7+
POSTGRES_DB: postgis
8+
PGUSER: username
9+
PGPASSWORD: password
10+
PGDATABASE: postgis
11+
ports:
12+
- "${MY_DOCKER_IP:-127.0.0.1}:5439:5432"
13+
command: postgres -N 500
14+
volumes:
15+
- ./.pgdata:/var/lib/postgresql/data
216
stac:
317
image: ghcr.io/stac-utils/stac-fastapi-pgstac
418
environment:
@@ -25,21 +39,6 @@ services:
2539
- database
2640
command: bash -c "./scripts/wait-for-it.sh database:5432 && python -m stac_fastapi.pgstac.app"
2741

28-
database:
29-
image: ghcr.io/stac-utils/pgstac:v0.8.5
30-
environment:
31-
POSTGRES_USER: username
32-
POSTGRES_PASSWORD: password
33-
POSTGRES_DB: postgis
34-
PGUSER: username
35-
PGPASSWORD: password
36-
PGDATABASE: postgis
37-
ports:
38-
- "${MY_DOCKER_IP:-127.0.0.1}:5439:5432"
39-
command: postgres -N 500
40-
volumes:
41-
- ./.pgdata:/var/lib/postgresql/data
42-
4342
proxy:
4443
depends_on:
4544
- stac

examples/seed-db.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Run with `uv run examples/seed-db.py`
2+
# /// script
3+
# dependencies = [
4+
# "pystac",
5+
# "pystac-client",
6+
# "stacrs",
7+
# "pgstacrs",
8+
# ]
9+
# ///
10+
# ruff: noqa
11+
12+
import asyncio
13+
import logging
14+
import os
15+
16+
import stacrs
17+
from pgstacrs import Client as PgstacClient
18+
from pystac import Collection, Extent, ItemCollection
19+
from pystac_client import Client as PystacClient
20+
21+
logger = logging.getLogger(__name__)
22+
23+
24+
async def get_data_set(
    dataset_path: str, bbox: list[float], collections: str, max_items: int
) -> "ItemCollection":
    """Return the items for a STAC search, using ``dataset_path`` as a cache.

    If ``dataset_path`` does not exist, the Planetary Computer STAC API is
    searched with the given parameters and the resulting items are written
    to ``dataset_path`` via ``stacrs.write``. If it already exists, the items
    are read back from that file via ``stacrs.read`` and no search is made.

    Args:
        dataset_path: Path of the local dataset file used as the cache.
        bbox: Bounding box forwarded to the search.
        collections: Collection id(s) forwarded to the search.
        max_items: Maximum number of items forwarded to the search.

    Returns:
        The downloaded or loaded items as an ``ItemCollection``.

    Raises:
        FileNotFoundError: If ``dataset_path`` still does not exist after
            the download branch has written it.
    """
    if os.path.exists(dataset_path):
        logger.info(f"Loading dataset from {dataset_path}")
        item_collection = await stacrs.read(dataset_path)
        items = ItemCollection.from_dict(item_collection)
    else:
        logger.info(f"Downloading dataset to {dataset_path}")
        client = PystacClient.open(
            "https://planetarycomputer.microsoft.com/api/stac/v1"
        )
        item_search = client.search(
            bbox=bbox, collections=collections, max_items=max_items
        )
        items = item_search.item_collection()
        await stacrs.write(dataset_path, [item.to_dict() for item in items])

    # Explicit check instead of `assert`: assertions are removed when Python
    # runs with -O, which would silently drop this post-condition.
    if not os.path.exists(dataset_path):
        raise FileNotFoundError(f"Dataset {dataset_path} does not exist")
    return items
44+
45+
46+
async def seed_db(items: ItemCollection, db_url: str):
    """Upsert a "naip" collection and the given items into pgstac.

    The collection's extent is derived from ``items``. The collection is
    upserted first, then the items, through a pgstacrs client opened on
    ``db_url``.

    Args:
        items: Items to load into the database.
        db_url: Connection URL passed to ``PgstacClient.open``.
    """
    item_count = len(items)
    logger.info(f"Seeding database with {item_count} items")

    naip_collection = Collection(
        "naip",
        "NAIP data in the Planetary Computer",
        extent=Extent.from_items(items),
    )

    # transform_hrefs=False: https://github.com/stac-utils/pystac/issues/960
    item_dicts = []
    for item in items:
        item_dicts.append(item.to_dict(transform_hrefs=False))

    db = await PgstacClient.open(db_url)
    await db.upsert_collection(naip_collection.to_dict())
    await db.upsert_items(item_dicts)
59+
60+
61+
if __name__ == "__main__":

    async def main():
        """Obtain the NAIP items (downloaded or cached) and seed the database."""
        # Search / cache parameters handed to get_data_set as keywords.
        dataset_args = {
            "dataset_path": "./naip.parquet",
            "bbox": [-109.0591, 36.9927, -102.04212, 41.0019],
            "collections": "naip",
            "max_items": 10000,
        }
        naip_items = await get_data_set(**dataset_args)

        await seed_db(
            items=naip_items,
            db_url="postgresql://username:password@localhost:5439/postgis",
        )

    # Configure logging before the coroutine runs so INFO messages are shown.
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())

0 commit comments

Comments
 (0)