Skip to content

Commit 38e757f

Browse files
Feat: add create_missing_collections option to load_queryables method
1 parent 12bcdd5 commit 38e757f

File tree

2 files changed

+127
-0
lines changed

2 files changed

+127
-0
lines changed

src/pypgstac/src/pypgstac/pypgstac.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def load_queryables(
126126
collection_ids: Optional[list[str]] = None,
127127
delete_missing: Optional[bool] = False,
128128
index_fields: Optional[list[str]] = None,
129+
create_missing_collections: Optional[bool] = False,
129130
) -> None:
130131
"""Load queryables from a JSON file.
131132
@@ -139,6 +140,9 @@ def load_queryables(
139140
index_fields: List of field names to create indexes for. If not provided,
140141
no indexes will be created. Creating too many indexes can
141142
negatively impact performance.
143+
create_missing_collections: If True and collection_ids is specified,
144+
automatically create empty collections for any
145+
collection IDs that don't exist.
142146
"""
143147

144148
# Read the queryables JSON file
@@ -147,6 +151,59 @@ def load_queryables(
147151
queryables_data = item
148152
break # We only need the first item
149153

154+
# Create missing collections if requested
155+
if create_missing_collections and collection_ids:
156+
conn = self._db.connect()
157+
with conn.cursor() as cur:
158+
# Get list of existing collections
159+
cur.execute(
160+
"SELECT id FROM collections WHERE id = ANY(%s);",
161+
[collection_ids],
162+
)
163+
existing_collections = {r[0] for r in cur.fetchall()}
164+
165+
# Create empty collections for any that don't exist
166+
missing_collections = [
167+
cid for cid in collection_ids if cid not in existing_collections
168+
]
169+
if missing_collections:
170+
with conn.transaction():
171+
# Create a temporary table for bulk insert
172+
cur.execute(
173+
"""
174+
DROP TABLE IF EXISTS tmp_collections;
175+
CREATE TEMP TABLE tmp_collections
176+
(content jsonb) ON COMMIT DROP;
177+
""",
178+
)
179+
# Insert collection records into temp table
180+
with cur.copy(
181+
"COPY tmp_collections (content) FROM stdin;",
182+
) as copy:
183+
for cid in missing_collections:
184+
empty_collection = {
185+
"id": cid,
186+
"stac_version": "1.0.0",
187+
"description": "Automatically created collection"
188+
+ f" for {cid}",
189+
"license": "proprietary",
190+
"extent": {
191+
"spatial": {"bbox": [[-180, -90, 180, 90]]},
192+
"temporal": {"interval": [[None, None]]},
193+
},
194+
}
195+
copy.write_row(
196+
(orjson.dumps(empty_collection).decode(),),
197+
)
198+
199+
# Insert from temp table to collections
200+
cur.execute(
201+
"""
202+
INSERT INTO collections (content)
203+
SELECT content FROM tmp_collections;
204+
""",
205+
)
206+
150207
if not queryables_data:
151208
raise ValueError(f"No valid JSON data found in {file}")
152209

src/pypgstac/tests/test_queryables.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,76 @@ def test_load_queryables_delete_missing_with_collections(
514514
partial_props_file.unlink()
515515

516516

517+
def test_load_queryables_create_missing_collections(db: PgstacDB) -> None:
    """Exercise the create_missing_collections flag of load_queryables.

    Loading queryables for unknown collection ids is expected to fail with a
    "do not exist" error when the flag is left off. With the flag on, the
    collections should be created as minimal STAC stubs and the queryables
    from the test file should be attached to exactly those collections.
    """
    cli = PgstacCLI(dsn=db.dsn)
    queryables_path = str(TEST_QUERYABLES_JSON)
    target_ids = ["test_collection_1", "test_collection_2"]

    # Baseline: without the flag, unknown collections are rejected.
    with pytest.raises(Exception) as exc_info:
        cli.load_queryables(queryables_path, collection_ids=target_ids)
    assert "do not exist" in str(exc_info.value)

    # Same call with the flag enabled must succeed.
    cli.load_queryables(
        queryables_path,
        collection_ids=target_ids,
        create_missing_collections=True,
    )

    # Read back the collections that should now exist.
    collection_rows = db.query(
        """
        SELECT id, content
        FROM collections
        WHERE id = ANY(%s)
        ORDER BY id;
        """,
        [target_ids],
    )
    created = [(row[0], row[1]) for row in collection_rows]

    # Both ids must be present, each carrying the stub STAC document.
    assert len(created) == 2
    for collection_id, content in created:
        assert collection_id in target_ids
        assert content["stac_version"] == "1.0.0"
        assert "description" in content
        assert content["license"] == "proprietary"
        assert "extent" in content
        extent = content["extent"]
        assert "spatial" in extent
        assert "temporal" in extent
        assert extent["spatial"]["bbox"] == [[-180, -90, 180, 90]]
        assert extent["temporal"]["interval"] == [[None, None]]

    # Read back the queryables registered for exactly these collections.
    queryable_rows = db.query(
        """
        SELECT name, collection_ids
        FROM queryables
        WHERE name LIKE 'test:%%'
        AND collection_ids = %s::text[]
        ORDER BY name;
        """,
        [target_ids],
    )
    loaded = [(row[0], row[1]) for row in queryable_rows]

    # Every test property should be registered and tied to both collections.
    assert len(loaded) == 5  # All test properties
    expected_ids = set(target_ids)
    for _name, linked_ids in loaded:
        assert set(linked_ids) == expected_ids
586+
517587
def test_load_queryables_no_properties(db: PgstacDB) -> None:
518588
"""Test loading queryables with no properties."""
519589
# Create a CLI instance

0 commit comments

Comments
 (0)