diff --git a/.gitignore b/.gitignore index 32bc2044..32565592 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ src/pypgstac/target src/pypgstac/python/pypgstac/*.so .vscode .ipynb_checkpoints +.venv +.pytest_cache \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index be124f88..b030e86b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [Unreleased] + +### Added + +- Add `load_queryables` function to pypgstac for loading queryables from a JSON file +- Add support for specifying collection IDs when loading queryables + ## [v0.9.5] ### Changed diff --git a/docs/src/pypgstac.md b/docs/src/pypgstac.md index 9e62eb33..f2433bef 100644 --- a/docs/src/pypgstac.md +++ b/docs/src/pypgstac.md @@ -85,6 +85,80 @@ To upsert any records, adding anything new and replacing anything with the same pypgstac load items --method upsert ``` +### Loading Queryables + +Queryables are a mechanism that allows clients to discover what terms are available for use when writing filter expressions in a STAC API. The Filter Extension enables clients to filter collections and items based on their properties using the Common Query Language (CQL2). + +To load queryables from a JSON file: + +``` +pypgstac load_queryables queryables.json +``` + +To load queryables for specific collections: + +``` +pypgstac load_queryables queryables.json --collection_ids [collection1,collection2] +``` + +To load queryables and delete properties not present in the file: + +``` +pypgstac load_queryables queryables.json --delete_missing +``` + +To load queryables and create indexes only for specific fields: + +``` +pypgstac load_queryables queryables.json --index_fields [field1,field2] +``` + +By default, no indexes are created when loading queryables. 
Using the `--index_fields` parameter allows you to selectively create indexes only for fields that require them. Creating too many indexes can degrade database performance, especially for write operations, so it's recommended to only index fields that are frequently used in queries. + +When using `--delete_missing` with specific collections, only properties for those collections will be deleted: + +``` +pypgstac load_queryables queryables.json --collection_ids [collection1,collection2] --delete_missing +``` + +You can combine all parameters as needed: + +``` +pypgstac load_queryables queryables.json --collection_ids [collection1,collection2] --delete_missing --index_fields [field1,field2] +``` + +The JSON file should follow the queryables schema as described in the [STAC API - Filter Extension](https://github.com/stac-api-extensions/filter#queryables). Here's an example: + +```json +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://example.com/stac/queryables", + "type": "object", + "title": "Queryables for Example STAC API", + "description": "Queryable names for the Example STAC API", + "properties": { + "id": { + "description": "Item identifier", + "type": "string" + }, + "datetime": { + "description": "Datetime", + "type": "string", + "format": "date-time" + }, + "eo:cloud_cover": { + "description": "Cloud cover percentage", + "type": "number", + "minimum": 0, + "maximum": 100 + } + }, + "additionalProperties": true +} +``` + +The command extracts the properties from the JSON file and creates one queryable per property in the database, choosing the property wrapper from the declared `type`/`format` of each property. The core fields (`id`, `geometry`, `datetime`, `end_datetime`, `collection`) are skipped, and indexes are created only for the fields listed in `--index_fields`. + ### Automated Collection Extent Updates By setting `pgstac.update_collection_extent` to `true`, a trigger is enabled to automatically adjust the spatial and temporal extents in collections when new items are ingested. 
#!/usr/bin/env python
"""
Example script demonstrating how to load queryables into PgSTAC.

This script shows how to use the load_queryables function programmatically;
the equivalent command line call is ``pypgstac load_queryables <file>``.
"""

import sys
from pathlib import Path

# Make the in-repo package importable when running from a source checkout.
# The package lives in ``<repo>/src/pypgstac/src/pypgstac`` while this script
# sits in ``<repo>/src/pypgstac/examples``, so the import root is ``../src``.
# ``append`` keeps an installed pypgstac ahead of the checkout on sys.path.
sys.path.append(str(Path(__file__).resolve().parent.parent / "src"))

from pypgstac.pypgstac import PgstacCLI  # noqa: E402  (needs the path tweak above)


def load_for_specific_collections(
    cli, sample_file, collection_ids, delete_missing=False,
):
    """Load queryables for specific collections.

    Args:
        cli: PgstacCLI instance
        sample_file: Path to the queryables file
        collection_ids: List of collection IDs to apply queryables to
        delete_missing: If True, delete properties not present in the file
    """
    cli.load_queryables(
        str(sample_file),
        collection_ids=collection_ids,
        delete_missing=delete_missing,
    )


def main():
    """Demonstrate loading queryables into PgSTAC.

    Runs four loads against the database selected by the standard PostgreSQL
    environment variables: global, collection-scoped, and each of those again
    with ``delete_missing=True``.
    """
    sample_file = Path(__file__).parent / "sample_queryables.json"

    # Fail loudly: a silent return makes the example look like it succeeded.
    if not sample_file.exists():
        sys.exit(f"Sample queryables file not found: {sample_file}")

    # This will use the standard PostgreSQL environment variables for connection
    cli = PgstacCLI()
    example_collections = ["landsat-8", "sentinel-2"]

    # Load queryables for all collections
    cli.load_queryables(str(sample_file))

    # Load queryables for specific collections only
    load_for_specific_collections(cli, sample_file, example_collections)

    # delete_missing=True also removes queryables that are not in the file
    cli.load_queryables(str(sample_file), delete_missing=True)

    # Same, but the clean-up is limited to the specified collections
    load_for_specific_collections(
        cli, sample_file, example_collections, delete_missing=True,
    )


if __name__ == "__main__":
    main()
def load_queryables(
    self,
    file: str,
    collection_ids: Optional[list[str]] = None,
    delete_missing: Optional[bool] = False,
    index_fields: Optional[list[str]] = None,
) -> None:
    """Load queryables from a JSON file.

    Only the first JSON document yielded by ``read_json(file)`` is used. Each
    entry of its ``properties`` object becomes one row in ``queryables``;
    the core fields (id, geometry, datetime, end_datetime, collection) are
    skipped. An existing row with the same name is replaced.

    Args:
        file: Path to the JSON file containing queryables definition
        collection_ids: Collection IDs to apply the queryables to. Accepts a
            list/tuple or a comma-separated string; None or empty means the
            queryables are stored with NULL collection_ids.
        delete_missing: If True, delete properties not present in the file.
            If collection_ids is specified, only delete properties
            for those collections. Nothing is deleted when the file
            contains core fields only.
        index_fields: Field names to create a BTREE index for (list/tuple or
            comma-separated string). If not provided, no indexes will be
            created. Creating too many indexes can negatively impact
            performance.

    Raises:
        ValueError: If the file yields no JSON object, has no properties, or
            a property definition is not a JSON object.
    """
    core_fields = ("id", "geometry", "datetime", "end_datetime", "collection")

    def _as_list(value):
        """Coerce a CLI/API argument to a list of non-empty strings, or None."""
        if value is None:
            return None
        if isinstance(value, str):
            # python-fire passes a lone value as a plain string; without this
            # ``name in index_fields`` would be a substring test.
            value = value.split(",")
        cleaned = [str(part).strip() for part in value]
        # An empty list is treated like None so deletes and inserts agree on
        # the NULL scope (the original deleted NULL rows but inserted ``{}``).
        return [part for part in cleaned if part] or None

    collection_ids = _as_list(collection_ids)
    index_fields = _as_list(index_fields)
    indexed_names = set(index_fields or ())

    # Only the first document in the file is relevant
    queryables_data = next(iter(read_json(file)), None)
    if not queryables_data:
        raise ValueError(f"No valid JSON data found in {file}")
    if not isinstance(queryables_data, dict):
        raise ValueError(f"Queryables definition in {file} must be a JSON object")

    properties = queryables_data.get("properties", {})
    if not properties:
        raise ValueError("No properties found in queryables definition")
    if not isinstance(properties, dict):
        raise ValueError("Queryables 'properties' must be a JSON object")

    # Core fields are never stored as queryables by this loader
    custom_properties = {
        name: definition
        for name, definition in properties.items()
        if name not in core_fields
    }
    for name, definition in custom_properties.items():
        if not isinstance(definition, dict):
            raise ValueError(f"Definition of queryable '{name}' must be a JSON object")

    conn = self._db.connect()
    with conn.cursor() as cur:
        with conn.transaction():
            for name, definition in custom_properties.items():
                # Pick the wrapper from the JSON-schema type; the checks keep
                # the original precedence (number, integer, date-time, array).
                json_type = definition.get("type")
                if json_type == "number":
                    property_wrapper = "to_float"
                elif json_type == "integer":
                    property_wrapper = "to_int"
                elif definition.get("format") == "date-time":
                    property_wrapper = "to_tstz"
                elif json_type == "array":
                    property_wrapper = "to_text_array"
                else:
                    property_wrapper = "to_text"

                property_index_type = "BTREE" if name in indexed_names else None

                # Replace semantics: drop the NULL-scoped row and, if a scope
                # was given, the row with exactly that scope. With a NULL
                # parameter the equality is NULL, so only the first arm hits.
                cur.execute(
                    """
                    DELETE FROM queryables
                    WHERE name = %s
                    AND (
                        collection_ids IS NULL
                        OR collection_ids = %s::text[]
                    )
                    """,
                    [name, collection_ids],
                )

                cur.execute(
                    """
                    INSERT INTO queryables
                    (name, collection_ids, definition, property_wrapper,
                    property_index_type)
                    VALUES (%s, %s, %s, %s, %s)
                    """,
                    [
                        name,
                        collection_ids,
                        orjson.dumps(definition).decode(),
                        property_wrapper,
                        property_index_type,
                    ],
                )

            # Remove queryables of the same scope that the file no longer
            # lists. Skipped when the file had no non-core properties, so a
            # core-only file never wipes the table.
            if delete_missing and custom_properties:
                names_to_keep = list(custom_properties) + list(core_fields)
                # IS NOT DISTINCT FROM matches the NULL scope as well as an
                # exact array; the array parameter replaces string-built
                # placeholder lists.
                cur.execute(
                    """
                    DELETE FROM queryables
                    WHERE collection_ids IS NOT DISTINCT FROM %s::text[]
                    AND name <> ALL(%s::text[])
                    """,
                    [collection_ids, names_to_keep],
                )

        # Trigger index creation only if index_fields were provided
        if index_fields:
            cur.execute("SELECT maintain_partitions();")
"""Tests for pypgstac queryables functionality."""

from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

from pypgstac.db import PgstacDB
from pypgstac.load import Loader
from pypgstac.pypgstac import PgstacCLI

HERE = Path(__file__).parent
TEST_DATA_DIR = HERE.parent.parent / "pgstac" / "tests" / "testdata"
TEST_COLLECTIONS_JSON = TEST_DATA_DIR / "collections.json"
TEST_QUERYABLES_JSON = HERE / "data-files" / "queryables" / "test_queryables.json"

# Non-core properties defined in TEST_QUERYABLES_JSON and the wrapper each
# one is expected to get from its declared type/format.
EXPECTED_WRAPPERS = {
    "test:string_prop": "to_text",
    "test:number_prop": "to_float",
    "test:integer_prop": "to_int",
    "test:datetime_prop": "to_tstz",
    "test:array_prop": "to_text_array",
}
ALL_TEST_FIELDS = list(EXPECTED_WRAPPERS)

PARTIAL_PROPS_JSON = """
{
    "type": "object",
    "title": "Partial Properties",
    "properties": {
        "test:string_prop": {
            "type": "string",
            "title": "String Property"
        }
    }
}
"""

TWO_PROPS_JSON = """
{
    "type": "object",
    "title": "Test Properties",
    "properties": {
        "test:prop1": {
            "type": "string",
            "title": "Test Property 1"
        },
        "test:prop2": {
            "type": "integer",
            "title": "Test Property 2"
        }
    }
}
"""


def _test_queryables(db: PgstacDB) -> list[dict]:
    """Return every ``test:*`` queryable as a dict, ordered by name."""
    columns = ("name", "collection_ids", "property_wrapper", "property_index_type")
    rows = db.query(
        """
        SELECT name, collection_ids, property_wrapper, property_index_type
        FROM queryables
        WHERE name LIKE 'test:%'
        ORDER BY name;
        """,
    )
    return [dict(zip(columns, row)) for row in rows]


def _write_json(directory: Path, filename: str, content: str) -> Path:
    """Write ``content`` to ``directory/filename`` and return the path.

    Used with pytest's ``tmp_path`` so fixtures never land in the repository
    and are removed even when a test fails.
    """
    target = directory / filename
    target.write_text(content)
    return target


def _load_two_collection_ids(db: PgstacDB, loader: Loader) -> list[str]:
    """Load the shared test collections and return two of their ids."""
    loader.load_collections(
        str(TEST_COLLECTIONS_JSON),
        insert_mode="insert",
    )
    return [row[0] for row in db.query("SELECT id FROM collections LIMIT 2;")]


def _maintain_partitions_calls(mock_cursor: MagicMock) -> list:
    """Return the recorded ``execute`` calls that mention maintain_partitions."""
    return [
        call_args
        for call_args in mock_cursor.execute.call_args_list
        if "maintain_partitions" in str(call_args)
    ]


def test_load_queryables_succeeds(db: PgstacDB) -> None:
    """Test pypgstac queryables loader."""
    cli = PgstacCLI(dsn=db.dsn)

    # Index every test field so both wrapper and index type can be checked
    cli.load_queryables(str(TEST_QUERYABLES_JSON), index_fields=ALL_TEST_FIELDS)

    queryables = {q["name"]: q for q in _test_queryables(db)}

    assert len(queryables) == 5
    for name, wrapper in EXPECTED_WRAPPERS.items():
        assert queryables[name]["property_wrapper"] == wrapper
        assert queryables[name]["property_index_type"] == "BTREE"


def test_load_queryables_without_index_fields(db: PgstacDB) -> None:
    """Test pypgstac queryables loader without index_fields parameter."""
    cli = PgstacCLI(dsn=db.dsn)

    cli.load_queryables(str(TEST_QUERYABLES_JSON))

    queryables = _test_queryables(db)

    # All properties are loaded, none of them indexed
    assert len(queryables) == 5
    for q in queryables:
        assert q["property_index_type"] is None


def test_load_queryables_with_specific_index_fields(db: PgstacDB) -> None:
    """Test pypgstac queryables loader with specific index_fields."""
    cli = PgstacCLI(dsn=db.dsn)
    indexed = ["test:string_prop", "test:datetime_prop"]

    cli.load_queryables(str(TEST_QUERYABLES_JSON), index_fields=indexed)

    queryables = _test_queryables(db)

    assert len(queryables) == 5
    for q in queryables:
        expected = "BTREE" if q["name"] in indexed else None
        assert q["property_index_type"] == expected


def test_load_queryables_empty_index_fields(db: PgstacDB) -> None:
    """Test pypgstac queryables loader with empty index_fields."""
    cli = PgstacCLI(dsn=db.dsn)

    cli.load_queryables(str(TEST_QUERYABLES_JSON), index_fields=[])

    queryables = _test_queryables(db)

    # Guard against a vacuous pass: the loop below proves nothing on no rows
    assert len(queryables) == 5
    for q in queryables:
        assert q["property_index_type"] is None


@patch("pypgstac.pypgstac.PgstacDB.connect")
def test_maintain_partitions_called_only_with_index_fields(mock_connect, tmp_path):
    """Test that maintain_partitions is only called when index_fields is provided."""
    # Mock the connection and the cursor yielded by ``with conn.cursor()``
    mock_conn = MagicMock()
    mock_connect.return_value = mock_conn
    mock_cursor = MagicMock()
    mock_conn.cursor.return_value.__enter__.return_value = mock_cursor

    cli = PgstacCLI(dsn="mock_dsn")
    test_file = _write_json(tmp_path, "temp_test.json", TWO_PROPS_JSON)

    # Case 1: With index_fields
    cli.load_queryables(str(test_file), index_fields=["test:prop1"])
    assert len(_maintain_partitions_calls(mock_cursor)) == 1

    mock_cursor.reset_mock()

    # Case 2: Without index_fields
    cli.load_queryables(str(test_file))
    assert len(_maintain_partitions_calls(mock_cursor)) == 0


def test_load_queryables_with_collections(db: PgstacDB, loader: Loader) -> None:
    """Test pypgstac queryables loader with specific collections."""
    collection_ids = _load_two_collection_ids(db, loader)
    cli = PgstacCLI(dsn=db.dsn)

    cli.load_queryables(
        str(TEST_QUERYABLES_JSON),
        collection_ids=collection_ids,
        index_fields=["test:string_prop"],
    )

    queryables = _test_queryables(db)

    assert len(queryables) == 5
    for q in queryables:
        assert set(q["collection_ids"]) == set(collection_ids)
        # Only test:string_prop was requested to be indexed
        expected = "BTREE" if q["name"] == "test:string_prop" else None
        assert q["property_index_type"] == expected


def test_load_queryables_update(db: PgstacDB) -> None:
    """Test updating existing queryables."""
    cli = PgstacCLI(dsn=db.dsn)

    cli.load_queryables(str(TEST_QUERYABLES_JSON), index_fields=["test:number_prop"])

    # Simulate a stale wrapper by editing the row directly. RETURNING + list()
    # forces the statement to run even if ``db.query`` evaluates lazily
    # (NOTE(review): laziness of PgstacDB.query not visible here - confirm).
    updated = list(
        db.query(
            """
            UPDATE queryables
            SET property_wrapper = 'to_text'
            WHERE name = 'test:number_prop'
            RETURNING name;
            """,
        ),
    )
    assert len(updated) == 1

    # Reload with a different index field
    cli.load_queryables(str(TEST_QUERYABLES_JSON), index_fields=["test:string_prop"])

    queryables = {q["name"]: q for q in _test_queryables(db)}
    number_prop = queryables["test:number_prop"]
    string_prop = queryables["test:string_prop"]

    # The property wrapper should be back to to_float
    assert number_prop["property_wrapper"] == "to_float"
    # The index should be removed from number_prop
    assert number_prop["property_index_type"] is None
    # The index should be added to string_prop
    assert string_prop["property_index_type"] == "BTREE"


def test_load_queryables_invalid_json(db: PgstacDB, tmp_path: Path) -> None:
    """Test loading queryables with invalid JSON."""
    cli = PgstacCLI(dsn=db.dsn)
    invalid_json_file = _write_json(tmp_path, "invalid.json", "{")

    with pytest.raises((ValueError, SyntaxError)):
        cli.load_queryables(str(invalid_json_file))


def test_load_queryables_delete_missing(db: PgstacDB, tmp_path: Path) -> None:
    """Test loading queryables with delete_missing=True."""
    cli = PgstacCLI(dsn=db.dsn)

    # Start from the full set, all indexed
    cli.load_queryables(str(TEST_QUERYABLES_JSON), index_fields=ALL_TEST_FIELDS)

    partial_props_file = _write_json(tmp_path, "partial_props.json", PARTIAL_PROPS_JSON)

    cli.load_queryables(
        str(partial_props_file),
        delete_missing=True,
        index_fields=["test:string_prop"],
    )

    queryables = _test_queryables(db)

    # Only the string property remains and it is indexed
    assert len(queryables) == 1
    assert queryables[0]["name"] == "test:string_prop"
    assert queryables[0]["property_index_type"] == "BTREE"


def test_load_queryables_delete_missing_with_collections(
    db: PgstacDB, loader: Loader, tmp_path: Path,
) -> None:
    """Test loading queryables with delete_missing=True and specific collections."""
    collection_ids = _load_two_collection_ids(db, loader)
    cli = PgstacCLI(dsn=db.dsn)

    # Start from the full set for these collections, all indexed
    cli.load_queryables(
        str(TEST_QUERYABLES_JSON),
        collection_ids=collection_ids,
        index_fields=ALL_TEST_FIELDS,
    )

    partial_props_file = _write_json(tmp_path, "partial_props.json", PARTIAL_PROPS_JSON)

    # Reload the partial file for the same collections, without an index
    cli.load_queryables(
        str(partial_props_file),
        collection_ids=collection_ids,
        delete_missing=True,
    )

    specific_queryables = [
        q
        for q in _test_queryables(db)
        if q["collection_ids"] and set(q["collection_ids"]) == set(collection_ids)
    ]

    # Only the string property remains for these collections, unindexed
    assert len(specific_queryables) == 1
    assert specific_queryables[0]["name"] == "test:string_prop"
    assert specific_queryables[0]["property_index_type"] is None


def test_load_queryables_no_properties(db: PgstacDB, tmp_path: Path) -> None:
    """Test loading queryables with no properties."""
    cli = PgstacCLI(dsn=db.dsn)
    no_props_file = _write_json(
        tmp_path, "no_props.json", '{"type": "object", "title": "No Properties"}',
    )

    with pytest.raises(
        ValueError,
        match="No properties found in queryables definition",
    ):
        cli.load_queryables(str(no_props_file))