Skip to content

Commit 7630c0c

Browse files
committed
fix: remove catalog endpoint parameter
1 parent cf46f6d commit 7630c0c

File tree

1 file changed

+5
-54
lines changed

1 file changed

+5
-54
lines changed

dags/veda_data_pipeline/veda_tenant_tagging_pipeline.py

Lines changed: 5 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,7 @@
1717
"collections": Param(
1818
default=None,
1919
type=["null", "array"],
20-
description="List of collection IDs to tag (optional if catalog_endpoint is provided)"
21-
),
22-
"catalog_endpoint": Param(
23-
default=None,
24-
type=["null", "string"],
25-
description="STAC catalog endpoint URL to fetch all collections from (optional if collections is provided)"
20+
description="List of collection IDs to tag"
2621
),
2722
"tenant": Param(default=None, type="string", description="Tenant ID to tag the collection with"),
2823
"tenant_field": Param(
@@ -42,7 +37,7 @@
4237
Tags existing collections with tenant information by updating their properties field.
4338
4439
This pipeline:
45-
1. Fetches existing collections from the STAC catalog (either from a list or from a catalog endpoint)
40+
1. Fetches existing collections from the STAC catalog
4641
2. Updates each collection's properties with tenant tags
4742
3. Re-ingests the updated collections
4843
@@ -51,10 +46,8 @@
5146
**Required Parameters:**
5247
- `tenant` (string): Tenant ID to tag collections with
5348
54-
**Collection Source (provide one of the following):**
49+
**Collection Source:**
5550
- `collections` (array of strings): List of collection IDs to tag
56-
- `catalog_endpoint` (string): STAC catalog endpoint URL (e.g., `https://dev.openveda.cloud/api/stac/collections`) to fetch all collections from
57-
5851
**Optional Parameters:**
5952
- `tenant_field` (string): Properties key to write tenant into (default: `eic:tenant`)
6053
- `properties` (object): Additional properties to add/update on collections
@@ -78,13 +71,6 @@
7871
}
7972
```
8073
81-
**Tag all collections from a catalog:**
82-
```json
83-
{
84-
"catalog_endpoint": "https://dev.openveda.cloud/api/stac/collections",
85-
"tenant": "tenant-123"
86-
}
87-
```
8874
8975
**With additional properties:**
9076
```json
@@ -109,51 +95,16 @@
10995

11096
@task()
11197
def get_collection_ids(ti=None):
112-
"""Extract and validate collection IDs from configuration or fetch from catalog endpoint"""
98+
"""Extract and validate collection IDs from configuration"""
11399
try:
114100
config = ti.dag_run.conf
115101
collections = config.get("collections")
116-
catalog_endpoint = config.get("catalog_endpoint")
117102
tenant = config.get("tenant")
118103

119104
logger.info(f"Starting collection ID validation. Tenant: {tenant}")
120105

121-
# If catalog_endpoint is provided, fetch all collections
122-
if catalog_endpoint:
123-
logger.info(f"Fetching all collections from catalog endpoint: {catalog_endpoint}")
124-
125-
try:
126-
response = requests.get(catalog_endpoint, timeout=30)
127-
response.raise_for_status()
128-
except requests.exceptions.RequestException as e:
129-
error_msg = f"Failed to fetch collections from catalog endpoint {catalog_endpoint}: {str(e)}"
130-
logger.error(error_msg)
131-
raise ValueError(error_msg) from e
132-
133-
try:
134-
catalog_data = response.json()
135-
except (ValueError, requests.exceptions.JSONDecodeError) as json_error:
136-
error_msg = f"Failed to parse JSON response from catalog endpoint {catalog_endpoint}"
137-
logger.error(error_msg)
138-
raise ValueError(error_msg) from json_error
139-
140-
# Extract collection IDs
141-
if "collections" in catalog_data:
142-
collections = [coll.get("id") for coll in catalog_data["collections"] if coll.get("id")]
143-
else:
144-
error_msg = f"Unexpected response format from catalog endpoint {catalog_endpoint}"
145-
logger.error(error_msg)
146-
raise ValueError(error_msg)
147-
148-
if not collections:
149-
error_msg = f"No collections found at catalog endpoint {catalog_endpoint}"
150-
logger.error(error_msg)
151-
raise ValueError(error_msg)
152-
153-
logger.info(f"Found {len(collections)} collections from catalog endpoint")
154-
155106
if not collections:
156-
error_msg = "Either 'collections' list or 'catalog_endpoint' must be provided in DAG configuration"
107+
error_msg = "The 'collections' list must be provided in DAG configuration"
157108
logger.error(error_msg)
158109
raise ValueError(error_msg)
159110

0 commit comments

Comments
 (0)