Skip to content

Commit bfa28e6

Browse files
committed
Added global and per feed configuration
1 parent f5f51a9 commit bfa28e6

File tree

5 files changed

+108
-11
lines changed

5 files changed

+108
-11
lines changed

api/src/utils/config_reader.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from typing import Optional, Any
2+
from sqlalchemy.orm import Session
3+
from shared.database.database import with_db_session
4+
from shared.database_gen.sqlacodegen_models import ConfigKey, ConfigValueFeed
5+
6+
7+
@with_db_session
8+
def get_config_value(
9+
namespace: str,
10+
key: str,
11+
feed_id: Optional[str] = None,
12+
db_session: Session = None,
13+
) -> Optional[Any]:
14+
"""
15+
Retrieves a configuration value from the database using the provided session.
16+
17+
It first looks for a feed-specific override. If a feed_id is provided and a
18+
value is found, it returns that value.
19+
20+
If no feed-specific value is found or no feed_id is provided, it looks for
21+
the global default value in the `config_key` table.
22+
23+
:param namespace: The namespace of the configuration key.
24+
:param key: The configuration key.
25+
:param feed_id: The optional feed_id for a specific override.
26+
:param db_session: The SQLAlchemy session, injected by the `with_db_session` decorator.
27+
:return: The configuration value, or None if not found.
28+
"""
29+
# 1. Try to get feed-specific value if feed_id is provided
30+
if feed_id:
31+
feed_config = (
32+
db_session.query(ConfigValueFeed.value)
33+
.filter(
34+
ConfigValueFeed.feed_id == feed_id,
35+
ConfigValueFeed.namespace == namespace,
36+
ConfigValueFeed.key == key,
37+
)
38+
.first()
39+
)
40+
if feed_config:
41+
return feed_config.value
42+
43+
# 2. If not found or no feed_id, get the default value
44+
default_config = (
45+
db_session.query(ConfigKey.default_value).filter(ConfigKey.namespace == namespace, ConfigKey.key == key).first()
46+
)
47+
48+
return default_config.default_value if default_config else None
49+
50+
51+
# Example of how you would use it elsewhere in your application:
52+
#
53+
# from shared.config.config_reader import get_config_value
54+
#
55+
# def my_service_function():
56+
# # The `with_db_session` decorator handles creating and closing the session.
57+
# http_headers = get_config_value(
58+
# namespace='feed_download',
59+
# key='http_headers',
60+
# feed_id='mdb-2126'
61+
# )
62+
# print(f"HTTP Headers for mdb-2126: {http_headers}")

functions-python/batch_process_dataset/src/main.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,14 @@ def create_dataset_stable_id(feed_stable_id, timestamp):
122122
"""
123123
return f"{feed_stable_id}-{timestamp}"
124124

125-
def download_content(self, temporary_file_path):
125+
def download_content(self, temporary_file_path, stable_id):
126126
"""
127127
Downloads the content of a URL and return the hash of the file
128128
"""
129129
file_hash = download_and_get_hash(
130130
self.producer_url,
131131
temporary_file_path,
132+
stable_id,
132133
authentication_type=self.authentication_type,
133134
api_key_parameter_name=self.api_key_parameter_name,
134135
credentials=self.feed_credentials,
@@ -193,7 +194,7 @@ def upload_files_to_storage(
193194
)
194195
return blob, extracted_files
195196

196-
def upload_dataset(self, public=True) -> DatasetFile or None:
197+
def upload_dataset(self, stable_id, public=True) -> DatasetFile or None:
197198
"""
198199
Uploads a dataset to a GCP bucket as <feed_stable_id>/latest.zip and
199200
<feed_stable_id>/<feed_stable_id>-<upload_datetime>.zip
@@ -204,7 +205,7 @@ def upload_dataset(self, public=True) -> DatasetFile or None:
204205
try:
205206
self.logger.info("Accessing URL %s", self.producer_url)
206207
temp_file_path = self.generate_temp_filename()
207-
file_sha256_hash, is_zip = self.download_content(temp_file_path)
208+
file_sha256_hash, is_zip = self.download_content(temp_file_path, stable_id)
208209
if not is_zip:
209210
self.logger.error(
210211
f"[{self.feed_stable_id}] The downloaded file from {self.producer_url} is not a valid ZIP file."
@@ -416,12 +417,12 @@ def _get_unzipped_size(dataset_file):
416417
)
417418

418419
@with_db_session
419-
def process_from_producer_url(self, db_session) -> Optional[DatasetFile]:
420+
def process_from_producer_url(self, db_session, stable_id) -> Optional[DatasetFile]:
420421
"""
421422
Process the dataset and store new version in GCP bucket if any changes are detected
422423
:return: the DatasetFile object created
423424
"""
424-
dataset_file = self.upload_dataset()
425+
dataset_file = self.upload_dataset(stable_id)
425426

426427
if dataset_file is None:
427428
self.logger.info(f"[{self.feed_stable_id}] No database update required.")
@@ -542,7 +543,7 @@ def process_dataset(cloud_event: CloudEvent):
542543
if json_payload.get("use_bucket_latest", False):
543544
dataset_file = processor.process_from_bucket()
544545
else:
545-
dataset_file = processor.process_from_producer_url()
546+
dataset_file = processor.process_from_producer_url(stable_id)
546547
except Exception as e:
547548
# This makes sure the logger is initialized
548549
logger = get_logger("process_dataset", stable_id if stable_id else "UNKNOWN")

functions-python/helpers/utils.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
from urllib3.util.ssl_ import create_urllib3_context
2929
from pathlib import Path
3030

31+
from utils.config_reader import get_config_value
32+
3133

3234
def create_bucket(bucket_name):
3335
"""
@@ -126,6 +128,7 @@ def download_and_get_hash(
126128
file_path,
127129
hash_algorithm="sha256",
128130
chunk_size=8192,
131+
stable_id=None,
129132
authentication_type=0,
130133
api_key_parameter_name=None,
131134
credentials=None,
@@ -145,12 +148,17 @@ def download_and_get_hash(
145148
ctx.load_default_certs()
146149
ctx.options |= 0x4 # ssl.OP_LEGACY_SERVER_CONNECT
147150

151+
headers = get_config_value(
152+
namespace="feed_download", key="http_headers", feed_stable_id=stable_id
153+
)
154+
if headers is None:
155+
headers = {
156+
"User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
157+
"AppleWebKit/537.36 (KHTML, like Gecko) "
158+
"Chrome/126.0.0.0 Mobile Safari/537.36"
159+
}
160+
148161
# authentication_type == 1 -> the credentials are passed in the url
149-
headers = {
150-
"User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
151-
"AppleWebKit/537.36 (KHTML, like Gecko) "
152-
"Chrome/126.0.0.0 Mobile Safari/537.36"
153-
}
154162
# Careful, some URLs may already contain a query string
155163
# (e.g. http://api.511.org/transit/datafeeds?operator_id=CE)
156164
if authentication_type == 1 and api_key_parameter_name and credentials:

liquibase/changelog.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,5 @@
6868
<include file="changes/feat_1260.sql" relativeToChangelogFile="true"/>
6969
<include file="changes/feat_1333.sql" relativeToChangelogFile="true"/>
7070
<include file="changes/feat_pt_152.sql" relativeToChangelogFile="true"/>
71+
<include file="changes/feat_1325.sql" relativeToChangelogFile="true"/>
7172
</databaseChangeLog>

liquibase/changes/feat_1325.sql

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
ALTER TABLE feed ADD CONSTRAINT feed_stable_id_unique UNIQUE (stable_id);
2+
-- 1. Catalog of allowed keys + optional global values
3+
CREATE TABLE config_key (
4+
namespace text NOT NULL, -- e.g. 'reverse_geolocation'
5+
key text NOT NULL, -- e.g. 'country_fallback'
6+
description text,
7+
default_value jsonb, -- fallback if nothing else
8+
updated_at timestamptz NOT NULL DEFAULT now(),
9+
PRIMARY KEY (namespace, key)
10+
);
11+
12+
-- 2. Per-feed overrides
13+
CREATE TABLE config_value_feed (
14+
feed_stable_id varchar(255) NOT NULL,
15+
namespace text NOT NULL,
16+
key text NOT NULL,
17+
value jsonb NOT NULL,
18+
updated_at timestamptz NOT NULL DEFAULT now(),
19+
PRIMARY KEY (feed_stable_id, namespace, key),
20+
FOREIGN KEY (feed_stable_id) REFERENCES feed(stable_id) ON DELETE CASCADE,
21+
FOREIGN KEY (namespace, key) REFERENCES config_key(namespace, key) ON DELETE CASCADE
22+
);
23+
24+
-- Helpful index
25+
CREATE INDEX config_value_feed_gin ON config_value_feed USING GIN (value);

0 commit comments

Comments
 (0)