Commit e608e44
Feat: update feed status python function (#944)
* update feed status python function
1 parent 284f035 commit e608e44

12 files changed (+301 −1 lines)
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
[run]
omit =
    */test*/*
    */database_gen/*
    */dataset_service/*
    */helpers/*

[report]
exclude_lines =
    if __name__ == .__main__.:
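As a minimal sketch, this is how the configuration above is honored when driving coverage.py programmatically. It assumes the file is saved under the conventional name `.coveragerc` and uses a hypothetical `main` module as the measurement target; the repo's own test scripts may invoke coverage differently.

import coverage

# Reads the [run]/[report] sections above (assumed saved as .coveragerc).
cov = coverage.Coverage(config_file=".coveragerc")
cov.start()
import main  # noqa: F401  -- hypothetical module under measurement
cov.stop()
cov.report()  # the [report] exclude_lines rule applies here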
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
# Environment variables for the update feed status function to run locally
export FEEDS_DATABASE_URL=${{FEEDS_DATABASE_URL}}
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
# Update Feed Status

This directory contains the GCP serverless function that updates all feed statuses according to the service date range of each feed's latest dataset.

Feeds whose status is already 'deprecated' or 'development' are excluded.

## Function Workflow

1. **HTTP Request Trigger**: The function is invoked through an HTTP request; no request body is required.
2. **Dataset Query**: Retrieves all feeds whose latest dataset has values set for the service date range.
3. **Feed Update Query**: Updates each feed's status by comparing the service date range against the current date.

## Function Configuration

The function depends on the following environment variable:

- `FEEDS_DATABASE_URL`: The database URL for connecting to the database containing GTFS datasets and related entities.

## Local Development

Follow standard practices for local development of GCP serverless functions. Refer to the main [README.md](../README.md) for general setup instructions for the development environment.

## Testing

To run it locally: `./scripts/function-python-run.sh --function_name update_feed_status`

Then, in Postman or a similar tool, make a `POST` call to `v1/update_feed_status` (see the sketch below).
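The same call can be made from Python with `requests` (already in this function's requirements). This sketch assumes the local run script serves the function on localhost:8080, the functions-framework default; the port and path are assumptions, not part of this commit.

import requests

# POST to the locally served function; on success the body echoes
# how many feeds were updated.
resp = requests.post("http://localhost:8080/v1/update_feed_status")
print(resp.status_code, resp.text)  # e.g. 200, "Script executed successfully. N feeds updated"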
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
{
  "name": "update-feed-status",
  "description": "Update the feed status with the corresponding latest dataset service date range",
  "entry_point": "update_feed_status",
  "timeout": 3600,
  "memory": "1Gi",
  "trigger_http": true,
  "include_folders": ["helpers"],
  "include_api_folders": ["database_gen"],
  "secret_environment_variables": [
    {
      "key": "FEEDS_DATABASE_URL"
    }
  ],
  "ingress_settings": "ALLOW_INTERNAL_AND_GCLB",
  "max_instance_request_concurrency": 1,
  "max_instance_count": 1,
  "min_instance_count": 0,
  "available_cpu": 1
}
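The Terraform in infra/functions-python/main.tf consumes this file with `jsondecode(file(...))` (see the diff further below). A rough Python equivalent, shown only to illustrate the config's shape; the filename is assumed to be `function_config.json` as referenced by the Terraform:

import json

# Load the function config the same way the deployment tooling consumes it.
with open("function_config.json") as f:
    cfg = json.load(f)

print(cfg["name"])         # update-feed-status
print(cfg["entry_point"])  # update_feed_status
print([s["key"] for s in cfg["secret_environment_variables"]])  # ['FEEDS_DATABASE_URL']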
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
# Common packages
functions-framework==3.*
google-cloud-logging
psycopg2-binary==2.9.6
aiohttp~=3.10.5
asyncio~=3.4.3
urllib3~=2.2.2
requests~=2.32.3
attrs~=23.1.0
pluggy~=1.3.0
certifi~=2024.7.4

# SQL Alchemy and Geo Alchemy
SQLAlchemy==2.0.23
geoalchemy2==0.14.7

# Google specific packages for this function
cloudevents~=1.10.1
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
Faker
pytest~=7.4.3

functions-python/update_feed_status/src/__init__.py

Whitespace-only changes.
Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
import logging
import os
import functions_framework
from datetime import date
from shared.helpers.logger import Logger
from shared.helpers.database import Database
from typing import TYPE_CHECKING
from sqlalchemy import case, text
from shared.database_gen.sqlacodegen_models import Gtfsdataset, Feed

if TYPE_CHECKING:
    from sqlalchemy.orm import Session

logging.basicConfig(level=logging.INFO)


# Query to update the status of the feeds based on the service date range of the latest dataset
def update_feed_statuses_query(session: "Session"):
    today = date.today()

    # Latest dataset per feed, restricted to datasets with a populated service date range
    latest_dataset_subq = (
        session.query(
            Gtfsdataset.feed_id,
            Gtfsdataset.service_date_range_start,
            Gtfsdataset.service_date_range_end,
        )
        .filter(
            Gtfsdataset.latest.is_(True),
            Gtfsdataset.service_date_range_start.isnot(None),
            Gtfsdataset.service_date_range_end.isnot(None),
        )
        .subquery()
    )

    # CASE expression mapping the service date range onto a feed status
    new_status = case(
        (
            latest_dataset_subq.c.service_date_range_end < today,
            text("'inactive'::status"),
        ),
        (
            latest_dataset_subq.c.service_date_range_start > today,
            text("'future'::status"),
        ),
        (
            (latest_dataset_subq.c.service_date_range_start <= today)
            & (latest_dataset_subq.c.service_date_range_end >= today),
            text("'active'::status"),
        ),
    )

    try:
        updated_count = (
            session.query(Feed)
            .filter(
                Feed.status != text("'deprecated'::status"),
                Feed.status != text("'development'::status"),
                Feed.id == latest_dataset_subq.c.feed_id,
            )
            .update({Feed.status: new_status}, synchronize_session=False)
        )
    except Exception as e:
        logging.error(f"Error updating feed statuses: {e}")
        raise Exception(f"Error updating feed statuses: {e}")

    try:
        session.commit()
        logging.info("Feed Database changes committed.")
        session.close()
        return updated_count
    except Exception as e:
        logging.error(f"Error committing changes: {e}")
        session.rollback()
        session.close()
        raise Exception(f"Error committing changes: {e}")


@functions_framework.http
def update_feed_status(_):
    """Updates the Feed status based on the latest dataset service date range."""
    Logger.init_logger()
    db = Database(database_url=os.getenv("FEEDS_DATABASE_URL"))
    update_count = 0
    try:
        with db.start_db_session() as session:
            logging.info("Database session started.")
            update_count = update_feed_statuses_query(session)

    except Exception as error:
        logging.error(f"Error updating the feed statuses: {error}")
        return f"Error updating the feed statuses: {error}", 500

    return f"Script executed successfully. {update_count} feeds updated", 200
Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
import os
from datetime import date, timedelta
from unittest.mock import patch, MagicMock

import pytest

from test_shared.test_utils.database_utils import default_db_url
from main import update_feed_status, update_feed_statuses_query


def test_update_feed_status_return():
    mock_session = MagicMock()

    today = date(2025, 3, 1)

    mock_subquery = MagicMock()
    mock_subquery.c.feed_id = 1
    mock_subquery.c.service_date_range_start = today - timedelta(days=10)
    mock_subquery.c.service_date_range_end = today + timedelta(days=10)

    mock_query = mock_session.query.return_value
    mock_query.filter.return_value.subquery.return_value = mock_subquery

    mock_update_query = mock_session.query.return_value.filter.return_value
    mock_update_query.update.return_value = 3

    updated_count = update_feed_statuses_query(mock_session)

    assert updated_count == 3
    mock_session.commit.assert_called_once()


def test_update_feed_status_failed_query():
    mock_session = MagicMock()

    today = date(2025, 3, 1)

    mock_subquery = MagicMock()
    mock_subquery.c.feed_id = 1
    mock_subquery.c.service_date_range_start = today - timedelta(days=10)
    mock_subquery.c.service_date_range_end = today + timedelta(days=10)

    mock_query = mock_session.query.return_value
    mock_query.filter.return_value.subquery.return_value = mock_subquery

    mock_update_query = mock_session.query.return_value.filter.return_value
    mock_update_query.update.side_effect = Exception("Mocked exception")

    # pytest.raises ensures the test fails if no exception is raised
    with pytest.raises(Exception, match="Error updating feed statuses: Mocked exception"):
        update_feed_statuses_query(mock_session)


@patch("main.Logger", autospec=True)
@patch("main.update_feed_statuses_query")
def test_updated_feed_status(mock_update_query, mock_logger):
    mock_update_query.return_value = 5

    with patch.dict(os.environ, {"FEEDS_DATABASE_URL": default_db_url}):
        response_body, status_code = update_feed_status(None)

    mock_update_query.assert_called_once()
    assert response_body == "Script executed successfully. 5 feeds updated"
    assert status_code == 200


@patch("main.Logger", autospec=True)
@patch("main.update_feed_statuses_query")
def test_updated_feed_status_error_raised(mock_update_query, mock_logger):
    mock_update_query.side_effect = Exception("Mocked exception")

    with patch.dict(os.environ, {"FEEDS_DATABASE_URL": default_db_url}):
        response_body, status_code = update_feed_status(None)

    mock_update_query.assert_called_once()
    assert response_body == "Error updating the feed statuses: Mocked exception"
    assert status_code == 500

infra/functions-python/main.tf

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ locals {
5656
function_backfill_dataset_service_date_range_config = jsondecode(file("${path.module}/../../functions-python/backfill_dataset_service_date_range/function_config.json"))
5757
function_backfill_dataset_service_date_range_zip = "${path.module}/../../functions-python/backfill_dataset_service_date_range/.dist/backfill_dataset_service_date_range.zip"
5858

59+
function_update_feed_status_config = jsondecode(file("${path.module}/../../functions-python/update_feed_status/function_config.json"))
60+
function_update_feed_status_zip = "${path.module}/../../functions-python/update_feed_status/.dist/update_feed_status.zip"
61+
5962
function_export_csv_config = jsondecode(file("${path.module}/../../functions-python/export_csv/function_config.json"))
6063
function_export_csv_zip = "${path.module}/../../functions-python/export_csv/.dist/export_csv.zip"
6164
}
@@ -69,6 +72,7 @@ locals {
6972
[for x in local.function_process_validation_report_config.secret_environment_variables : x.key],
7073
[for x in local.function_update_validation_report_config.secret_environment_variables : x.key],
7174
[for x in local.function_backfill_dataset_service_date_range_config.secret_environment_variables : x.key],
75+
[for x in local.function_update_feed_status_config.secret_environment_variables : x.key],
7276
[for x in local.function_export_csv_config.secret_environment_variables : x.key]
7377
)
7478

@@ -180,14 +184,20 @@ resource "google_storage_bucket_object" "backfill_dataset_service_date_range_zip
180184
source = local.function_backfill_dataset_service_date_range_zip
181185
}
182186

183-
184187
# 10. Export CSV
185188
resource "google_storage_bucket_object" "export_csv_zip" {
186189
bucket = google_storage_bucket.functions_bucket.name
187190
name = "export-csv-${substr(filebase64sha256(local.function_export_csv_zip), 0, 10)}.zip"
188191
source = local.function_export_csv_zip
189192
}
190193

194+
# 11. Update Feed Status
195+
resource "google_storage_bucket_object" "update_feed_status_zip" {
196+
bucket = google_storage_bucket.functions_bucket.name
197+
name = "backfill-dataset-service-date-range-${substr(filebase64sha256(local.function_update_feed_status_zip), 0, 10)}.zip"
198+
source = local.function_update_feed_status_zip
199+
}
200+
191201
# Secrets access
192202
resource "google_secret_manager_secret_iam_member" "secret_iam_member" {
193203
for_each = local.unique_secret_keys
@@ -893,6 +903,53 @@ resource "google_cloudfunctions2_function" "export_csv" {
893903
}
894904
}
895905

906+
# 11. functions/update_feed_status cloud function
907+
# Updates the Feed statuses based on latest dataset service date range
908+
909+
resource "google_cloudfunctions2_function" "update_feed_status" {
910+
name = local.function_update_feed_status_config.name
911+
description = local.function_update_feed_status_config.description
912+
location = var.gcp_region
913+
depends_on = [google_secret_manager_secret_iam_member.secret_iam_member]
914+
project = var.project_id
915+
build_config {
916+
runtime = var.python_runtime
917+
entry_point = local.function_update_feed_status_config.entry_point
918+
source {
919+
storage_source {
920+
bucket = google_storage_bucket.functions_bucket.name
921+
object = google_storage_bucket_object.update_feed_status_zip.name
922+
}
923+
}
924+
}
925+
service_config {
926+
available_memory = local.function_update_feed_status_config.memory
927+
available_cpu = local.function_update_feed_status_config.available_cpu
928+
timeout_seconds = local.update_feed_status_config.timeout
929+
vpc_connector = data.google_vpc_access_connector.vpc_connector.id
930+
vpc_connector_egress_settings = "PRIVATE_RANGES_ONLY"
931+
932+
environment_variables = {
933+
# prevents multiline logs from being truncated on GCP console
934+
PYTHONNODEBUGRANGES = 0
935+
}
936+
dynamic "secret_environment_variables" {
937+
for_each = local.function_update_feed_status_config.secret_environment_variables
938+
content {
939+
key = secret_environment_variables.value["key"]
940+
project_id = var.project_id
941+
secret = lookup(secret_environment_variables.value, "secret", "${upper(var.environment)}_${secret_environment_variables.value["key"]}")
942+
version = "latest"
943+
}
944+
}
945+
service_account_email = google_service_account.functions_service_account.email
946+
max_instance_request_concurrency = local.function_update_feed_status_config.max_instance_request_concurrency
947+
max_instance_count = local.function_update_feed_status_config.max_instance_count
948+
min_instance_count = local.function_update_feed_status_config.min_instance_count
949+
}
950+
}
951+
952+
896953
resource "google_cloud_scheduler_job" "export_csv_scheduler" {
897954
name = "export-csv-scheduler-${var.environment}"
898955
description = "Schedule the export_csv function"
