
Commit a2413f5

feat: enhance gbfs validation pipeline for version tracking (#1064)

1 parent: f6273e7

20 files changed: +1067 -680 lines

.github/workflows/db-update.yml
Lines changed: 3 additions & 3 deletions

@@ -199,12 +199,12 @@ jobs:
         id: getsyspath
         run: echo "PATH=$(realpath systems.csv)" >> $GITHUB_OUTPUT

-      - name: GTFS - Update Database Content
-        run: scripts/populate-db.sh ${{ steps.getpath.outputs.PATH }} > populate.log
-
       - name: GBFS - Update Database Content
         run: scripts/populate-db.sh ${{ steps.getsyspath.outputs.PATH }} gbfs >> populate-gbfs.log

+      - name: GTFS - Update Database Content
+        run: scripts/populate-db.sh ${{ steps.getpath.outputs.PATH }} > populate.log
+
       - name: GTFS - Upload log file for verification
         if: ${{ always() }}
         uses: actions/upload-artifact@v4

api/src/scripts/gbfs_utils/comparison.py
Lines changed: 4 additions & 1 deletion

@@ -38,6 +38,9 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
     df_from_db = df_from_db.fillna("")
     df_from_csv = df_from_csv.fillna("")

+    df_from_db = df_from_db.drop(columns=["Supported Versions"])
+    df_from_csv = df_from_csv.drop(columns=["Supported Versions"])
+
     if df_from_db.empty:
         logger.info("No data found in the database.")
         return None, None

@@ -63,7 +66,7 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
     df_db_common = df_from_db.loc[common_ids]
     df_csv_common = df_from_csv.loc[common_ids]
     differences = df_db_common != df_csv_common
-    differing_rows = df_db_common[differences.any(axis=1)]
+    differing_rows = df_csv_common[differences.any(axis=1)]

     if not differing_rows.empty:
         logger.info("Rows with differences:")

api/src/scripts/gbfs_utils/fetching.py
Lines changed: 0 additions & 27 deletions

@@ -52,30 +52,3 @@ def get_field_url(fields, field_name):
         if field.get("name") == field_name:
             return field.get("url")
     return None
-
-
-def get_gbfs_versions(gbfs_versions_url, auto_discovery_url, auto_discovery_version, logger):
-    """Get the GBFS versions from the gbfs_versions_url."""
-    # Default version info extracted from auto-discovery url
-    version_info = {
-        "version": auto_discovery_version if auto_discovery_version else "1.0",
-        "url": auto_discovery_url,
-    }
-    try:
-        if not gbfs_versions_url:
-            return [version_info]
-        logger.info(f"Fetching GBFS versions from: {gbfs_versions_url}")
-        data = get_data_content(gbfs_versions_url, logger)
-        if not data:
-            logger.warning(f"No data found in the GBFS versions URL -> {gbfs_versions_url}.")
-            return [version_info]
-        gbfs_versions = data.get("versions", [])
-
-        # Append the version info from auto-discovery if it doesn't exist
-        if not any(gv.get("version") == auto_discovery_version for gv in gbfs_versions):
-            gbfs_versions.append(version_info)
-
-        return gbfs_versions
-    except Exception as e:
-        logger.error(f"Error fetching version data: {e}")
-        return [version_info]

api/src/scripts/gbfs_utils/gbfs_versions.py

Lines changed: 0 additions & 19 deletions
This file was deleted.

api/src/scripts/populate_db_gbfs.py
Lines changed: 5 additions & 30 deletions

@@ -1,16 +1,15 @@
 from datetime import datetime

 import pandas as pd
-import pytz
 import pycountry
+import pytz

-from shared.database.database import generate_unique_id, configure_polymorphic_mappers
-from shared.database_gen.sqlacodegen_models import Gbfsfeed, Location, Gbfsversion, Externalid
 from scripts.gbfs_utils.comparison import generate_system_csv_from_db, compare_db_to_csv
-from scripts.gbfs_utils.fetching import fetch_data, get_data_content, get_gbfs_versions
+from scripts.gbfs_utils.fetching import fetch_data, get_data_content
 from scripts.gbfs_utils.license import get_license_url
 from scripts.populate_db import DatabasePopulateHelper, set_up_configs
-from scripts.gbfs_utils.gbfs_versions import OFFICIAL_VERSIONS
+from shared.database.database import generate_unique_id, configure_polymorphic_mappers
+from shared.database_gen.sqlacodegen_models import Gbfsfeed, Location, Externalid


 class GBFSDatabasePopulateHelper(DatabasePopulateHelper):

@@ -45,7 +44,6 @@ def deprecate_feeds(self, deprecated_feeds):
             if gbfs_feed:
                 self.logger.info(f"Deprecating feed with stable_id={stable_id}")
                 gbfs_feed.status = "deprecated"
-                # session.flush()

     def populate_db(self):
         """Populate the database with the GBFS feeds"""

@@ -65,12 +63,9 @@ def populate_db(self):
                 self.logger.info(f"Processing row {index + 1} of {len(added_or_updated_feeds)}")
                 stable_id = self.get_stable_id(row)
                 gbfs_feed = self.query_feed_by_stable_id(session, stable_id, "gbfs")
-                fetched_data = fetch_data(
-                    row["Auto-Discovery URL"], self.logger, ["system_information", "gbfs_versions"], ["version"]
-                )
+                fetched_data = fetch_data(row["Auto-Discovery URL"], self.logger, ["system_information"])
                 # If the feed already exists, update it. Otherwise, create a new feed.
                 if gbfs_feed:
-                    feed_id = gbfs_feed.id
                     self.logger.info(f"Updating feed {stable_id} - {row['Name']}")
                 else:
                     feed_id = generate_unique_id()

@@ -108,26 +103,6 @@ def populate_db(self):
                     gbfs_feed.locations.clear()
                     gbfs_feed.locations = [location]

-                    # Add the GBFS versions
-                    versions = get_gbfs_versions(
-                        fetched_data.get("gbfs_versions"),
-                        row["Auto-Discovery URL"],
-                        fetched_data.get("version"),
-                        self.logger,
-                    )
-                    existing_versions = [version.version for version in gbfs_feed.gbfsversions]
-                    for version in versions:
-                        version_value = version.get("version")
-                        if version_value.upper() in OFFICIAL_VERSIONS and version_value not in existing_versions:
-                            gbfs_feed.gbfsversions.append(
-                                Gbfsversion(
-                                    feed_id=feed_id,
-                                    url=version.get("url"),
-                                    version=version_value,
-                                )
-                            )
-
-                    # self.db.session.flush()
                     self.logger.info(80 * "-")

             # self.db.session.commit()

api/src/shared/database/database.py
Lines changed: 22 additions & 8 deletions

@@ -7,7 +7,15 @@
 from dotenv import load_dotenv
 from sqlalchemy import create_engine, text, event
 from sqlalchemy.orm import load_only, Query, class_mapper, Session, mapper
-from shared.database_gen.sqlacodegen_models import Base, Feed, Gtfsfeed, Gtfsrealtimefeed, Gbfsfeed
+from shared.database_gen.sqlacodegen_models import (
+    Base,
+    Feed,
+    Gtfsfeed,
+    Gtfsrealtimefeed,
+    Gbfsversion,
+    Gbfsfeed,
+    Gbfsvalidationreport,
+)
 from sqlalchemy.orm import sessionmaker
 import logging

@@ -44,20 +52,26 @@ def configure_polymorphic_mappers():
     gbfsfeed_mapper.polymorphic_identity = Gbfsfeed.__tablename__.lower()


+cascade_entities = {
+    Gtfsfeed: [Gtfsfeed.redirectingids, Gtfsfeed.redirectingids_, Gtfsfeed.externalids],
+    Gbfsversion: [Gbfsversion.gbfsendpoints, Gbfsversion.gbfsvalidationreports],
+    Gbfsfeed: [Gbfsfeed.gbfsversions],
+    Gbfsvalidationreport: [Gbfsvalidationreport.gbfsnotices],
+}
+
+
 def set_cascade(mapper, class_):
     """
     Set cascade for relationships in Gtfsfeed.
     This allows to delete/add the relationships when their respective relation array changes.
     """
-    if class_.__name__ == "Gtfsfeed":
+    mapper.confirm_deleted_rows = False  # Disable confirm_deleted_rows to avoid warnings in logs with delete-orphan
+    if class_ in cascade_entities:
+        relationship_keys = {rel.prop.key for rel in cascade_entities[class_]}
         for rel in class_.__mapper__.relationships:
-            if rel.key in [
-                "redirectingids",
-                "redirectingids_",
-                "externalids",
-                "externalids_",
-            ]:
+            if rel.key in relationship_keys:
                 rel.cascade = "all, delete-orphan"
+                rel.passive_deletes = True


 def mapper_configure_listener(mapper, class_):
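The cascade_entities table is applied per mapped class as SQLAlchemy configures its mappers. As a self-contained sketch of the pattern (the @event.listens_for registration and the flattened key set are assumptions; this commit only shows set_cascade itself):

    # Condensed illustration: the project maps relationship keys per class via
    # cascade_entities; this sketch flattens them into one set.
    from sqlalchemy import event
    from sqlalchemy.orm import Mapper

    CASCADE_KEYS = {
        "redirectingids", "redirectingids_", "externalids",
        "gbfsversions", "gbfsendpoints", "gbfsvalidationreports", "gbfsnotices",
    }

    @event.listens_for(Mapper, "mapper_configured")
    def _set_cascade(mapper, class_):
        # Suppress the "deleted rows" warnings that delete-orphan can trigger.
        mapper.confirm_deleted_rows = False
        for rel in mapper.relationships:
            if rel.key in CASCADE_KEYS:
                # Removing a child from the parent's collection now deletes the
                # row instead of nulling its foreign key.
                rel.cascade = "all, delete-orphan"
                rel.passive_deletes = True

With delete-orphan in place, the validation pipeline can replace a feed's gbfsversions collection wholesale and let SQLAlchemy delete the versions that dropped out of the list.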

functions-python/gbfs_validator/README.md
Lines changed: 3 additions & 3 deletions

@@ -32,9 +32,9 @@ The message published by the batch function to the Pub/Sub topic follows this fo

 - **`gbfs-validator-batch`**: Triggered per execution ID, this function iterates over all GBFS feeds, preparing and publishing individual messages to the Pub/Sub topic.
 - **`gbfs-validator-pubsub`**: Triggered per feed, this function performs the following steps:
-  1. **Download the feed snapshot to GCP**: It uploads all related files to the specified Cloud Storage bucket and updates the `gbfs.json` file to point to the newly uploaded files.
-  2. **Validate the feed**: Run the GBFS validator on the feed snapshot.
-  3. **Update the database**: The function updates the database with the snapshot information and validation report details.
+  1. **Access the autodiscovery URL and update versions**: The function accesses the autodiscovery URL to update the **GBFSVersions** table.
+  2. **Measure latency and validate the feed**: For each version, the function measures the response latency and validates the feed. The validation summary is stored in GCP, and the total error count is extracted and saved in the **GBFSValidationReport**.
+  3. **Store validation details**: The function stores detailed errors as **GBFSNotice** entities.

 ## Function Configuration
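Step 2 of the updated README mentions measuring response latency per version. A hedged illustration of one way to do that with requests (the function's actual helper names and timeout are not shown in this commit):

    # Illustrative only; the helper name and timeout are assumptions.
    import requests

    def measure_latency_ms(url: str) -> float | None:
        """Return the response latency for a feed URL in milliseconds."""
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            # elapsed covers request send until the response headers arrive.
            return response.elapsed.total_seconds() * 1000.0
        except requests.RequestException:
            return None

    print(measure_latency_ms("https://example.com/gbfs.json"))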

functions-python/gbfs_validator/requirements.txt
Lines changed: 5 additions & 1 deletion

@@ -23,4 +23,8 @@ google-cloud-datastore
 cloudevents~=1.10.1

 # Configuration
-python-dotenv==1.0.0
+python-dotenv==1.0.0
+
+# Additional packages for the function
+jsonpath-ng
+language-tags
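The two new packages suggest JSON-path extraction over GBFS documents and BCP 47 language-tag validation; their actual call sites are not part of this commit. A small illustration:

    # Illustrative use of the new dependencies.
    from jsonpath_ng import parse
    from language_tags import tags

    gbfs_json = {
        "data": {
            "en": {
                "feeds": [{"name": "system_information", "url": "https://example.com/si.json"}]
            }
        }
    }

    # Pull every feed URL out of an auto-discovery document.
    matches = parse("data.en.feeds[*].url").find(gbfs_json)
    print([m.value for m in matches])  # ['https://example.com/si.json']

    # Check that a feed's declared language is a valid BCP 47 tag.
    print(tags.check("en"))  # True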
