Skip to content

Commit f00a829

Browse files
authored
Removed from the DB gbfs feeds that were removed from the csv file. Also adjusted cascade delete on the DB.
1 parent 453f002 commit f00a829

File tree

14 files changed

+927
-55
lines changed

14 files changed

+927
-55
lines changed

api/src/scripts/gbfs_utils/comparison.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55

66
def generate_system_csv_from_db(df, db_session):
77
"""Generate a DataFrame from the database with the same columns as the CSV file."""
8-
stable_ids = "gbfs-" + df["System ID"]
98
query = db_session.query(Gbfsfeed)
10-
query = query.filter(Gbfsfeed.stable_id.in_(stable_ids.to_list()))
119
query = query.options(
1210
joinedload(Gbfsfeed.locations), joinedload(Gbfsfeed.gbfsversions), joinedload(Gbfsfeed.externalids)
1311
)
@@ -49,8 +47,9 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
4947
return None, None
5048

5149
# Align both DataFrames by "System ID"
52-
df_from_db.set_index("System ID", inplace=True)
53-
df_from_csv.set_index("System ID", inplace=True)
50+
# Keep the System ID column because it's used later in the code
51+
df_from_db.set_index("System ID", inplace=True, drop=False)
52+
df_from_csv.set_index("System ID", inplace=True, drop=False)
5453

5554
# Find rows that are in the CSV but not in the DB (new feeds)
5655
missing_in_db = df_from_csv[~df_from_csv.index.isin(df_from_db.index)]
@@ -68,7 +67,11 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
6867
common_ids = df_from_db.index.intersection(df_from_csv.index)
6968
df_db_common = df_from_db.loc[common_ids]
7069
df_csv_common = df_from_csv.loc[common_ids]
71-
differences = df_db_common != df_csv_common
70+
71+
# Exclude 'Location' from comparison because the DB values might have been changed in the
72+
# python function that calculates the location.
73+
columns_to_compare = [col for col in df_db_common.columns if col != "Location"]
74+
differences = df_db_common[columns_to_compare] != df_csv_common[columns_to_compare]
7275
differing_rows = df_csv_common[differences.any(axis=1)]
7376

7477
if not differing_rows.empty:
@@ -83,6 +86,7 @@ def compare_db_to_csv(df_from_db, df_from_csv, logger):
8386
logger.info(80 * "-")
8487

8588
# Merge differing rows with missing_in_db to capture all new or updated feeds
86-
all_differing_or_new_rows = pd.concat([differing_rows, missing_in_db]).reset_index()
89+
# Drop the index because we have it as the System ID column.
90+
all_differing_or_new_rows = pd.concat([differing_rows, missing_in_db]).reset_index(drop=True)
8791

8892
return all_differing_or_new_rows, missing_in_csv

api/src/scripts/populate_db_gbfs.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,20 @@ def deprecate_feeds(self, deprecated_feeds):
3636
self.logger.info("No feeds to deprecate.")
3737
return
3838

39-
self.logger.info(f"Deprecating {len(deprecated_feeds)} feed(s).")
39+
self.logger.info(f"Deleting {len(deprecated_feeds)} feed(s).")
4040
with self.db.start_db_session() as session:
4141
for index, row in deprecated_feeds.iterrows():
4242
stable_id = self.get_stable_id(row)
4343
gbfs_feed = self.query_feed_by_stable_id(session, stable_id, "gbfs")
4444
if gbfs_feed:
45-
self.logger.info(f"Deprecating feed with stable_id={stable_id}")
46-
gbfs_feed.status = "deprecated"
45+
# A note about the deletion done here:
46+
# Some other tables have a foreign key pointing to the feed, and these cannot be null
47+
# (e.g. gbfsversion). So the delete will fail, unless we cascade the deletion of the
48+
# gbfs_feed to the deletion of the entry in gbfsversion, which is done in the DB
49+
# schema. It's also the case for other tables and other foreign keys.
50+
self.logger.info(f"Deleting feed with stable_id={stable_id}")
51+
session.delete(gbfs_feed)
52+
session.flush()
4753

4854
def populate_db(self, session, fetch_url=True):
4955
"""Populate the database with the GBFS feeds"""

api/src/shared/database/database.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
Gbfsversion,
1616
Gbfsfeed,
1717
Gbfsvalidationreport,
18+
Osmlocationgroup,
19+
Validationreport,
1820
)
1921
from sqlalchemy.orm import sessionmaker
2022
import logging
@@ -52,12 +54,39 @@ def configure_polymorphic_mappers():
5254
gbfsfeed_mapper.polymorphic_identity = Gbfsfeed.__tablename__.lower()
5355

5456

57+
# The `cascade_entities` dictionary maps SQLAlchemy models to lists of their relationship attributes
# that should have cascading delete-orphan behavior. When a parent entity (such as `Feed`, `Gbfsfeed`, etc.)
# is deleted, any related child entities listed here will also be deleted if they become orphans.
# The `set_cascade` function applies this configuration by setting the `cascade` property to "all, delete-orphan"
# and enabling `passive_deletes` for each specified relationship. This leverages the database's ON DELETE CASCADE
# constraints and ensures that related records are cleaned up automatically when a parent is removed.
# Each relationship is annotated with the DB foreign-key constraint it corresponds to, where applicable.
cascade_entities = {
    Feed: [
        Feed.externalids,  # externalid_feed_id_fkey
        Feed.feedlocationgrouppoints,
        Feed.feedosmlocationgroups,  # feedosmlocation_feed_id_fkey
        Feed.gtfsdatasets,  # gtfsdataset_feed_id_fkey
        Feed.officialstatushistories,  # officialstatushistory_feed_id_fkey
        Feed.redirectingids,  # redirectingid_source_id_fkey
        Feed.redirectingids_,  # redirectingid_target_id_fkey
    ],
    Gbfsfeed: [
        Gbfsfeed.gbfsversions,  # gbfsversion_feed_id_fkey
    ],
    Gbfsvalidationreport: [
        Gbfsvalidationreport.gbfsnotices,  # gbfsnotice_validation_report_id_fkey
    ],
    Gbfsversion: [
        Gbfsversion.gbfsendpoints,  # gbfsendpoint_gbfs_version_id_fkey
        Gbfsversion.gbfsvalidationreports,  # gbfsvalidationreport_gbfs_version_id_fkey
    ],
    Osmlocationgroup: [
        Osmlocationgroup.feedlocationgrouppoints,
        Osmlocationgroup.feedosmlocationgroups,  # feedosmlocation_group_id_fkey
    ],
    Validationreport: [
        Validationreport.notices,  # notice_validation_report_id_fkey
    ],
}
6291

6392

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import os
2+
3+
import pytest
4+
from fastapi import FastAPI
5+
from fastapi.testclient import TestClient
6+
from sqlalchemy import text
7+
8+
from shared.database.database import Database
9+
from main import app as application
10+
from tests.test_utils.database import populate_database
11+
12+
13+
@pytest.fixture(scope="package")
def app() -> FastAPI:
    """Return the FastAPI application under test, with all dependency overrides reset."""
    fastapi_app = application
    fastapi_app.dependency_overrides = {}
    return fastapi_app
17+
18+
19+
@pytest.fixture(scope="package")
def test_database():
    """Yield a populated test database, forcing all connections to the local test instance."""
    # Point every DB connection at the dedicated test database so the real
    # database is never touched by the test run.
    os.environ["FEEDS_DATABASE_URL"] = "postgresql://postgres:postgres@localhost:54320/MobilityDatabaseTest"

    # No seed data directories: tests create whatever rows they need themselves.
    with populate_database(Database(), [], []) as db:
        yield db
28+
29+
30+
@pytest.fixture(scope="package")
def client(app, test_database) -> TestClient:
    """Build an HTTP test client for the app.

    Depending on ``test_database`` guarantees the database fixture is set up
    before any request is issued.
    """
    http_client = TestClient(app)
    return http_client
33+
34+
35+
# Delete all data from the database after each test so tests don't have to
# coordinate DB ids between themselves.
@pytest.fixture(autouse=True)
def clean_database(test_database, request):
    """Truncate every public-schema table after a passing test.

    Data is intentionally left in place when a test fails (or never reached its
    call phase) so the database state can be inspected while debugging.
    """
    yield
    # `rep_call` is attached by the pytest_runtest_makereport hook. It is absent
    # when the test never ran its call phase (e.g. a setup error), so use
    # getattr instead of direct attribute access to avoid an AttributeError.
    report = getattr(request.node, "rep_call", None)
    if report is not None and report.outcome == "passed":
        with test_database.start_db_session() as session:
            # Materialize the table names before truncating: issuing TRUNCATE
            # while the SELECT cursor is still open could invalidate iteration.
            tables = [
                row[0]
                for row in session.execute(text("SELECT tablename FROM pg_tables WHERE schemaname = 'public'"))
            ]
            for table_name in tables:
                # Quote the identifier so mixed-case table names are handled.
                session.execute(text(f'TRUNCATE "{table_name}" CASCADE'))
            session.commit()
45+
46+
47+
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Attach each phase's report to the test item (``rep_setup``/``rep_call``/``rep_teardown``)
    so fixtures can inspect the test outcome afterwards."""
    outcome = yield
    attribute_name = f"rep_{call.when}"
    setattr(item, attribute_name, outcome.get_result())

0 commit comments

Comments
 (0)