Skip to content

Commit 8bd3bd6

Browse files
authored
fix: Added missing status and bounding box for RT feeds (#918)
1 parent 3f44367 commit 8bd3bd6

File tree

4 files changed

+168
-109
lines changed

4 files changed

+168
-109
lines changed

api/src/shared/common/db_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def get_gtfs_rt_feeds_query(
147147
)
148148
subquery = gtfs_rt_feed_filter.filter(
149149
select(Gtfsrealtimefeed.id)
150-
.join(Location, Gtfsrealtimefeed.locations)
150+
.join(Location, Gtfsrealtimefeed.locations, isouter=True)
151151
.join(Entitytype, Gtfsrealtimefeed.entitytypes)
152152
).subquery()
153153
feed_query = db_session.query(Gtfsrealtimefeed).filter(Gtfsrealtimefeed.id.in_(subquery))

functions-python/export_csv/src/main.py

Lines changed: 123 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from geoalchemy2.shape import to_shape
2929

3030
from shared.helpers.logger import Logger
31-
from shared.database_gen.sqlacodegen_models import Gtfsfeed, Gtfsrealtimefeed
31+
from shared.database_gen.sqlacodegen_models import Gtfsfeed, Gtfsrealtimefeed, Feed
3232
from shared.common.db_utils import get_all_gtfs_rt_feeds, get_all_gtfs_feeds
3333

3434
from shared.helpers.database import Database
@@ -69,6 +69,36 @@
6969
]
7070

7171

72+
class BoundingBox:
    """
    Class used to keep the GTFS feed bounding box in a lookup table so it can be
    used in associated real-time feeds.

    All values default to None, so an instance built without arguments writes
    None into every bounding box column when fill_data is called.

    :param minimum_latitude: Southern edge of the box.
    :param maximum_latitude: Northern edge of the box.
    :param minimum_longitude: Western edge of the box.
    :param maximum_longitude: Eastern edge of the box.
    :param extracted_on: Value written to the extracted_on column.
    """

    def __init__(
        self,
        minimum_latitude=None,
        maximum_latitude=None,
        minimum_longitude=None,
        maximum_longitude=None,
        extracted_on=None,
    ):
        self.minimum_latitude = minimum_latitude
        self.maximum_latitude = maximum_latitude
        self.minimum_longitude = minimum_longitude
        self.maximum_longitude = maximum_longitude
        self.extracted_on = extracted_on

    def __repr__(self):
        # Instances live in a lookup table; a readable repr makes log lines and
        # failed test assertions show the actual coordinates.
        return (
            f"{type(self).__name__}("
            f"minimum_latitude={self.minimum_latitude!r}, "
            f"maximum_latitude={self.maximum_latitude!r}, "
            f"minimum_longitude={self.minimum_longitude!r}, "
            f"maximum_longitude={self.maximum_longitude!r}, "
            f"extracted_on={self.extracted_on!r})"
        )

    def fill_data(self, data):
        """
        Write the bounding box values into the given CSV row dictionary.

        The five "location.bounding_box.*" keys are set (overwriting any value
        already present); every other key of data is left untouched.

        :param data: Mutable mapping holding the CSV row; modified in place.
        :return: None.
        """
        data["location.bounding_box.minimum_latitude"] = self.minimum_latitude
        data["location.bounding_box.maximum_latitude"] = self.maximum_latitude
        data["location.bounding_box.minimum_longitude"] = self.minimum_longitude
        data["location.bounding_box.maximum_longitude"] = self.maximum_longitude
        data["location.bounding_box.extracted_on"] = self.extracted_on
97+
98+
99+
# Maps a GTFS feed id to the BoundingBox computed for that feed. It is filled
# while the GTFS feeds are processed and read afterwards when building the rows
# of the associated real-time feeds.
# NOTE(review): this is module-level state and nothing visible here clears it,
# so entries presumably persist across calls within one process - confirm that
# stale entries are acceptable.
bounding_box_lookup = {}
100+
101+
72102
@functions_framework.http
73103
def export_and_upload_csv(request=None):
74104
"""
@@ -111,12 +141,11 @@ def fetch_feeds() -> Iterator[Dict]:
111141
:return: Data to write to the output CSV file.
112142
"""
113143
db = Database(database_url=os.getenv("FEEDS_DATABASE_URL"))
114-
logging.info(f"Using database {db.database_url}")
115144
try:
116145
with db.start_db_session() as session:
117146
feed_count = 0
118147
for feed in get_all_gtfs_feeds(session, include_wip=False):
119-
yield get_feed_csv_data(feed)
148+
yield get_gtfs_feed_csv_data(feed)
120149
feed_count += 1
121150

122151
logging.info(f"Processed {feed_count} GTFS feeds.")
@@ -138,10 +167,20 @@ def extract_numeric_version(version):
138167
return match.group(1) if match else version
139168

140169

141-
def get_feed_csv_data(feed: Gtfsfeed):
170+
def get_gtfs_feed_csv_data(feed: Gtfsfeed):
142171
"""
143-
This function takes a GtfsFeed and returns a dictionary with the data to be written to the CSV file.
172+
This function takes a Gtfsfeed object and returns a dictionary with the data to be written to the CSV file.
173+
:param feed: Gtfsfeed object containing feed data.
174+
:return: Dictionary with feed data formatted for CSV output.
144175
"""
176+
joined_features = ""
177+
validated_at = None
178+
bounding_box = None
179+
180+
# First extract the common feed data
181+
data = get_feed_csv_data(feed)
182+
183+
# Then supplement with the GTFS specific data
145184
latest_dataset = next(
146185
(
147186
dataset
@@ -150,11 +189,6 @@ def get_feed_csv_data(feed: Gtfsfeed):
150189
),
151190
None,
152191
)
153-
154-
joined_features = ""
155-
validated_at = None
156-
minimum_latitude = maximum_latitude = minimum_longitude = maximum_longitude = None
157-
158192
if latest_dataset and latest_dataset.validation_reports:
159193
# Keep the report from the more recent validator version
160194
latest_report = max(
@@ -177,10 +211,18 @@ def get_feed_csv_data(feed: Gtfsfeed):
177211
if latest_dataset.bounding_box:
178212
shape = to_shape(latest_dataset.bounding_box)
179213
if shape and shape.bounds:
180-
minimum_latitude = shape.bounds[1]
181-
maximum_latitude = shape.bounds[3]
182-
minimum_longitude = shape.bounds[0]
183-
maximum_longitude = shape.bounds[2]
214+
bounding_box = BoundingBox(
215+
minimum_latitude=shape.bounds[1],
216+
maximum_latitude=shape.bounds[3],
217+
minimum_longitude=shape.bounds[0],
218+
maximum_longitude=shape.bounds[2],
219+
extracted_on=validated_at,
220+
)
221+
222+
# Keep the bounding box for that GTFS feed so it can be used in associated real-time feeds, if any
223+
if bounding_box:
224+
bounding_box.fill_data(data)
225+
bounding_box_lookup[feed.id] = bounding_box
184226

185227
latest_url = latest_dataset.hosted_url if latest_dataset else None
186228
if latest_url:
@@ -193,7 +235,47 @@ def get_feed_csv_data(feed: Gtfsfeed):
193235
if position != -1:
194236
# Construct the new URL
195237
latest_url = latest_url[: position + len(feed.stable_id) + 1] + "latest.zip"
238+
data["urls.latest"] = latest_url
239+
data["features"] = joined_features
240+
241+
return data
242+
196243

244+
def get_feed_csv_data(feed: Feed):
245+
"""
246+
This function takes a generic feed and returns a dictionary with the data to be written to the CSV file.
247+
Any specific data (for GTFS or GTFS_RT) has to be added after this call.
248+
"""
249+
250+
redirect_ids = []
251+
redirect_comments = []
252+
# Add concatenated redirect IDs
253+
if feed.redirectingids:
254+
for redirect in feed.redirectingids:
255+
if redirect and redirect.target and redirect.target.stable_id:
256+
stripped_id = redirect.target.stable_id.strip()
257+
if stripped_id:
258+
redirect_ids.append(stripped_id)
259+
redirect_comment = redirect.redirect_comment or ""
260+
redirect_comments.append(redirect_comment)
261+
262+
redirect_ids_str = "|".join(redirect_ids)
263+
redirect_comments_str = "|".join(redirect_comments)
264+
265+
# If for some reason there is no redirect_ids, discard the redirect_comments if any
266+
if redirect_ids_str == "":
267+
redirect_comments_str = ""
268+
else:
269+
# If there is no comment but we do have redirects, use an empty string instead of
270+
# potentially a bunch of vertical bars.
271+
redirect_comments_str = (
272+
""
273+
if (redirect_comments_str or "").strip("|") == ""
274+
else redirect_comments_str
275+
)
276+
277+
# Some of the data is set to None or "" here but will be set to the proper value
278+
# later depending on the type (GTFS or GTFS_RT)
197279
data = {
198280
"id": feed.stable_id,
199281
"data_type": feed.data_type,
@@ -216,55 +298,28 @@ def get_feed_csv_data(feed: Gtfsfeed):
216298
"urls.authentication_type": feed.authentication_type,
217299
"urls.authentication_info": feed.authentication_info_url,
218300
"urls.api_key_parameter_name": feed.api_key_parameter_name,
219-
"urls.latest": latest_url,
301+
"urls.latest": None,
220302
"urls.license": feed.license_url,
221-
"location.bounding_box.minimum_latitude": minimum_latitude,
222-
"location.bounding_box.maximum_latitude": maximum_latitude,
223-
"location.bounding_box.minimum_longitude": minimum_longitude,
224-
"location.bounding_box.maximum_longitude": maximum_longitude,
303+
"location.bounding_box.minimum_latitude": None,
304+
"location.bounding_box.maximum_latitude": None,
305+
"location.bounding_box.minimum_longitude": None,
306+
"location.bounding_box.maximum_longitude": None,
225307
# We use the report validated_at date as the extracted_on date
226-
"location.bounding_box.extracted_on": validated_at,
308+
"location.bounding_box.extracted_on": None,
227309
"status": feed.status,
228-
"features": joined_features,
310+
"features": None,
311+
"redirect.id": redirect_ids_str,
312+
"redirect.comment": redirect_comments_str,
229313
}
230-
231-
redirect_ids = ""
232-
redirect_comments = ""
233-
# Add concatenated redirect IDs
234-
if feed.redirectingids:
235-
for redirect in feed.redirectingids:
236-
if redirect and redirect.target and redirect.target.stable_id:
237-
stripped_id = redirect.target.stable_id.strip()
238-
if stripped_id:
239-
redirect_ids = (
240-
redirect_ids + "|" + stripped_id
241-
if redirect_ids
242-
else stripped_id
243-
)
244-
redirect_comments = (
245-
redirect_comments + "|" + redirect.redirect_comment
246-
if redirect_comments
247-
else redirect.redirect_comment
248-
)
249-
if redirect_ids == "":
250-
redirect_comments = ""
251-
else:
252-
# If there is no comment but we do have redirects, use an empty string instead of a
253-
# potentially a bunch of vertical bars.
254-
redirect_comments = (
255-
"" if redirect_comments.strip("|") == "" else redirect_comments
256-
)
257-
258-
data["redirect.id"] = redirect_ids
259-
data["redirect.comment"] = redirect_comments
260-
261314
return data
262315

263316

264317
def get_gtfs_rt_feed_csv_data(feed: Gtfsrealtimefeed):
265318
"""
266319
This function takes a GtfsRTFeed and returns a dictionary with the data to be written to the CSV file.
267320
"""
321+
data = get_feed_csv_data(feed)
322+
268323
entity_types = ""
269324
if feed.entitytypes:
270325
valid_entity_types = [
@@ -274,49 +329,31 @@ def get_gtfs_rt_feed_csv_data(feed: Gtfsrealtimefeed):
274329
]
275330
valid_entity_types = sorted(valid_entity_types)
276331
entity_types = "|".join(valid_entity_types)
332+
data["entity_type"] = entity_types
277333

278334
static_references = ""
335+
first_feed_reference = None
279336
if feed.gtfs_feeds:
280337
valid_feed_references = [
281338
feed_reference.stable_id.strip()
282339
for feed_reference in feed.gtfs_feeds
283340
if feed_reference and feed_reference.stable_id
284341
]
285342
static_references = "|".join(valid_feed_references)
286-
287-
data = {
288-
"id": feed.stable_id,
289-
"data_type": feed.data_type,
290-
"entity_type": entity_types,
291-
"location.country_code": ""
292-
if not feed.locations or not feed.locations[0]
293-
else feed.locations[0].country_code,
294-
"location.subdivision_name": ""
295-
if not feed.locations or not feed.locations[0]
296-
else feed.locations[0].subdivision_name,
297-
"location.municipality": ""
298-
if not feed.locations or not feed.locations[0]
299-
else feed.locations[0].municipality,
300-
"provider": feed.provider,
301-
"name": feed.feed_name,
302-
"note": feed.note,
303-
"feed_contact_email": feed.feed_contact_email,
304-
"static_reference": static_references,
305-
"urls.direct_download": feed.producer_url,
306-
"urls.authentication_type": feed.authentication_type,
307-
"urls.authentication_info": feed.authentication_info_url,
308-
"urls.api_key_parameter_name": feed.api_key_parameter_name,
309-
"urls.latest": None,
310-
"urls.license": feed.license_url,
311-
"location.bounding_box.minimum_latitude": None,
312-
"location.bounding_box.maximum_latitude": None,
313-
"location.bounding_box.minimum_longitude": None,
314-
"location.bounding_box.maximum_longitude": None,
315-
"location.bounding_box.extracted_on": None,
316-
"features": None,
317-
"redirect.id": None,
318-
"redirect.comment": None,
319-
}
343+
# If there is more than one GTFS feed associated with this RT feed (why?)
344+
# We will arbitrarily use the first one in the list for the bounding box.
345+
first_feed_reference = feed.gtfs_feeds[0] if feed.gtfs_feeds else None
346+
data["static_reference"] = static_references
347+
348+
# For the RT feed, we use the bounding box of the associated GTFS feed, if any.
349+
# These bounding boxes were collected when processing the GTFS feeds.
350+
bounding_box = (
351+
bounding_box_lookup.get(first_feed_reference.id)
352+
if first_feed_reference
353+
else None
354+
)
355+
if bounding_box:
356+
bounding_box.fill_data(data)
320357

321358
return data
322359

functions-python/export_csv/tests/conftest.py

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def populate_database():
4848
session = get_testing_session()
4949
fake = Faker()
5050

51+
feed_reference = None
5152
feeds = []
5253
# We create 3 feeds. The first one is active. The third one is inactive and redirected to the first one.
5354
# The second one is active but not redirected.
@@ -65,6 +66,8 @@ def populate_database():
6566
feed_contact_email=f"gtfs-{i}[email protected]",
6667
provider=f"gtfs-{i} Some fake company",
6768
)
69+
if i == 0:
70+
feed_reference = feed
6871
feeds.append(feed)
6972

7073
# Then fill the specific parameters for each feed
@@ -184,27 +187,46 @@ def populate_database():
184187
session.add(tu_entitytype)
185188

186189
# GTFS Realtime feeds
187-
gtfs_rt_feeds = [
188-
Gtfsrealtimefeed(
189-
id=fake.uuid4(),
190-
data_type="gtfs_rt",
191-
feed_name=f"gtfs-rt-{i} Some fake name",
192-
note=f"gtfs-rt-{i} Some fake note",
193-
producer_url=f"https://gtfs-rt-{i}_some_fake_producer_url",
194-
authentication_type=str(i),
195-
authentication_info_url=f"https://gtfs-rt-{i}_some_fake_authentication_info_url",
196-
api_key_parameter_name=f"gtfs-rt-{i}_fake_api_key_parameter_name",
197-
license_url=f"https://gtfs-rt-{i}_some_fake_license_url",
198-
stable_id=f"gtfs-rt-{i}",
199-
status="inactive" if i == 1 else "active",
200-
feed_contact_email=f"gtfs-rt-{i}[email protected]",
201-
provider=f"gtfs-rt-{i} Some fake company",
202-
entitytypes=[vp_entitytype, tu_entitytype] if (i == 0) else [vp_entitytype],
190+
rt_feeds = []
191+
for i in range(3):
192+
rt_feeds.append(
193+
Gtfsrealtimefeed(
194+
id=fake.uuid4(),
195+
data_type="gtfs_rt",
196+
feed_name=f"gtfs-rt-{i} Some fake name",
197+
note=f"gtfs-rt-{i} Some fake note",
198+
producer_url=f"https://gtfs-rt-{i}_some_fake_producer_url",
199+
authentication_type=str(i),
200+
authentication_info_url=f"https://gtfs-rt-{i}_some_fake_authentication_info_url",
201+
api_key_parameter_name=f"gtfs-rt-{i}_fake_api_key_parameter_name",
202+
license_url=f"https://gtfs-rt-{i}_some_fake_license_url",
203+
stable_id=f"gtfs-rt-{i}",
204+
status="inactive" if i == 1 else "active",
205+
feed_contact_email=f"gtfs-rt-{i}[email protected]",
206+
provider=f"gtfs-rt-{i} Some fake company",
207+
entitytypes=[vp_entitytype, tu_entitytype]
208+
if i == 0
209+
else [vp_entitytype],
210+
gtfs_feeds=[feed_reference] if i == 0 else [],
211+
)
203212
)
204-
for i in range(3)
213+
# rt_feeds[1] is inactive and redirected to rt_feeds[0] and rt_feeds[2]
214+
rt_feeds[1].redirectingids = [
215+
Redirectingid(
216+
source_id=rt_feeds[1].id,
217+
target_id=rt_feeds[0].id,
218+
redirect_comment="comment 1",
219+
target=rt_feeds[0],
220+
),
221+
Redirectingid(
222+
source_id=rt_feeds[1].id,
223+
target_id=rt_feeds[2].id,
224+
redirect_comment="comment 2",
225+
target=rt_feeds[2],
226+
),
205227
]
206-
gtfs_rt_feeds[0].gtfs_feeds.append(active_gtfs_feeds[0])
207-
session.add_all(gtfs_rt_feeds)
228+
229+
session.add_all(rt_feeds)
208230

209231
session.commit()
210232

0 commit comments

Comments
 (0)