2828from geoalchemy2 .shape import to_shape
2929
3030from shared .helpers .logger import Logger
31- from shared .database_gen .sqlacodegen_models import Gtfsfeed , Gtfsrealtimefeed
31+ from shared .database_gen .sqlacodegen_models import Gtfsfeed , Gtfsrealtimefeed , Feed
3232from shared .common .db_utils import get_all_gtfs_rt_feeds , get_all_gtfs_feeds
3333
3434from shared .helpers .database import Database
6969]
7070
7171
class BoundingBox:
    """
    Holds the bounding box of a GTFS feed (latitude/longitude extents and extraction date)
    so it can be kept in a lookup table and reused for the associated real-time feeds.
    """

    def __init__(
        self,
        minimum_latitude=None,
        maximum_latitude=None,
        minimum_longitude=None,
        maximum_longitude=None,
        extracted_on=None,
    ):
        """
        Store the four extents and the extraction date. Every value defaults to None.
        """
        (
            self.minimum_latitude,
            self.maximum_latitude,
            self.minimum_longitude,
            self.maximum_longitude,
            self.extracted_on,
        ) = (
            minimum_latitude,
            maximum_latitude,
            minimum_longitude,
            maximum_longitude,
            extracted_on,
        )

    def fill_data(self, data):
        """
        Write the bounding box values into a CSV row dictionary, in place.
        :param data: Dictionary of CSV column name to value. The five
            "location.bounding_box.*" entries are set (or overwritten).
        """
        columns = {
            "location.bounding_box.minimum_latitude": self.minimum_latitude,
            "location.bounding_box.maximum_latitude": self.maximum_latitude,
            "location.bounding_box.minimum_longitude": self.minimum_longitude,
            "location.bounding_box.maximum_longitude": self.maximum_longitude,
            "location.bounding_box.extracted_on": self.extracted_on,
        }
        # Plain item assignment, column by column, so existing keys keep their position
        # in the row and missing ones are appended in the order listed above.
        for column, value in columns.items():
            data[column] = value
98+
# Lookup table keyed by GTFS feed id, holding the BoundingBox extracted for that feed.
# It is filled while the GTFS feeds are processed and read back when processing the
# GTFS real-time feeds, which reuse the bounding box of their associated GTFS feed.
bounding_box_lookup = {}
100+
101+
72102@functions_framework .http
73103def export_and_upload_csv (request = None ):
74104 """
@@ -111,12 +141,11 @@ def fetch_feeds() -> Iterator[Dict]:
111141 :return: Data to write to the output CSV file.
112142 """
113143 db = Database (database_url = os .getenv ("FEEDS_DATABASE_URL" ))
114- logging .info (f"Using database { db .database_url } " )
115144 try :
116145 with db .start_db_session () as session :
117146 feed_count = 0
118147 for feed in get_all_gtfs_feeds (session , include_wip = False ):
119- yield get_feed_csv_data (feed )
148+ yield get_gtfs_feed_csv_data (feed )
120149 feed_count += 1
121150
122151 logging .info (f"Processed { feed_count } GTFS feeds." )
@@ -138,10 +167,20 @@ def extract_numeric_version(version):
138167 return match .group (1 ) if match else version
139168
140169
141- def get_feed_csv_data (feed : Gtfsfeed ):
170+ def get_gtfs_feed_csv_data (feed : Gtfsfeed ):
142171 """
143- This function takes a GtfsFeed and returns a dictionary with the data to be written to the CSV file.
172+ This function takes a Gtfsfeed object and returns a dictionary with the data to be written to the CSV file.
173+ :param feed: Gtfsfeed object containing feed data.
174+ :return: Dictionary with feed data formatted for CSV output.
144175 """
176+ joined_features = ""
177+ validated_at = None
178+ bounding_box = None
179+
180+ # First extract the common feed data
181+ data = get_feed_csv_data (feed )
182+
183+ # Then supplement with the GTFS specific data
145184 latest_dataset = next (
146185 (
147186 dataset
@@ -150,11 +189,6 @@ def get_feed_csv_data(feed: Gtfsfeed):
150189 ),
151190 None ,
152191 )
153-
154- joined_features = ""
155- validated_at = None
156- minimum_latitude = maximum_latitude = minimum_longitude = maximum_longitude = None
157-
158192 if latest_dataset and latest_dataset .validation_reports :
159193 # Keep the report from the more recent validator version
160194 latest_report = max (
@@ -177,10 +211,18 @@ def get_feed_csv_data(feed: Gtfsfeed):
177211 if latest_dataset .bounding_box :
178212 shape = to_shape (latest_dataset .bounding_box )
179213 if shape and shape .bounds :
180- minimum_latitude = shape .bounds [1 ]
181- maximum_latitude = shape .bounds [3 ]
182- minimum_longitude = shape .bounds [0 ]
183- maximum_longitude = shape .bounds [2 ]
214+ bounding_box = BoundingBox (
215+ minimum_latitude = shape .bounds [1 ],
216+ maximum_latitude = shape .bounds [3 ],
217+ minimum_longitude = shape .bounds [0 ],
218+ maximum_longitude = shape .bounds [2 ],
219+ extracted_on = validated_at ,
220+ )
221+
222+ # Keep the bounding box for that GTFS feed so it can be used in associated real-time feeds, if any
223+ if bounding_box :
224+ bounding_box .fill_data (data )
225+ bounding_box_lookup [feed .id ] = bounding_box
184226
185227 latest_url = latest_dataset .hosted_url if latest_dataset else None
186228 if latest_url :
@@ -193,7 +235,47 @@ def get_feed_csv_data(feed: Gtfsfeed):
193235 if position != - 1 :
194236 # Construct the new URL
195237 latest_url = latest_url [: position + len (feed .stable_id ) + 1 ] + "latest.zip"
238+ data ["urls.latest" ] = latest_url
239+ data ["features" ] = joined_features
240+
241+ return data
242+
196243
244+ def get_feed_csv_data (feed : Feed ):
245+ """
246+ This function takes a generic feed and returns a dictionary with the data to be written to the CSV file.
247+ Any specific data (for GTFS or GTFS_RT) has to be added after this call.
248+ """
249+
250+ redirect_ids = []
251+ redirect_comments = []
252+ # Add concatenated redirect IDs
253+ if feed .redirectingids :
254+ for redirect in feed .redirectingids :
255+ if redirect and redirect .target and redirect .target .stable_id :
256+ stripped_id = redirect .target .stable_id .strip ()
257+ if stripped_id :
258+ redirect_ids .append (stripped_id )
259+ redirect_comment = redirect .redirect_comment or ""
260+ redirect_comments .append (redirect_comment )
261+
262+ redirect_ids_str = "|" .join (redirect_ids )
263+ redirect_comments_str = "|" .join (redirect_comments )
264+
265+ # If for some reason there is no redirect_ids, discard the redirect_comments if any
266+ if redirect_ids_str == "" :
267+ redirect_comments_str = ""
268+ else :
269+ # If there is no comment but we do have redirects, use an empty string instead of
270+ # potentially a bunch of vertical bars.
271+ redirect_comments_str = (
272+ ""
273+ if (redirect_comments_str or "" ).strip ("|" ) == ""
274+ else redirect_comments_str
275+ )
276+
277+ # Some of the data is set to None or "" here but will be set to the proper value
278+ # later depending on the type (GTFS or GTFS_RT)
197279 data = {
198280 "id" : feed .stable_id ,
199281 "data_type" : feed .data_type ,
@@ -216,55 +298,28 @@ def get_feed_csv_data(feed: Gtfsfeed):
216298 "urls.authentication_type" : feed .authentication_type ,
217299 "urls.authentication_info" : feed .authentication_info_url ,
218300 "urls.api_key_parameter_name" : feed .api_key_parameter_name ,
219- "urls.latest" : latest_url ,
301+ "urls.latest" : None ,
220302 "urls.license" : feed .license_url ,
221- "location.bounding_box.minimum_latitude" : minimum_latitude ,
222- "location.bounding_box.maximum_latitude" : maximum_latitude ,
223- "location.bounding_box.minimum_longitude" : minimum_longitude ,
224- "location.bounding_box.maximum_longitude" : maximum_longitude ,
303+ "location.bounding_box.minimum_latitude" : None ,
304+ "location.bounding_box.maximum_latitude" : None ,
305+ "location.bounding_box.minimum_longitude" : None ,
306+ "location.bounding_box.maximum_longitude" : None ,
225307 # We use the report validated_at date as the extracted_on date
226- "location.bounding_box.extracted_on" : validated_at ,
308+ "location.bounding_box.extracted_on" : None ,
227309 "status" : feed .status ,
228- "features" : joined_features ,
310+ "features" : None ,
311+ "redirect.id" : redirect_ids_str ,
312+ "redirect.comment" : redirect_comments_str ,
229313 }
230-
231- redirect_ids = ""
232- redirect_comments = ""
233- # Add concatenated redirect IDs
234- if feed .redirectingids :
235- for redirect in feed .redirectingids :
236- if redirect and redirect .target and redirect .target .stable_id :
237- stripped_id = redirect .target .stable_id .strip ()
238- if stripped_id :
239- redirect_ids = (
240- redirect_ids + "|" + stripped_id
241- if redirect_ids
242- else stripped_id
243- )
244- redirect_comments = (
245- redirect_comments + "|" + redirect .redirect_comment
246- if redirect_comments
247- else redirect .redirect_comment
248- )
249- if redirect_ids == "" :
250- redirect_comments = ""
251- else :
252- # If there is no comment but we do have redirects, use an empty string instead of a
253- # potentially a bunch of vertical bars.
254- redirect_comments = (
255- "" if redirect_comments .strip ("|" ) == "" else redirect_comments
256- )
257-
258- data ["redirect.id" ] = redirect_ids
259- data ["redirect.comment" ] = redirect_comments
260-
261314 return data
262315
263316
264317def get_gtfs_rt_feed_csv_data (feed : Gtfsrealtimefeed ):
265318 """
266319 This function takes a GtfsRTFeed and returns a dictionary with the data to be written to the CSV file.
267320 """
321+ data = get_feed_csv_data (feed )
322+
268323 entity_types = ""
269324 if feed .entitytypes :
270325 valid_entity_types = [
@@ -274,49 +329,31 @@ def get_gtfs_rt_feed_csv_data(feed: Gtfsrealtimefeed):
274329 ]
275330 valid_entity_types = sorted (valid_entity_types )
276331 entity_types = "|" .join (valid_entity_types )
332+ data ["entity_type" ] = entity_types
277333
278334 static_references = ""
335+ first_feed_reference = None
279336 if feed .gtfs_feeds :
280337 valid_feed_references = [
281338 feed_reference .stable_id .strip ()
282339 for feed_reference in feed .gtfs_feeds
283340 if feed_reference and feed_reference .stable_id
284341 ]
285342 static_references = "|" .join (valid_feed_references )
286-
287- data = {
288- "id" : feed .stable_id ,
289- "data_type" : feed .data_type ,
290- "entity_type" : entity_types ,
291- "location.country_code" : ""
292- if not feed .locations or not feed .locations [0 ]
293- else feed .locations [0 ].country_code ,
294- "location.subdivision_name" : ""
295- if not feed .locations or not feed .locations [0 ]
296- else feed .locations [0 ].subdivision_name ,
297- "location.municipality" : ""
298- if not feed .locations or not feed .locations [0 ]
299- else feed .locations [0 ].municipality ,
300- "provider" : feed .provider ,
301- "name" : feed .feed_name ,
302- "note" : feed .note ,
303- "feed_contact_email" : feed .feed_contact_email ,
304- "static_reference" : static_references ,
305- "urls.direct_download" : feed .producer_url ,
306- "urls.authentication_type" : feed .authentication_type ,
307- "urls.authentication_info" : feed .authentication_info_url ,
308- "urls.api_key_parameter_name" : feed .api_key_parameter_name ,
309- "urls.latest" : None ,
310- "urls.license" : feed .license_url ,
311- "location.bounding_box.minimum_latitude" : None ,
312- "location.bounding_box.maximum_latitude" : None ,
313- "location.bounding_box.minimum_longitude" : None ,
314- "location.bounding_box.maximum_longitude" : None ,
315- "location.bounding_box.extracted_on" : None ,
316- "features" : None ,
317- "redirect.id" : None ,
318- "redirect.comment" : None ,
319- }
343+ # If there is more than one GTFS feed associated with this RT feed (why?),
344+ # we will arbitrarily use the first one in the list for the bounding box.
345+ first_feed_reference = feed .gtfs_feeds [0 ] if feed .gtfs_feeds else None
346+ data ["static_reference" ] = static_references
347+
348+ # For the RT feed, we use the bounding box of the associated GTFS feed, if any.
349+ # These bounding boxes were collected when processing the GTFS feeds.
350+ bounding_box = (
351+ bounding_box_lookup .get (first_feed_reference .id )
352+ if first_feed_reference
353+ else None
354+ )
355+ if bounding_box :
356+ bounding_box .fill_data (data )
320357
321358 return data
322359
0 commit comments