@@ -101,7 +101,7 @@ def record_autodiscovery_request(
101101
102102 def extract_gbfs_endpoints (
103103 self ,
104- gbfs_json_url : str ,
104+ gbfs_json_url : str , extracted_from : str , latency : bool = True
105105 ) -> Tuple [Optional [List [GBFSEndpoint ]], GBFSVersion ]:
106106 """
107107 Extract GBFS endpoints from the GBFS JSON URL.
@@ -115,9 +115,11 @@ def extract_gbfs_endpoints(
115115 self .logger .warning (
116116 "No version found in the GBFS data. Defaulting to version 1.0."
117117 )
118- gbfs_version = GBFSVersion ("1.0" , gbfs_json_url )
118+ gbfs_version = GBFSVersion ("1.0" , gbfs_json_url , extracted_from )
119119 else :
120- gbfs_version = GBFSVersion (version_match [0 ].value , gbfs_json_url )
120+ gbfs_version = GBFSVersion (
121+ version_match [0 ].value , gbfs_json_url , extracted_from
122+ )
121123 if not feeds_matches :
122124 self .logger .error (
123125 "No feeds found in the GBFS data for version %s." , gbfs_version .version
@@ -133,9 +135,9 @@ def extract_gbfs_endpoints(
133135 )
134136 except AttributeError :
135137 language = None
136- endpoints += GBFSEndpoint .from_dict (feed_match .value , language )
138+ endpoints += GBFSEndpoint .from_dict (feed_match .value , language , latency )
137139
138- # If the autodiscovery endpoint is not listed then add it
140+ # If the autodiscovery endpoint is not listed, then add it
139141 if not any (endpoint .name == "gbfs" for endpoint in endpoints ):
140142 endpoints += GBFSEndpoint .from_dict (
141143 [{"name" : "gbfs" , "url" : gbfs_json_url }], None
@@ -147,6 +149,11 @@ def extract_gbfs_endpoints(
147149 for endpoint in endpoints
148150 }.values ()
149151 )
152+ if len (unique_endpoints ) != len (endpoints ):
153+ self .logger .warning (
154+ "Duplicate endpoints found. This is a spec violation. Duplicates have been ignored."
155+ )
156+
150157 self .logger .info ("Found version %s." , gbfs_version .version )
151158 self .logger .info (
152159 "Found endpoints %s." , ", " .join ([endpoint .name for endpoint in endpoints ])
@@ -155,10 +162,13 @@ def extract_gbfs_endpoints(
155162
156163 def extract_gbfs_versions (self , gbfs_json_url : str ) -> Optional [List [GBFSVersion ]]:
157164 """Extract GBFS versions from the autodiscovery URL"""
158- all_endpoints , version = self .extract_gbfs_endpoints (gbfs_json_url )
165+ all_endpoints , version = self .extract_gbfs_endpoints (
166+ gbfs_json_url , "autodiscovery"
167+ )
159168 if not all_endpoints or not version :
160169 return None
161- self .gbfs_endpoints [version .version ] = all_endpoints
170+ version_id = f"{ self .stable_id } _{ version .version } _{ version .extracted_from } "
171+ self .gbfs_endpoints [version_id ] = all_endpoints
162172
163173 # Fetch GBFS Versions
164174 gbfs_versions_endpoint = next (
@@ -172,7 +182,22 @@ def extract_gbfs_versions(self, gbfs_json_url: str) -> Optional[List[GBFSVersion
172182 gbfs_versions_json = fetch_gbfs_data (gbfs_versions_endpoint .url )
173183 versions_matches = parse ("$..versions" ).find (gbfs_versions_json )
174184 if versions_matches :
175- gbfs_versions = GBFSVersion .from_dict (versions_matches [0 ].value )
185+ extracted_versions = GBFSVersion .from_dict (
186+ versions_matches [0 ].value , "gbfs_versions"
187+ )
188+ autodiscovery_url_in_extracted = any (
189+ version .url == gbfs_json_url for version in extracted_versions
190+ )
191+ if not autodiscovery_url_in_extracted :
192+ self .logger .warning (
193+ "The autodiscovery URL is not included in gbfs_versions. There could be duplication"
194+ " of versions."
195+ )
196+ gbfs_versions = [
197+ version
198+ for version in extracted_versions
199+ if version .url != gbfs_json_url
200+ ] + [version ]
176201 self .logger .info (
177202 "Found versions %s" ,
178203 ", " .join ([version .version for version in gbfs_versions ]),
@@ -186,29 +211,6 @@ def extract_gbfs_versions(self, gbfs_json_url: str) -> Optional[List[GBFSVersion
186211 version
187212 ] # If no gbfs_versions endpoint, return the version from the autodiscovery URL
188213
189- def get_latest_version (self ) -> Optional [str ]:
190- """Get the latest GBFS version."""
191- max_version = max (
192- (
193- version
194- for version in self .gbfs_versions
195- if not version .version .lower ().endswith ("RC" )
196- ),
197- key = lambda version : version .version ,
198- default = None ,
199- )
200- if not max_version :
201- self .logger .error (
202- "No non-RC versions found. Trying to set the latest to a RC version."
203- )
204- max_version = max (
205- self .gbfs_versions , key = lambda version : version .version , default = None
206- )
207- if not max_version :
208- self .logger .error ("No versions found." )
209- return None
210- return max_version .version
211-
212214 @with_db_session ()
213215 def update_database_entities (self , db_session : Session ) -> None :
214216 """Update the database entities with the processed GBFS data."""
@@ -222,9 +224,6 @@ def update_database_entities(self, db_session: Session) -> None:
222224 self .logger .error ("GBFS feed with ID %s not found." , self .feed_id )
223225 return
224226 gbfs_versions_orm = []
225- latest_version = self .get_latest_version ()
226- if not latest_version :
227- return
228227
229228 # Deactivate versions that are not in the current feed
230229 active_versions = [version .version for version in self .gbfs_versions ]
@@ -236,28 +235,29 @@ def update_database_entities(self, db_session: Session) -> None:
236235 # Update or create GBFS versions and endpoints
237236 for gbfs_version in self .gbfs_versions :
238237 gbfs_version_orm = self .update_or_create_gbfs_version (
239- db_session , gbfs_version , latest_version
238+ db_session , gbfs_version
240239 )
241240 gbfs_versions_orm .append (gbfs_version_orm )
242241
243- gbfs_endpoints = self .gbfs_endpoints .get (gbfs_version . version , [])
242+ gbfs_endpoints = self .gbfs_endpoints .get (gbfs_version_orm . id , [])
244243 gbfs_endpoints_orm = []
245- features = self .validation_reports .get (gbfs_version . version , {}).get (
244+ features = self .validation_reports .get (gbfs_version_orm . id , {}).get (
246245 "features" , []
247246 )
248247 for endpoint in gbfs_endpoints :
249248 gbfs_endpoint_orm = self .update_or_create_gbfs_endpoint (
250- db_session , gbfs_version . version , endpoint , features
249+ db_session , gbfs_version_orm . id , endpoint , features
251250 )
252- gbfs_endpoint_orm .httpaccesslogs .append (
253- Httpaccesslog (
254- request_method = HTTPMethod .GET .value ,
255- request_url = endpoint .url ,
256- status_code = endpoint .status_code ,
257- latency_ms = endpoint .latency ,
258- response_size_bytes = endpoint .response_size_bytes ,
251+ if endpoint .status_code is not None :
252+ gbfs_endpoint_orm .httpaccesslogs .append (
253+ Httpaccesslog (
254+ request_method = HTTPMethod .GET .value ,
255+ request_url = endpoint .url ,
256+ status_code = endpoint .status_code ,
257+ latency_ms = endpoint .latency ,
258+ response_size_bytes = endpoint .response_size_bytes ,
259+ )
259260 )
260- )
261261 gbfs_endpoints_orm .append (gbfs_endpoint_orm )
262262
263263 # Deactivate endpoints that are not in the current feed
@@ -269,41 +269,40 @@ def update_database_entities(self, db_session: Session) -> None:
269269 gbfs_version_orm .gbfsendpoints = gbfs_endpoints_orm
270270
271271 validation_report_orm = self .create_validation_report_entities (
272- gbfs_version_orm , self .validation_reports .get (gbfs_version . version , {})
272+ gbfs_version_orm , self .validation_reports .get (gbfs_version_orm . id , {})
273273 )
274274 if validation_report_orm :
275275 gbfs_version_orm .gbfsvalidationreports .append (validation_report_orm )
276276 gbfs_feed .gbfsversions = gbfs_versions_orm
277277 db_session .commit ()
278278
279279 def update_or_create_gbfs_version (
280- self , db_session : Session , gbfs_version : GBFSVersion , latest_version : str
280+ self , db_session : Session , gbfs_version : GBFSVersion
281281 ) -> Gbfsversion :
282282 """Update or create a GBFS version entity."""
283- formatted_id = f"{ self .stable_id } _{ gbfs_version .version } "
283+ formatted_id = (
284+ f"{ self .stable_id } _{ gbfs_version .version } _{ gbfs_version .extracted_from } "
285+ )
284286 gbfs_version_orm = (
285287 db_session .query (Gbfsversion ).filter (Gbfsversion .id == formatted_id ).first ()
286288 )
287289 if not gbfs_version_orm :
288290 gbfs_version_orm = Gbfsversion (
289- id = formatted_id , version = gbfs_version .version
291+ id = formatted_id , version = gbfs_version .version , source = gbfs_version . extracted_from
290292 )
291293
292294 gbfs_version_orm .url = gbfs_version .url # Update the URL
293- gbfs_version_orm .latest = (
294- gbfs_version .version == latest_version
295- ) # Update the latest flag
296295 return gbfs_version_orm
297296
298297 def update_or_create_gbfs_endpoint (
299298 self ,
300299 db_session : Session ,
301- version : str ,
300+ version_id : str ,
302301 endpoint : GBFSEndpoint ,
303302 features : List [str ],
304303 ) -> Gbfsendpoint :
305304 """Update or create a GBFS endpoint entity."""
306- formatted_id = f"{ self . stable_id } _ { version } _{ endpoint .name } "
305+ formatted_id = f"{ version_id } _{ endpoint .name } "
307306 if endpoint .language :
308307 formatted_id += f"_{ endpoint .language } "
309308 gbfs_endpoint_orm = (
@@ -346,7 +345,8 @@ def validate_gbfs_feed_versions(self) -> None:
346345 json .dumps (json_report_summary ), content_type = "application/json"
347346 )
348347 report_summary_blob .make_public ()
349- self .validation_reports [version .version ] = {
348+ version_id = f"{ self .stable_id } _{ version .version } _{ version .extracted_from } "
349+ self .validation_reports [version_id ] = {
350350 "report_summary_url" : report_summary_blob .public_url ,
351351 "json_report_summary" : json_report_summary ,
352352 "validation_time" : date_time_utc ,
@@ -356,6 +356,9 @@ def validate_gbfs_feed_versions(self) -> None:
356356 if not obj .get ("required" , True ) and obj .get ("exists" , False )
357357 ],
358358 }
359+ self .logger .info (
360+ f"Validated GBFS feed version: { version .version } with URL: { version .url } "
361+ )
359362
360363 def create_validation_report_entities (
361364 self , gbfs_version_orm : Gbfsversion , validation_report_data : Dict
@@ -373,7 +376,7 @@ def create_validation_report_entities(
373376 return None
374377
375378 validation_report_id = (
376- f"{ self .stable_id } _v{ gbfs_version_orm .version } _{ validation_time } "
379+ f"{ self .stable_id } _v{ gbfs_version_orm .id } _{ validation_time } "
377380 )
378381 validation_report = Gbfsvalidationreport (
379382 id = validation_report_id ,
@@ -401,21 +404,37 @@ def create_validation_report_entities(
401404 def extract_endpoints_for_all_versions (self ):
402405 """Extract endpoints for all versions of the GBFS feed."""
403406 for version in self .gbfs_versions :
404- if version .version in self .gbfs_endpoints :
407+ version_id = f"{ self .stable_id } _{ version .version } _{ version .extracted_from } "
408+ if version_id in self .gbfs_endpoints :
405409 continue
406- endpoints , _ = self .extract_gbfs_endpoints (version .url )
410+ self .logger .info (f"Extracting endpoints for version { version .version } ." )
411+ # Avoid fetching latency data for 'gbfs_versions' endpoint
412+ endpoints , _ = self .extract_gbfs_endpoints (
413+ version .url , "gbfs_versions" , latency = False
414+ )
407415 if endpoints :
408- self .gbfs_endpoints [version . version ] = endpoints
416+ self .gbfs_endpoints [version_id ] = endpoints
409417 else :
410418 self .logger .error ("No endpoints found for version %s." , version .version )
411419
412420 def trigger_location_extraction (self ):
413421 """Trigger the location extraction process."""
414- latest_version = self .get_latest_version ()
415- if not latest_version :
416- self .logger .error ("No latest version found." )
422+ autodiscovery_version = next (
423+ (
424+ version
425+ for version in self .gbfs_versions
426+ if version .extracted_from == "autodiscovery"
427+ ),
428+ None ,
429+ )
430+ if not autodiscovery_version :
431+ self .logger .error (
432+ "No autodiscovery version found. Cannot trigger location extraction."
433+ )
417434 return
418- endpoints = self .gbfs_endpoints .get (latest_version , [])
435+ version_id = f"{ self .stable_id } _{ autodiscovery_version .version } _{ autodiscovery_version .extracted_from } "
436+ endpoints = self .gbfs_endpoints .get (version_id , [])
437+
419438 # Find the station_information_url endpoint
420439 station_information_url = next (
421440 (
0 commit comments