Skip to content

Commit 2a8507d

Browse files
committed
feat: allowing duplicate gbfs versions from different sources
1 parent 08b4cda commit 2a8507d

File tree

10 files changed

+162
-79
lines changed

10 files changed

+162
-79
lines changed

api/src/feeds/impl/models/gbfs_version_impl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def from_orm(cls, version: GbfsVersionOrm | None) -> GbfsVersion | None:
2828
version=version.version,
2929
created_at=version.created_at,
3030
last_updated_at=latest_report.validated_at if latest_report else None,
31-
latest=version.latest,
31+
source=version.source,
3232
endpoints=[GbfsEndpointImpl.from_orm(item) for item in version.gbfsendpoints]
3333
if version.gbfsendpoints
3434
else [],

api/src/scripts/populate_db_test_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def populate_test_datasets(self, filepath, db_session: "Session"):
135135
self.logger.error(f"No feed found with stable_id: {version['feed_id']}")
136136
continue
137137
gbfs_version = Gbfsversion(
138-
id=version["id"], version=version["version"], url=version["url"], latest=version["latest"]
138+
id=version["id"], version=version["version"], url=version["url"]
139139
)
140140
if version.get("endpoints"):
141141
for endpoint in version["endpoints"]:

api/tests/test_data/extra_test_data.json

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,6 @@
766766
"id": "gbfs-system_id_1-2.3",
767767
"version": "2.3",
768768
"url": "https://www.example.com/gbfs_feed_1/2.3/gbfs.json",
769-
"latest": false,
770769
"endpoints": [
771770
{
772771
"id": "gbfs-system_id_1-2.3-system_information",
@@ -789,7 +788,6 @@
789788
"id": "gbfs-system_id_1-3.0",
790789
"version": "3.0",
791790
"url": "https://www.example.com/gbfs_feed_1/3.0/gbfs.json",
792-
"latest": true,
793791
"endpoints": [
794792
{
795793
"id": "gbfs-system_id_1-3.0-system_information",
@@ -810,7 +808,6 @@
810808
"id": "gbfs-system_id_2-2.3",
811809
"version": "2.3",
812810
"url": "https://www.example.com/gbfs_feed_1/2.3/gbfs.json",
813-
"latest": false,
814811
"endpoints": [
815812
{
816813
"id": "gbfs-system_id_2-2.3-system_information",

docs/DatabaseCatalogAPI.yaml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -527,11 +527,16 @@ components:
527527
type: string
528528
format: date-time
529529
example: 2023-07-10T22:06:00Z
530-
latest:
531-
description: >
532-
A boolean value indicating if this is the latest version of the GBFS feed.
533-
type: boolean
534-
example: true
530+
source:
531+
description: >
532+
The source from which the version was obtained. Possible values are:
533+
- `autodiscovery`: The version was obtained through the autodiscovery url.
534+
- `gbfs_versions`: The version was obtained through the gbfs_versions endpoint.
535+
type: string
536+
enum:
537+
- autodiscovery
538+
- gbfs_versions
539+
535540
endpoints:
536541
description: >
537542
A list of endpoints that are available in the version.

functions-python/gbfs_validator/src/gbfs_data_processor.py

Lines changed: 83 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def record_autodiscovery_request(
101101

102102
def extract_gbfs_endpoints(
103103
self,
104-
gbfs_json_url: str,
104+
gbfs_json_url: str, extracted_from: str, latency: bool = True
105105
) -> Tuple[Optional[List[GBFSEndpoint]], GBFSVersion]:
106106
"""
107107
Extract GBFS endpoints from the GBFS JSON URL.
@@ -115,9 +115,11 @@ def extract_gbfs_endpoints(
115115
self.logger.warning(
116116
"No version found in the GBFS data. Defaulting to version 1.0."
117117
)
118-
gbfs_version = GBFSVersion("1.0", gbfs_json_url)
118+
gbfs_version = GBFSVersion("1.0", gbfs_json_url, extracted_from)
119119
else:
120-
gbfs_version = GBFSVersion(version_match[0].value, gbfs_json_url)
120+
gbfs_version = GBFSVersion(
121+
version_match[0].value, gbfs_json_url, extracted_from
122+
)
121123
if not feeds_matches:
122124
self.logger.error(
123125
"No feeds found in the GBFS data for version %s.", gbfs_version.version
@@ -133,9 +135,9 @@ def extract_gbfs_endpoints(
133135
)
134136
except AttributeError:
135137
language = None
136-
endpoints += GBFSEndpoint.from_dict(feed_match.value, language)
138+
endpoints += GBFSEndpoint.from_dict(feed_match.value, language, latency)
137139

138-
# If the autodiscovery endpoint is not listed then add it
140+
# If the autodiscovery endpoint is not listed, then add it
139141
if not any(endpoint.name == "gbfs" for endpoint in endpoints):
140142
endpoints += GBFSEndpoint.from_dict(
141143
[{"name": "gbfs", "url": gbfs_json_url}], None
@@ -147,6 +149,11 @@ def extract_gbfs_endpoints(
147149
for endpoint in endpoints
148150
}.values()
149151
)
152+
if len(unique_endpoints) != len(endpoints):
153+
self.logger.warning(
154+
"Duplicate endpoints found. This is a spec violation. Duplicates have been ignored."
155+
)
156+
150157
self.logger.info("Found version %s.", gbfs_version.version)
151158
self.logger.info(
152159
"Found endpoints %s.", ", ".join([endpoint.name for endpoint in endpoints])
@@ -155,10 +162,13 @@ def extract_gbfs_endpoints(
155162

156163
def extract_gbfs_versions(self, gbfs_json_url: str) -> Optional[List[GBFSVersion]]:
157164
"""Extract GBFS versions from the autodiscovery URL"""
158-
all_endpoints, version = self.extract_gbfs_endpoints(gbfs_json_url)
165+
all_endpoints, version = self.extract_gbfs_endpoints(
166+
gbfs_json_url, "autodiscovery"
167+
)
159168
if not all_endpoints or not version:
160169
return None
161-
self.gbfs_endpoints[version.version] = all_endpoints
170+
version_id = f"{self.stable_id}_{version.version}_{version.extracted_from}"
171+
self.gbfs_endpoints[version_id] = all_endpoints
162172

163173
# Fetch GBFS Versions
164174
gbfs_versions_endpoint = next(
@@ -172,7 +182,22 @@ def extract_gbfs_versions(self, gbfs_json_url: str) -> Optional[List[GBFSVersion
172182
gbfs_versions_json = fetch_gbfs_data(gbfs_versions_endpoint.url)
173183
versions_matches = parse("$..versions").find(gbfs_versions_json)
174184
if versions_matches:
175-
gbfs_versions = GBFSVersion.from_dict(versions_matches[0].value)
185+
extracted_versions = GBFSVersion.from_dict(
186+
versions_matches[0].value, "gbfs_versions"
187+
)
188+
autodiscovery_url_in_extracted = any(
189+
version.url == gbfs_json_url for version in extracted_versions
190+
)
191+
if not autodiscovery_url_in_extracted:
192+
self.logger.warning(
193+
"The autodiscovery URL is not included in gbfs_versions. There could be duplication"
194+
" of versions."
195+
)
196+
gbfs_versions = [
197+
version
198+
for version in extracted_versions
199+
if version.url != gbfs_json_url
200+
] + [version]
176201
self.logger.info(
177202
"Found versions %s",
178203
", ".join([version.version for version in gbfs_versions]),
@@ -186,29 +211,6 @@ def extract_gbfs_versions(self, gbfs_json_url: str) -> Optional[List[GBFSVersion
186211
version
187212
] # If no gbfs_versions endpoint, return the version from the autodiscovery URL
188213

189-
def get_latest_version(self) -> Optional[str]:
190-
"""Get the latest GBFS version."""
191-
max_version = max(
192-
(
193-
version
194-
for version in self.gbfs_versions
195-
if not version.version.lower().endswith("RC")
196-
),
197-
key=lambda version: version.version,
198-
default=None,
199-
)
200-
if not max_version:
201-
self.logger.error(
202-
"No non-RC versions found. Trying to set the latest to a RC version."
203-
)
204-
max_version = max(
205-
self.gbfs_versions, key=lambda version: version.version, default=None
206-
)
207-
if not max_version:
208-
self.logger.error("No versions found.")
209-
return None
210-
return max_version.version
211-
212214
@with_db_session()
213215
def update_database_entities(self, db_session: Session) -> None:
214216
"""Update the database entities with the processed GBFS data."""
@@ -222,9 +224,6 @@ def update_database_entities(self, db_session: Session) -> None:
222224
self.logger.error("GBFS feed with ID %s not found.", self.feed_id)
223225
return
224226
gbfs_versions_orm = []
225-
latest_version = self.get_latest_version()
226-
if not latest_version:
227-
return
228227

229228
# Deactivate versions that are not in the current feed
230229
active_versions = [version.version for version in self.gbfs_versions]
@@ -236,28 +235,29 @@ def update_database_entities(self, db_session: Session) -> None:
236235
# Update or create GBFS versions and endpoints
237236
for gbfs_version in self.gbfs_versions:
238237
gbfs_version_orm = self.update_or_create_gbfs_version(
239-
db_session, gbfs_version, latest_version
238+
db_session, gbfs_version
240239
)
241240
gbfs_versions_orm.append(gbfs_version_orm)
242241

243-
gbfs_endpoints = self.gbfs_endpoints.get(gbfs_version.version, [])
242+
gbfs_endpoints = self.gbfs_endpoints.get(gbfs_version_orm.id, [])
244243
gbfs_endpoints_orm = []
245-
features = self.validation_reports.get(gbfs_version.version, {}).get(
244+
features = self.validation_reports.get(gbfs_version_orm.id, {}).get(
246245
"features", []
247246
)
248247
for endpoint in gbfs_endpoints:
249248
gbfs_endpoint_orm = self.update_or_create_gbfs_endpoint(
250-
db_session, gbfs_version.version, endpoint, features
249+
db_session, gbfs_version_orm.id, endpoint, features
251250
)
252-
gbfs_endpoint_orm.httpaccesslogs.append(
253-
Httpaccesslog(
254-
request_method=HTTPMethod.GET.value,
255-
request_url=endpoint.url,
256-
status_code=endpoint.status_code,
257-
latency_ms=endpoint.latency,
258-
response_size_bytes=endpoint.response_size_bytes,
251+
if endpoint.status_code is not None:
252+
gbfs_endpoint_orm.httpaccesslogs.append(
253+
Httpaccesslog(
254+
request_method=HTTPMethod.GET.value,
255+
request_url=endpoint.url,
256+
status_code=endpoint.status_code,
257+
latency_ms=endpoint.latency,
258+
response_size_bytes=endpoint.response_size_bytes,
259+
)
259260
)
260-
)
261261
gbfs_endpoints_orm.append(gbfs_endpoint_orm)
262262

263263
# Deactivate endpoints that are not in the current feed
@@ -269,41 +269,40 @@ def update_database_entities(self, db_session: Session) -> None:
269269
gbfs_version_orm.gbfsendpoints = gbfs_endpoints_orm
270270

271271
validation_report_orm = self.create_validation_report_entities(
272-
gbfs_version_orm, self.validation_reports.get(gbfs_version.version, {})
272+
gbfs_version_orm, self.validation_reports.get(gbfs_version_orm.id, {})
273273
)
274274
if validation_report_orm:
275275
gbfs_version_orm.gbfsvalidationreports.append(validation_report_orm)
276276
gbfs_feed.gbfsversions = gbfs_versions_orm
277277
db_session.commit()
278278

279279
def update_or_create_gbfs_version(
280-
self, db_session: Session, gbfs_version: GBFSVersion, latest_version: str
280+
self, db_session: Session, gbfs_version: GBFSVersion
281281
) -> Gbfsversion:
282282
"""Update or create a GBFS version entity."""
283-
formatted_id = f"{self.stable_id}_{gbfs_version.version}"
283+
formatted_id = (
284+
f"{self.stable_id}_{gbfs_version.version}_{gbfs_version.extracted_from}"
285+
)
284286
gbfs_version_orm = (
285287
db_session.query(Gbfsversion).filter(Gbfsversion.id == formatted_id).first()
286288
)
287289
if not gbfs_version_orm:
288290
gbfs_version_orm = Gbfsversion(
289-
id=formatted_id, version=gbfs_version.version
291+
id=formatted_id, version=gbfs_version.version, source=gbfs_version.extracted_from
290292
)
291293

292294
gbfs_version_orm.url = gbfs_version.url # Update the URL
293-
gbfs_version_orm.latest = (
294-
gbfs_version.version == latest_version
295-
) # Update the latest flag
296295
return gbfs_version_orm
297296

298297
def update_or_create_gbfs_endpoint(
299298
self,
300299
db_session: Session,
301-
version: str,
300+
version_id: str,
302301
endpoint: GBFSEndpoint,
303302
features: List[str],
304303
) -> Gbfsendpoint:
305304
"""Update or create a GBFS endpoint entity."""
306-
formatted_id = f"{self.stable_id}_{version}_{endpoint.name}"
305+
formatted_id = f"{version_id}_{endpoint.name}"
307306
if endpoint.language:
308307
formatted_id += f"_{endpoint.language}"
309308
gbfs_endpoint_orm = (
@@ -346,7 +345,8 @@ def validate_gbfs_feed_versions(self) -> None:
346345
json.dumps(json_report_summary), content_type="application/json"
347346
)
348347
report_summary_blob.make_public()
349-
self.validation_reports[version.version] = {
348+
version_id = f"{self.stable_id}_{version.version}_{version.extracted_from}"
349+
self.validation_reports[version_id] = {
350350
"report_summary_url": report_summary_blob.public_url,
351351
"json_report_summary": json_report_summary,
352352
"validation_time": date_time_utc,
@@ -356,6 +356,9 @@ def validate_gbfs_feed_versions(self) -> None:
356356
if not obj.get("required", True) and obj.get("exists", False)
357357
],
358358
}
359+
self.logger.info(
360+
f"Validated GBFS feed version: {version.version} with URL: {version.url}"
361+
)
359362

360363
def create_validation_report_entities(
361364
self, gbfs_version_orm: Gbfsversion, validation_report_data: Dict
@@ -373,7 +376,7 @@ def create_validation_report_entities(
373376
return None
374377

375378
validation_report_id = (
376-
f"{self.stable_id}_v{gbfs_version_orm.version}_{validation_time}"
379+
f"{self.stable_id}_v{gbfs_version_orm.id}_{validation_time}"
377380
)
378381
validation_report = Gbfsvalidationreport(
379382
id=validation_report_id,
@@ -401,21 +404,37 @@ def create_validation_report_entities(
401404
def extract_endpoints_for_all_versions(self):
402405
"""Extract endpoints for all versions of the GBFS feed."""
403406
for version in self.gbfs_versions:
404-
if version.version in self.gbfs_endpoints:
407+
version_id = f"{self.stable_id}_{version.version}_{version.extracted_from}"
408+
if version_id in self.gbfs_endpoints:
405409
continue
406-
endpoints, _ = self.extract_gbfs_endpoints(version.url)
410+
self.logger.info(f"Extracting endpoints for version {version.version}.")
411+
# Skip per-endpoint request metadata (latency, status code, response size) for versions listed by the gbfs_versions endpoint
412+
endpoints, _ = self.extract_gbfs_endpoints(
413+
version.url, "gbfs_versions", latency=False
414+
)
407415
if endpoints:
408-
self.gbfs_endpoints[version.version] = endpoints
416+
self.gbfs_endpoints[version_id] = endpoints
409417
else:
410418
self.logger.error("No endpoints found for version %s.", version.version)
411419

412420
def trigger_location_extraction(self):
413421
"""Trigger the location extraction process."""
414-
latest_version = self.get_latest_version()
415-
if not latest_version:
416-
self.logger.error("No latest version found.")
422+
autodiscovery_version = next(
423+
(
424+
version
425+
for version in self.gbfs_versions
426+
if version.extracted_from == "autodiscovery"
427+
),
428+
None,
429+
)
430+
if not autodiscovery_version:
431+
self.logger.error(
432+
"No autodiscovery version found. Cannot trigger location extraction."
433+
)
417434
return
418-
endpoints = self.gbfs_endpoints.get(latest_version, [])
435+
version_id = f"{self.stable_id}_{autodiscovery_version.version}_{autodiscovery_version.extracted_from}"
436+
endpoints = self.gbfs_endpoints.get(version_id, [])
437+
419438
# Find the station_information_url endpoint
420439
station_information_url = next(
421440
(

functions-python/gbfs_validator/src/gbfs_utils.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,20 @@ def get_request_metadata(
4747

4848
@staticmethod
4949
def from_dict(
50-
data: List[Dict[str, Any]], language: Optional[str]
50+
data: List[Dict[str, Any]], language: Optional[str], latency: bool = True
5151
) -> List["GBFSEndpoint"]:
5252
"""Creates a list of GBFSEndpoint objects from a list of dictionaries."""
5353
endpoints = []
5454
for file in data:
5555
if "name" in file and "url" in file:
56-
metadata = GBFSEndpoint.get_request_metadata(file["url"])
56+
if not latency:
57+
metadata = {
58+
"latency": None,
59+
"status_code": None,
60+
"response_size_bytes": None,
61+
}
62+
else:
63+
metadata = GBFSEndpoint.get_request_metadata(file["url"])
5764
if metadata:
5865
endpoints.append(
5966
GBFSEndpoint(
@@ -72,12 +79,15 @@ def from_dict(
7279
class GBFSVersion:
7380
version: str
7481
url: str
82+
extracted_from: str
7583

7684
@staticmethod
77-
def from_dict(data: List[Dict[str, Any]]) -> List["GBFSVersion"]:
85+
def from_dict(
86+
data: List[Dict[str, Any]], extracted_from: str
87+
) -> List["GBFSVersion"]:
7888
"""Creates a list of GBFSVersion objects from a list of dictionaries."""
7989
return [
80-
GBFSVersion(version["version"], version["url"])
90+
GBFSVersion(version["version"], version["url"], extracted_from)
8191
for version in data
8292
if "version" in version and "url" in version
8393
]

0 commit comments

Comments
 (0)