Skip to content

Commit 5a98509

Browse files
committed
merge: main
2 parents 99d3909 + e59d956 commit 5a98509

File tree

14 files changed

+696
-10
lines changed

14 files changed

+696
-10
lines changed

api/src/feeds/impl/models/gtfs_dataset_impl.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
from functools import reduce
22
from typing import List
33

4-
from packaging.version import Version
5-
64
from database_gen.sqlacodegen_models import Gtfsdataset, Validationreport
75
from feeds.impl.models.bounding_box_impl import BoundingBoxImpl
86
from feeds.impl.models.validation_report_impl import ValidationReportImpl
97
from feeds_gen.models.gtfs_dataset import GtfsDataset
8+
from utils.model_utils import compare_java_versions
109

1110

1211
class GtfsDatasetImpl(GtfsDataset):
@@ -30,7 +29,8 @@ def from_orm_latest_validation_report(
3029
"""
3130
if validation_reports:
3231
latest_report = reduce(
33-
lambda a, b: a if Version(a.validator_version) > Version(b.validator_version) else b, validation_reports
32+
lambda a, b: a if compare_java_versions(a.validator_version, b.validator_version) == 1 else b,
33+
validation_reports,
3434
)
3535
return ValidationReportImpl.from_orm(latest_report)
3636
return None

api/src/feeds/impl/models/latest_dataset_impl.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
from functools import reduce
22

3-
from packaging.version import Version
4-
53
from database_gen.sqlacodegen_models import Gtfsdataset
64
from feeds.impl.models.bounding_box_impl import BoundingBoxImpl
75
from feeds.impl.models.validation_report_impl import ValidationReportImpl
86
from feeds_gen.models.latest_dataset import LatestDataset
97
from feeds_gen.models.latest_dataset_validation_report import LatestDatasetValidationReport
8+
from utils.model_utils import compare_java_versions
109

1110

1211
class LatestDatasetImpl(LatestDataset):
@@ -28,7 +27,7 @@ def from_orm(cls, dataset: Gtfsdataset | None) -> LatestDataset | None:
2827
validation_report: LatestDatasetValidationReport | None = None
2928
if dataset.validation_reports:
3029
latest_report = reduce(
31-
lambda a, b: a if Version(a.validator_version) > Version(b.validator_version) else b,
30+
lambda a, b: a if compare_java_versions(a.validator_version, b.validator_version) == 1 else b,
3231
dataset.validation_reports,
3332
)
3433
(

api/src/utils/model_utils.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from packaging.version import Version
2+
3+
4+
def compare_java_versions(v1: str | None, v2: str | None):
5+
"""
6+
Compare two version strings v1 and v2.
7+
Returns 1 if v1 > v2, -1 if v1 < v2,
8+
otherwise 0.
9+
The version strings are expected to be in the format of
10+
major.minor.patch[-SNAPSHOT]
11+
"""
12+
if v1 is None and v2 is None:
13+
return 0
14+
if v1 is None:
15+
return -1
16+
if v2 is None:
17+
return 1
18+
# clean version strings replacing the SNAPSHOT suffix with .dev0
19+
v1 = v1.replace("-SNAPSHOT", ".dev0")
20+
v2 = v2.replace("-SNAPSHOT", ".dev0")
21+
if Version(v1) > Version(v2):
22+
return 1
23+
elif Version(v1) < Version(v2):
24+
return -1
25+
else:
26+
return 0
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import unittest
2+
from utils.model_utils import compare_java_versions
3+
4+
5+
class TestCompareJavaVersions(unittest.TestCase):
    """Unit tests for ``compare_java_versions``."""

    def test_compare_versions_equal(self):
        # A version compared with itself is equal, with or without -SNAPSHOT.
        for version in ("1.0.0", "1.0.0-SNAPSHOT"):
            self.assertEqual(compare_java_versions(version, version), 0)

    def test_compare_versions_v1_greater(self):
        # Each pair is (greater, smaller); a release outranks its own snapshot.
        pairs = (
            ("1.0.1", "1.0.0"),
            ("1.0.0", "0.9.9"),
            ("1.0.0", "1.0.0-SNAPSHOT"),
        )
        for greater, smaller in pairs:
            self.assertEqual(compare_java_versions(greater, smaller), 1)

    def test_compare_versions_v2_greater(self):
        # Each pair is (smaller, greater); a snapshot ranks below its release.
        pairs = (
            ("1.0.0", "1.0.1"),
            ("0.9.9", "1.0.0"),
            ("1.0.0-SNAPSHOT", "1.0.0"),
        )
        for smaller, greater in pairs:
            self.assertEqual(compare_java_versions(smaller, greater), -1)

    def test_compare_versions_with_none(self):
        # Rows are (v1, v2, expected); None ranks below any real version.
        rows = (
            (None, None, 0),
            (None, "1.0.0", -1),
            ("1.0.0", None, 1),
        )
        for left, right, expected in rows:
            self.assertEqual(compare_java_versions(left, right), expected)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[run]
2+
omit =
3+
*/test*/*
4+
*/database_gen/*
5+
*/dataset_service/*
6+
*/helpers/*
7+
8+
[report]
9+
exclude_lines =
10+
if __name__ == .__main__.:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Environment variables for the validation report information extraction to run locally
2+
export FEEDS_DATABASE_URL=${{FEEDS_DATABASE_URL}}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Backfill Dataset Service Date Range
2+
This directory contains the GCP serverless function that will backfill the GTFS Datasets to include the service date range based off their latest validation report.
3+
It only fills `NULL` values and does not overwrite existing values.
4+
5+
## Function Workflow
6+
1. **HTTP Request Trigger**: The function is invoked through an HTTP request that includes identifiers for a dataset and feed.
7+
2. **Dataset Query**: Retrieves all GTFS datasets that are missing a service date range value.
8+
3. **Validation Report Retrieval**: For each dataset, downloads the latest validation JSON report to retrieve the service date range.
9+
4. **Database Update**: Updates the dataset with the values retrieved from the validation report
10+
11+
## Function Configuration
12+
The function depends on the following environment variable:
13+
- `FEEDS_DATABASE_URL`: The database URL for connecting to the database containing GTFS datasets and related entities.
14+
15+
## Local Development
16+
Follow standard practices for local development of GCP serverless functions. Refer to the main [README.md](../README.md) for general setup instructions for the development environment.
17+
18+
## Testing
19+
To run it locally: `./scripts/function-python-run.sh --function_name backfill_dataset_service_date_range`
20+
21+
Then, using Postman or a similar tool, send a `POST` request to `v1/backfill-dataset-service-date-range`.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"name": "backfill-dataset-service-date-range",
3+
"description": "For each dataset, backfill the dataset service with the date range of the dataset",
4+
"entry_point": "backfill_dataset_service_date_range",
5+
"timeout": 3600,
6+
"memory": "2Gi",
7+
"trigger_http": true,
8+
"include_folders": ["helpers"],
9+
"include_api_folders": ["database_gen"],
10+
"secret_environment_variables": [
11+
{
12+
"key": "FEEDS_DATABASE_URL"
13+
}
14+
],
15+
"ingress_settings": "ALLOW_INTERNAL_AND_GCLB",
16+
"max_instance_request_concurrency": 1,
17+
"max_instance_count": 1,
18+
"min_instance_count": 0,
19+
"available_cpu": 1
20+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Common packages
2+
functions-framework==3.*
3+
google-cloud-logging
4+
psycopg2-binary==2.9.6
5+
aiohttp~=3.10.5
6+
asyncio~=3.4.3
7+
urllib3~=2.2.2
8+
requests~=2.32.3
9+
attrs~=23.1.0
10+
pluggy~=1.3.0
11+
certifi~=2024.7.4
12+
13+
# SQL Alchemy and Geo Alchemy
14+
SQLAlchemy==2.0.23
15+
geoalchemy2==0.14.7
16+
17+
# Google specific packages for this function
18+
cloudevents~=1.10.1
19+
google-cloud-storage
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Faker
2+
pytest~=7.4.3

0 commit comments

Comments
 (0)