Skip to content

Commit b0dac39

Browse files
authored
feat: 1055 cache validation report notices summary (#1079)
1 parent 33ffb82 commit b0dac39

File tree

16 files changed

+380
-73
lines changed

16 files changed

+380
-73
lines changed

api/src/feeds/impl/models/latest_dataset_impl.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from shared.database_gen.sqlacodegen_models import Gtfsdataset
44
from feeds.impl.models.bounding_box_impl import BoundingBoxImpl
5-
from feeds.impl.models.validation_report_impl import ValidationReportImpl
65
from feeds_gen.models.latest_dataset import LatestDataset
76
from feeds_gen.models.latest_dataset_validation_report import LatestDatasetValidationReport
87
from utils.model_utils import compare_java_versions
@@ -30,21 +29,13 @@ def from_orm(cls, dataset: Gtfsdataset | None) -> LatestDataset | None:
3029
lambda a, b: a if compare_java_versions(a.validator_version, b.validator_version) == 1 else b,
3130
dataset.validation_reports,
3231
)
33-
(
34-
total_error,
35-
total_info,
36-
total_warning,
37-
unique_error_count,
38-
unique_info_count,
39-
unique_warning_count,
40-
) = ValidationReportImpl.compute_totals(latest_report)
4132
validation_report = LatestDatasetValidationReport(
42-
total_error=total_error,
43-
total_warning=total_warning,
44-
total_info=total_info,
45-
unique_error_count=unique_error_count,
46-
unique_warning_count=unique_warning_count,
47-
unique_info_count=unique_info_count,
33+
total_error=latest_report.total_error,
34+
total_warning=latest_report.total_warning,
35+
total_info=latest_report.total_info,
36+
unique_error_count=latest_report.unique_error_count,
37+
unique_warning_count=latest_report.unique_warning_count,
38+
unique_info_count=latest_report.unique_info_count,
4839
)
4940
return cls(
5041
id=dataset.stable_id,

api/src/feeds/impl/models/validation_report_impl.py

Lines changed: 7 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,6 @@ class Config:
1616

1717
from_attributes = True
1818

19-
@staticmethod
20-
def compute_totals(validation_report) -> tuple[int, int, int, int, int, int]:
21-
"""Compute the total number of errors, info, and warnings from a validation report,
22-
and count the number of distinct codes for each."""
23-
total_info, total_warning, total_error = 0, 0, 0
24-
info_codes, warning_codes, error_codes = set(), set(), set()
25-
for notice in validation_report.notices:
26-
match notice.severity:
27-
case "INFO":
28-
total_info += notice.total_notices
29-
info_codes.add(notice.notice_code)
30-
case "WARNING":
31-
total_warning += notice.total_notices
32-
warning_codes.add(notice.notice_code)
33-
case "ERROR":
34-
total_error += notice.total_notices
35-
error_codes.add(notice.notice_code)
36-
case _:
37-
ValidationReportImpl._get_logger().warning(f"Unknown severity: {notice.severity}")
38-
return total_error, total_info, total_warning, len(error_codes), len(info_codes), len(warning_codes)
39-
4019
@classmethod
def _get_logger(cls):
    """Return a logger built from the name of the module that defines ``cls``."""
    # `SomeClass.__class__` is the class's metaclass, so reading
    # `.__class__.__module__` yields the metaclass's module rather than the
    # module this class lives in. Read `__module__` from the class itself.
    return Logger(cls.__module__).get_logger()
@@ -46,24 +25,17 @@ def from_orm(cls, validation_report: Validationreport | None) -> ValidationRepor
4625
"""Create a model instance from a SQLAlchemy a Validation Report row object."""
4726
if not validation_report:
4827
return None
49-
(
50-
total_error,
51-
total_info,
52-
total_warning,
53-
unique_error_count,
54-
unique_info_count,
55-
unique_warning_count,
56-
) = cls.compute_totals(validation_report)
28+
5729
return cls(
5830
validated_at=validation_report.validated_at,
5931
features=[feature.name for feature in validation_report.features],
6032
validator_version=validation_report.validator_version,
61-
total_error=total_error,
62-
total_warning=total_warning,
63-
total_info=total_info,
64-
unique_error_count=unique_error_count,
65-
unique_warning_count=unique_warning_count,
66-
unique_info_count=unique_info_count,
33+
total_error=validation_report.total_error,
34+
total_warning=validation_report.total_warning,
35+
total_info=validation_report.total_info,
36+
unique_error_count=validation_report.unique_error_count,
37+
unique_warning_count=validation_report.unique_warning_count,
38+
unique_info_count=validation_report.unique_info_count,
6739
url_json=validation_report.json_report,
6840
url_html=validation_report.html_report,
6941
# TODO this field is not in the database

api/src/shared/__init__.py

Whitespace-only changes.

api/src/shared/common/db_utils.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,7 @@ def get_gtfs_feeds_query(
7878

7979
if include_options_for_joinedload:
8080
feed_query = feed_query.options(
81-
contains_eager(Gtfsfeed.gtfsdatasets)
82-
.joinedload(Gtfsdataset.validation_reports)
83-
.joinedload(Validationreport.notices),
81+
contains_eager(Gtfsfeed.gtfsdatasets).joinedload(Gtfsdataset.validation_reports),
8482
*get_joinedload_options(),
8583
).order_by(Gtfsfeed.provider, Gtfsfeed.stable_id)
8684

api/tests/integration/test_database.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,6 @@ def test_validation_report(test_database):
131131
assert validation_report is not None
132132
assert validation_report.validator_version == NEW_VALIDATION_VERSION
133133
assert validation_report.validated_at == NEW_VALIDATION_TIME
134-
assert validation_report.total_info == VALIDATION_INFO_COUNT_PER_NOTICE * VALIDATION_INFO_NOTICES
135-
assert validation_report.total_warning == VALIDATION_WARNING_COUNT_PER_NOTICE * VALIDATION_WARNING_NOTICES
136-
assert validation_report.total_error == VALIDATION_ERROR_COUNT_PER_NOTICE * VALIDATION_ERROR_NOTICES
137134

138135

139136
def test_generate_unique_id():

api/tests/unittest/models/test_gtfs_feed_impl.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,13 @@ class TestGtfsFeedImpl(unittest.TestCase):
185185

186186
def test_from_orm_all_fields(self):
187187
"""Test the `from_orm` method with all fields."""
188+
# Update the validation report in gtfs_feed_orm to include precomputed counters
189+
gtfs_feed_orm.gtfsdatasets[0].validation_reports[0].total_error = 27
190+
gtfs_feed_orm.gtfsdatasets[0].validation_reports[0].total_warning = 64
191+
gtfs_feed_orm.gtfsdatasets[0].validation_reports[0].total_info = 4
192+
gtfs_feed_orm.gtfsdatasets[0].validation_reports[0].unique_error_count = 3
193+
gtfs_feed_orm.gtfsdatasets[0].validation_reports[0].unique_warning_count = 4
194+
gtfs_feed_orm.gtfsdatasets[0].validation_reports[0].unique_info_count = 2
188195
result = GtfsFeedImpl.from_orm(gtfs_feed_orm)
189196
assert result == expected_gtfs_feed_result
190197

api/tests/unittest/models/test_latest_dataset_impl.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from geoalchemy2 import WKTElement
66

7-
from shared.database_gen.sqlacodegen_models import Gtfsdataset, Feed, Validationreport, Notice
7+
from shared.database_gen.sqlacodegen_models import Gtfsdataset, Feed, Validationreport
88
from feeds.impl.models.bounding_box_impl import BoundingBoxImpl
99
from feeds.impl.models.latest_dataset_impl import LatestDatasetImpl
1010

@@ -28,17 +28,33 @@ def test_from_orm(self):
2828
service_date_range_end=datetime(2025, 1, 1, 0, 0, 0, tzinfo=ZoneInfo("Canada/Atlantic")),
2929
agency_timezone="Canada/Atlantic",
3030
validation_reports=[
31-
Validationreport(validator_version="1.0.0"),
31+
Validationreport(
32+
validator_version="1.0.0",
33+
total_error=0,
34+
total_warning=0,
35+
total_info=0,
36+
unique_error_count=0,
37+
unique_warning_count=0,
38+
unique_info_count=0,
39+
),
3240
Validationreport(
3341
validator_version="1.2.0",
34-
notices=[
35-
Notice(severity="INFO", total_notices=1),
36-
Notice(severity="ERROR", total_notices=2, notice_code="foreign_key_violation"),
37-
Notice(severity="ERROR", total_notices=1, notice_code="empty_column_name"),
38-
Notice(severity="WARNING", total_notices=3),
39-
],
42+
total_error=3,
43+
total_warning=3,
44+
total_info=1,
45+
unique_error_count=2,
46+
unique_warning_count=1,
47+
unique_info_count=1,
48+
),
49+
Validationreport(
50+
validator_version="1.1.1",
51+
total_error=1,
52+
total_warning=2,
53+
total_info=0,
54+
unique_error_count=1,
55+
unique_warning_count=1,
56+
unique_info_count=0,
4057
),
41-
Validationreport(validator_version="1.1.1"),
4258
],
4359
)
4460
) == LatestDatasetImpl(

api/tests/unittest/models/test_validation_report_impl.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ def test_from_orm(self):
1212
validator_version="1.0.0",
1313
json_report="http://json_report",
1414
html_report="http://html_report",
15+
total_error=33,
16+
total_warning=22,
17+
total_info=11,
18+
unique_error_count=2,
19+
unique_warning_count=1,
20+
unique_info_count=1,
1521
notices=[
1622
Notice(severity="INFO", total_notices=10),
1723
Notice(severity="INFO", total_notices=1),

functions-python/process_validation_report/src/main.py

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@
1818
import logging
1919
from datetime import datetime
2020
import requests
21-
from shared.helpers.database import Database
2221
from shared.helpers.timezone import (
2322
extract_timezone_from_json_validation_report,
2423
get_service_date_range_with_timezone_utc,
2524
)
2625
import functions_framework
26+
from shared.helpers.database import Database
2727
from shared.database_gen.sqlacodegen_models import (
2828
Validationreport,
2929
Feature,
@@ -164,6 +164,10 @@ def generate_report_entities(
164164
feature = get_feature(feature_name, session)
165165
feature.validations.append(validation_report_entity)
166166
entities.append(feature)
167+
168+
# Process notices and compute counters
169+
counters = process_validation_report_notices(json_report["notices"])
170+
167171
for notice in json_report["notices"]:
168172
notice_entity = Notice(
169173
dataset_id=dataset.id,
@@ -174,6 +178,14 @@ def generate_report_entities(
174178
)
175179
dataset.notices.append(notice_entity)
176180
entities.append(notice_entity)
181+
182+
# Update the validation report entity with computed counters
183+
validation_report_entity.total_info = counters["total_info"]
184+
validation_report_entity.total_warning = counters["total_warning"]
185+
validation_report_entity.total_error = counters["total_error"]
186+
validation_report_entity.unique_info_count = counters["unique_info_count"]
187+
validation_report_entity.unique_warning_count = counters["unique_warning_count"]
188+
validation_report_entity.unique_error_count = counters["unique_error_count"]
177189
return entities
178190

179191

@@ -227,7 +239,6 @@ def create_validation_report_entities(feed_stable_id, dataset_stable_id, version
227239
try:
228240
with db.start_db_session() as session:
229241
logging.info("Database session started.")
230-
231242
# Generate the database entities required for the report
232243
try:
233244
entities = generate_report_entities(
@@ -301,3 +312,88 @@ def process_validation_report(request):
301312
f"Processing validation report version {validator_version} for dataset {dataset_id} in feed {feed_id}."
302313
)
303314
return create_validation_report_entities(feed_id, dataset_id, validator_version)
315+
316+
317+
@functions_framework.http
def compute_validation_report_counters(request):
    """
    Backfill the notice counters of every validation report in the database.

    For each Validationreport row, compute the total number of INFO, WARNING and
    ERROR notices, as well as the number of distinct notice codes per severity,
    from the report's notices and write the results back to the row. Reports are
    processed in batches and every batch is committed separately.

    :param request: The HTTP request that triggered the function (not read here).
    :return: A ``(body, status_code)`` tuple reporting success.
    """
    batch_size = 100  # Number of reports to process in each batch
    offset = 0
    db = Database()
    with db.start_db_session(echo=False) as session:
        while True:
            # LIMIT/OFFSET paging is only stable with an explicit ordering. Without
            # it, rows updated by a previous batch may come back in a different
            # position and be processed twice or skipped altogether.
            validation_reports = (
                session.query(Validationreport)
                .order_by(Validationreport.id)
                .limit(batch_size)
                .offset(offset)
                .all()
            )
            logging.info(
                f"Processing {len(validation_reports)} validation reports from offset {offset}."
            )
            # Stop once a page comes back empty: every report has been visited.
            if not validation_reports:
                break

            for report in validation_reports:
                counters = process_validation_report_notices(report.notices)

                # Update the report with computed counters
                report.total_info = counters["total_info"]
                report.total_warning = counters["total_warning"]
                report.total_error = counters["total_error"]
                report.unique_info_count = counters["unique_info_count"]
                report.unique_warning_count = counters["unique_warning_count"]
                report.unique_error_count = counters["unique_error_count"]

                logging.info(
                    f"Updated ValidationReport {report.id} with counters: "
                    f"INFO={report.total_info}, WARNING={report.total_warning}, ERROR={report.total_error}, "
                    f"Unique INFO Code={report.unique_info_count}, Unique WARNING Code={report.unique_warning_count}, "
                    f"Unique ERROR Code={report.unique_error_count}"
                )

            # Commit the changes for the current batch
            session.commit()

            # Move to the next batch
            offset += batch_size

    return {"message": "Validation report counters computed successfully."}, 200
364+
365+
366+
def process_validation_report_notices(notices):
    """
    Compute per-severity counters for the notices of a validation report.

    Each notice may be an object exposing ``severity``, ``total_notices`` and
    ``notice_code`` attributes, or a dict such as an entry of the ``notices``
    list of a parsed JSON report. Notices whose severity is not INFO, WARNING
    or ERROR are logged and left out of the counters.

    :param notices: An iterable of notices (objects or dicts).
    :return: A dictionary with computed counters for total and unique counts of
        INFO, WARNING, and ERROR severities.
    """
    totals = {"INFO": 0, "WARNING": 0, "ERROR": 0}
    codes = {"INFO": set(), "WARNING": set(), "ERROR": set()}

    for notice in notices:
        severity = _notice_field(notice, "severity", "severity")
        if severity not in totals:
            logging.warning(f"Unknown severity: {severity}")
            continue
        # A missing or null total contributes nothing instead of raising.
        totals[severity] += _notice_field(notice, "total_notices", "totalNotices") or 0
        codes[severity].add(_notice_field(notice, "notice_code", "code"))

    return {
        "total_info": totals["INFO"],
        "total_warning": totals["WARNING"],
        "total_error": totals["ERROR"],
        "unique_info_count": len(codes["INFO"]),
        "unique_warning_count": len(codes["WARNING"]),
        "unique_error_count": len(codes["ERROR"]),
    }


def _notice_field(notice, attribute, json_key):
    """Read one field of a notice given either as an object or as a dict."""
    if isinstance(notice, dict):
        # NOTE(review): the camelCase keys ("totalNotices", "code") are assumed to
        # match the validator's JSON report - confirm against the Notice(...)
        # construction in generate_report_entities.
        if attribute in notice:
            return notice[attribute]
        return notice.get(json_key)
    return getattr(notice, attribute)

0 commit comments

Comments
 (0)