Skip to content

Commit 63ee442

Browse files
authored
fix: process validation report when dataset is not committed to the DB yet (#1127)
1 parent 4fb3781 commit 63ee442

File tree

3 files changed

+58
-18
lines changed

3 files changed

+58
-18
lines changed

functions-python/batch_process_dataset/src/main.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,13 @@ def create_dataset(self, dataset_file: DatasetFile, db_session: Session):
238238
latest_dataset.latest = False
239239
db_session.add(latest_dataset)
240240
db_session.add(new_dataset)
241+
db_session.commit()
242+
logging.info(f"[{self.feed_stable_id}] Dataset created successfully.")
241243

242244
refresh_materialized_view(db_session, t_feedsearch.name)
243-
logging.info(f"[{self.feed_stable_id}] Dataset created successfully.")
245+
logging.info(
246+
f"[{self.feed_stable_id}] Materialized view refresh event triggered successfully."
247+
)
244248
except Exception as e:
245249
raise Exception(f"Error creating dataset: {e}")
246250

functions-python/process_validation_report/src/main.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ def generate_report_entities(
155155
entities.append(validation_report_entity)
156156

157157
dataset = get_dataset(dataset_stable_id, session)
158+
if not dataset:
159+
raise Exception(f"Dataset {dataset_stable_id} not found.")
158160
dataset.validation_reports.append(validation_report_entity)
159161

160162
extracted_timezone = extract_timezone_from_json_validation_report(json_report)
@@ -251,32 +253,36 @@ def create_validation_report_entities(
251253
return str(error), 500
252254

253255
try:
254-
logging.info("Database session started.")
255256
# Generate the database entities required for the report
256-
try:
257-
entities = generate_report_entities(
258-
version,
259-
validated_at,
260-
json_report,
261-
dataset_stable_id,
262-
db_session,
263-
feed_stable_id,
264-
)
265-
except Exception as error:
266-
return str(error), 200 # Report already exists
257+
# If an error is raised here, we let it propagate so the retry mechanism can do its work
258+
entities = generate_report_entities(
259+
version,
260+
validated_at,
261+
json_report,
262+
dataset_stable_id,
263+
db_session,
264+
feed_stable_id,
265+
)
267266

268-
# Commit the entities to the database
269267
for entity in entities:
270268
db_session.add(entity)
271-
logging.info(f"Committing {len(entities)} entities to the database.")
272-
db_session.commit()
273-
logging.info("Entities committed successfully.")
269+
# If the commit below fails, the report entities are already in the DB or cannot be saved for other reasons
270+
# In either case, the commit would fail again on every retried event
271+
try:
272+
logging.info("Committing %s entities to the database.", len(entities))
273+
db_session.commit()
274+
logging.info("Entities committed successfully.")
275+
except Exception as error:
276+
logging.warning(
277+
"Could not commit %s entities to the database: %s", entities, error
278+
)
279+
return str(error), 200
274280

275281
update_feed_statuses_query(db_session, [feed_stable_id])
276282

277283
return f"Created {len(entities)} entities.", 200
278284
except Exception as error:
279-
logging.error(f"Error creating validation report entities: {error}")
285+
logging.error("Error creating validation report entities: : %s", error)
280286
return f"Error creating validation report entities: {error}", 500
281287
finally:
282288
pass

functions-python/process_validation_report/tests/test_validation_report.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,36 @@ def test_create_validation_report_entities_json_error2(self, mock_get):
176176
)
177177
self.assertEqual(status, 500)
178178

179+
@mock.patch("requests.get")
180+
def test_create_validation_report_entities_missing_dataset(self, mock_get):
181+
"""
182+
Test the create_validation_report_entities function when the dataset is not found in the DB
183+
"""
184+
mock_get.return_value = MagicMock(
185+
status_code=200,
186+
json=lambda: {
187+
"summary": {
188+
"validatedAt": "2021-01-01T00:00:00Z",
189+
"validatorVersion": "1.0",
190+
"gtfsFeatures": ["stops", "routes"],
191+
},
192+
"notices": [
193+
{"code": "notice_code", "severity": "ERROR", "totalNotices": 1}
194+
],
195+
},
196+
)
197+
feed_stable_id = faker.word()
198+
dataset_stable_id = "MISSING_ID"
199+
200+
message, status = create_validation_report_entities(
201+
feed_stable_id, dataset_stable_id, "1.0"
202+
)
203+
self.assertEqual(500, status)
204+
self.assertEqual(
205+
"Error creating validation report entities: Dataset MISSING_ID not found.",
206+
message,
207+
)
208+
179209
@patch("main.Logger")
180210
@patch("main.create_validation_report_entities")
181211
def test_process_validation_report(self, create_validation_report_entities_mock, _):

0 commit comments

Comments
 (0)