Skip to content

Commit 7207698

Browse files
committed
add acceptance tests
1 parent 0b6d67c commit 7207698

File tree

1 file changed

+107
-1
lines changed

1 file changed

+107
-1
lines changed

tests/acceptance/test_async_backend_pipeline_run.py

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,21 @@ def run_pipeline_for_test(test_dirs, dataset, resource, request_id, input_path):
6262
"reference": "ABC_0001",
6363
"entity": 1,
6464
}
65+
row2 = {
66+
"prefix": "article-4-direction",
67+
"resource": "",
68+
"entry-number": "",
69+
"organisation": organisation,
70+
"reference": "a4d2",
71+
"entity": 10,
72+
}
73+
6574
fieldnames = row.keys()
6675
with open(os.path.join(pipeline_dir, "lookup.csv"), "w") as f:
6776
dictwriter = csv.DictWriter(f, fieldnames=fieldnames)
6877
dictwriter.writeheader()
6978
dictwriter.writerow(row)
79+
dictwriter.writerow(row2)
7080

7181
# Create organisation.csv with data
7282
row = {"organisation": "test-org", "name": "Test Org"}
@@ -165,7 +175,7 @@ def run_pipeline_for_test(test_dirs, dataset, resource, request_id, input_path):
165175
field_typology_map=specification.get_field_typology_map(),
166176
field_prefix_map=specification.get_field_prefix_map(),
167177
),
168-
FactLookupPhase(lookups),
178+
FactLookupPhase(lookups, issue_log=issue_log),
169179
FactPrunePhase(),
170180
SavePhase(
171181
output_path,
@@ -188,6 +198,7 @@ def run_pipeline_for_test(test_dirs, dataset, resource, request_id, input_path):
188198
return {
189199
"output_path": output_path,
190200
"issue_log": issue_log_path,
201+
"save_issue_log": issue_log,
191202
"column_field_log": column_field_log_path,
192203
"dataset_resource_log": dataset_resource_log_path,
193204
}
@@ -408,3 +419,98 @@ def test_column_field_log_creation(test_dirs):
408419
assert (
409420
"field" in column_field_log_df.columns
410421
), "Column field log is missing 'field' column."
422+
423+
424+
def test_pipeline_lookup_phase(test_dirs):
    """When a fact references an article-4-direction with no lookup entry,
    the FactLookupPhase should record an issue for that field.

    The input row references "a4d1", which has no matching row in
    lookup.csv, so the pipeline cannot resolve it to an entity and is
    expected to log a "no associated documents found for this area" issue.
    """
    dataset = "article-4-direction-area"
    resource = "5158d13bfc6f0723b1fb07c975701a906e83a1ead4aee598ee34e241c79a5f3d"
    request_id = "test_request_id"

    rows = [
        {
            "reference": "ABC_0001",
            "entry-date": "2025-01-01",
            "organisation": "test-org",
            "article-4-direction": "a4d1",
        }
    ]

    input_path = os.path.join(test_dirs["collection_dir"], resource)

    with open(input_path, "w", newline="") as f:
        fieldnames = ["reference", "entry-date", "organisation", "article-4-direction"]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    # Run the pipeline
    run_pipeline = run_pipeline_for_test(
        test_dirs=test_dirs,
        dataset=dataset,
        resource=resource,
        request_id=request_id,
        input_path=input_path,
    )

    # Pull the in-memory issue log (and its on-disk path) out of the
    # pipeline result, then find the issue raised against our field.
    issue_log = run_pipeline.get("save_issue_log")
    issue_log_path = run_pipeline.get("issue_log")
    issue_dict = next(
        (
            issue
            for issue in issue_log.rows
            if issue.get("field") == "article-4-direction"
        ),
        None,
    )

    assert os.path.exists(issue_log_path)
    # Fail with a clear message rather than a TypeError on None if no
    # issue was recorded for the field at all.
    assert (
        issue_dict is not None
    ), "expected an issue for field 'article-4-direction' in the issue log"
    assert "no associated documents found for this area" == issue_dict["issue-type"]
    assert "a4d1" == issue_dict["value"]
470+
471+
472+
def test_pipeline_lookup_phase_assign_reference_entity(test_dirs):
    """A fact referencing "a4d2" — which the test lookup.csv maps to
    entity 10 — should resolve cleanly: no "missing documents" issue is
    raised, and the resolved entity appears in the output's
    reference-entity column.
    """
    dataset = "article-4-direction-area"
    resource = "5158d13bfc6f0723b1fb07c975701a906e83a1ead4aee598ee34e241c79a5f3d"
    request_id = "test_request_id"

    input_rows = [
        {
            "reference": "ABC_0001",
            "entry-date": "2025-01-01",
            "organisation": "test-org",
            "article-4-direction": "a4d2",
        }
    ]

    input_path = os.path.join(test_dirs["collection_dir"], resource)

    header = ["reference", "entry-date", "organisation", "article-4-direction"]
    with open(input_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        for input_row in input_rows:
            writer.writerow(input_row)

    # Run the pipeline
    result = run_pipeline_for_test(
        test_dirs=test_dirs,
        dataset=dataset,
        resource=resource,
        request_id=request_id,
        input_path=input_path,
    )

    # The lookup succeeded, so no issue of this type may be present.
    issue_log = result.get("save_issue_log")
    assert all(
        issue.get("issue-type") != "no associated documents found for this area"
        for issue in issue_log.rows
    )

    output_path = result.get("output_path")
    output_df = pd.read_csv(output_path)

    assert os.path.exists(output_path)
    # pandas reads the column as float, hence 10.0 rather than 10.
    assert 10.0 in output_df["reference-entity"].values

0 commit comments

Comments (0)