Skip to content

Commit 0b6d67c

Browse files
committed
utilise factLookupPhase()
1 parent af82510 commit 0b6d67c

File tree

3 files changed

+140
-91
lines changed

3 files changed

+140
-91
lines changed

digital_land/commands.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,9 @@ def pipeline_run(
345345
field_typology_map=specification.get_field_typology_map(),
346346
field_prefix_map=specification.get_field_prefix_map(),
347347
),
348-
FactLookupPhase(lookups=lookups, redirect_lookups=redirect_lookups),
348+
FactLookupPhase(
349+
lookups=lookups, redirect_lookups=redirect_lookups, issue_log=issue_log
350+
),
349351
FactPrunePhase(),
350352
SavePhase(
351353
output_path,

digital_land/phase/lookup.py

Lines changed: 49 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -163,37 +163,6 @@ def process(self, stream):
163163
row[self.entity_field] = self.redirect_entity(
164164
row[self.entity_field]
165165
)
166-
167-
# TODO in future could get the datsets from specification
168-
linked_datasets = ["article-4-direction", "tree-preservation-order"]
169-
if row[self.entity_field]:
170-
for linked_dataset in linked_datasets:
171-
if (
172-
row.get(linked_dataset, "")
173-
or row.get(linked_dataset, "").strip()
174-
):
175-
reference = row.get(linked_dataset, "")
176-
find_entity = self.lookup(
177-
prefix=linked_dataset,
178-
organisation=row.get("organisation", ""),
179-
reference=reference,
180-
)
181-
# raise issue if the found entity is retired in old-entity.csv
182-
if not find_entity or (
183-
str(find_entity) in self.redirect_lookups
184-
and int(
185-
self.redirect_lookups[str(find_entity)].get(
186-
"status", 0
187-
)
188-
)
189-
== 410
190-
):
191-
self.issues.log_issue(
192-
linked_dataset,
193-
"no associated documents found for this area",
194-
reference,
195-
line_number=line_number,
196-
)
197166
yield block
198167

199168

@@ -210,7 +179,55 @@ def process(self, stream):
210179

211180

212181
class FactLookupPhase(LookupPhase):
213-
entity_field = "reference-entity"
182+
def __init__(self, lookups={}, redirect_lookups={}, issue_log=None):
183+
super().__init__(lookups, redirect_lookups, issue_log)
184+
self.entity_field = "reference-entity"
185+
186+
def process(self, stream):
187+
for block in stream:
188+
row = block["row"]
189+
line_number = row.get("line-number", "")
190+
prefix = row.get("prefix", "")
191+
reference = row.get("reference", "")
192+
entity_number = row.get("entity", "")
193+
194+
if prefix and reference:
195+
if entity_number in self.reverse_lookups:
196+
value = self.reverse_lookups[entity_number]
197+
198+
if value:
199+
organisation = value[-1].split(",")[-1]
200+
find_entity = self.lookup(
201+
prefix=prefix,
202+
organisation=organisation,
203+
reference=reference,
204+
)
205+
206+
# TODO get the fields from specification
207+
if (
208+
not find_entity
209+
or (
210+
str(find_entity) in self.redirect_lookups
211+
and int(
212+
self.redirect_lookups[str(find_entity)].get(
213+
"status", 0
214+
)
215+
)
216+
== 410
217+
)
218+
) and prefix in [
219+
"article-4-direction",
220+
"tree-preservation-order",
221+
]:
222+
self.issues.log_issue(
223+
prefix,
224+
"no associated documents found for this area",
225+
reference,
226+
line_number=line_number,
227+
)
228+
else:
229+
row[self.entity_field] = find_entity
230+
yield block
214231

215232

216233
class PrintLookupPhase(LookupPhase):

tests/unit/phase/test_lookup.py

Lines changed: 88 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,11 @@
11
import pytest
22

3-
from digital_land.phase.lookup import LookupPhase, EntityLookupPhase, PrintLookupPhase
3+
from digital_land.phase.lookup import (
4+
LookupPhase,
5+
EntityLookupPhase,
6+
PrintLookupPhase,
7+
FactLookupPhase,
8+
)
49
from digital_land.log import IssueLog
510

611

@@ -24,13 +29,15 @@ def get_input_stream_with_linked_field():
2429
return [
2530
{
2631
"row": {
27-
"prefix": "article-4-direction-area",
28-
"reference": "1",
29-
"organisation": "local-authority:ABC",
30-
"article-4-direction": "a4d2",
31-
},
32-
"entry-number": 1,
33-
"line-number": 2,
32+
"fact": "abc",
33+
"entity": "10",
34+
"field": "article-4-direction",
35+
"value": "a4d1",
36+
"prefix": "article-4-direction",
37+
"reference": "a4d1",
38+
"entry-number": 1,
39+
"line-number": 2,
40+
}
3441
}
3542
]
3643

@@ -139,56 +146,6 @@ def test_process_empty_prefix(self, get_lookup):
139146

140147
assert output[0]["row"]["entity"] == "10"
141148

142-
def test_no_associated_documents_issue(self, get_input_stream_with_linked_field):
143-
input_stream = get_input_stream_with_linked_field
144-
145-
lookups = {
146-
",article-4-direction,a4d1,local-authorityabc": "1",
147-
",article-4-direction-area,1,local-authorityabc": "2",
148-
}
149-
issues = IssueLog()
150-
151-
phase = LookupPhase(
152-
lookups=lookups,
153-
issue_log=issues,
154-
)
155-
phase.entity_field = "entity"
156-
output = [block for block in phase.process(input_stream)]
157-
158-
assert output[0]["row"]["entity"] == "2"
159-
assert (
160-
issues.rows[0]["issue-type"]
161-
== "no associated documents found for this area"
162-
)
163-
assert issues.rows[0]["value"] == "a4d2"
164-
165-
def test_no_associated_documents_issue_for_retired_entity(
166-
self, get_input_stream_with_linked_field
167-
):
168-
input_stream = get_input_stream_with_linked_field
169-
170-
lookups = {
171-
",article-4-direction,a4d2,local-authorityabc": "1",
172-
",article-4-direction-area,1,local-authorityabc": "2",
173-
}
174-
issues = IssueLog()
175-
redirect_lookups = {"1": {"entity": "", "status": "410"}}
176-
177-
phase = LookupPhase(
178-
lookups=lookups,
179-
redirect_lookups=redirect_lookups,
180-
issue_log=issues,
181-
)
182-
phase.entity_field = "entity"
183-
output = [block for block in phase.process(input_stream)]
184-
185-
assert output[0]["row"]["entity"] == "2"
186-
assert (
187-
issues.rows[0]["issue-type"]
188-
== "no associated documents found for this area"
189-
)
190-
assert issues.rows[0]["value"] == "a4d2"
191-
192149

193150
class TestPrintLookupPhase:
194151
def test_process_does_not_produce_new_lookup(self, get_input_stream, get_lookup):
@@ -262,3 +219,76 @@ def test_entity_lookup_phase_blank(self):
262219
output = [block for block in phase.process(input_stream)]
263220

264221
assert len(output) == 0
222+
223+
224+
class TestFactLookupPhase:
225+
def test_no_associated_documents_issue_raised(
226+
self, get_input_stream_with_linked_field
227+
):
228+
input_stream = get_input_stream_with_linked_field
229+
lookups = {
230+
",article-4-direction,a4d2,local-authorityabc": "1",
231+
",article-4-direction-area,1,local-authorityabc": "10",
232+
}
233+
issues = IssueLog()
234+
235+
phase = FactLookupPhase(
236+
lookups=lookups,
237+
issue_log=issues,
238+
)
239+
phase.entity_field = "reference-entity"
240+
output = [block for block in phase.process(input_stream)]
241+
242+
assert "reference-entity" not in output
243+
assert (
244+
issues.rows[0]["issue-type"]
245+
== "no associated documents found for this area"
246+
)
247+
assert issues.rows[0]["value"] == "a4d1"
248+
249+
def test_no_associated_documents_issue_for_retired_entity(
250+
self, get_input_stream_with_linked_field
251+
):
252+
input_stream = get_input_stream_with_linked_field
253+
254+
lookups = {
255+
",article-4-direction,a4d1,local-authorityabc": "1",
256+
",article-4-direction-area,1,local-authorityabc": "10",
257+
}
258+
issues = IssueLog()
259+
redirect_lookups = {"1": {"entity": "", "status": "410"}}
260+
261+
phase = FactLookupPhase(
262+
lookups=lookups,
263+
redirect_lookups=redirect_lookups,
264+
issue_log=issues,
265+
)
266+
phase.entity_field = "reference-entity"
267+
output = [block for block in phase.process(input_stream)]
268+
269+
assert "reference-entity" not in output
270+
assert (
271+
issues.rows[0]["issue-type"]
272+
== "no associated documents found for this area"
273+
)
274+
assert issues.rows[0]["value"] == "a4d1"
275+
276+
def test_no_associated_documents_issue_not_raised(
277+
self, get_input_stream_with_linked_field
278+
):
279+
input_stream = get_input_stream_with_linked_field
280+
lookups = {
281+
",article-4-direction,a4d1,local-authorityabc": "1",
282+
",article-4-direction-area,1,local-authorityabc": "10",
283+
}
284+
issues = IssueLog()
285+
286+
phase = FactLookupPhase(
287+
lookups=lookups,
288+
issue_log=issues,
289+
)
290+
phase.entity_field = "reference-entity"
291+
output = [block for block in phase.process(input_stream)]
292+
293+
assert output[0]["row"]["reference-entity"] == "1"
294+
assert len(issues.rows) == 0

0 commit comments

Comments
 (0)