Skip to content

Commit 687c8e9

Browse files
authored
Merge pull request #95 from digital-land/entity-org-and-plugin-errors
added authoritative store in params, entity-org creation, and plugin …
2 parents 9fbf025 + 879d7a3 commit 687c8e9

File tree

14 files changed

+88
-41
lines changed

14 files changed

+88
-41
lines changed

request-api/makefile

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
test: lint test-coverage
22

3-
lint:: black-check flake8
3+
lint: black-check flake8
44

5-
black-check:
6-
black --check .
5+
black-check:
6+
python -m black --check ./src ./tests
77

8-
black:
9-
black .
8+
flake8:
9+
python -m flake8 ./src ./tests
1010

11-
flake8:
12-
flake8 .
11+
black:
12+
python -m black .
1313

1414
test-coverage:: coverage-unit coverage-integration coverage-acceptance
1515

request-processor/makerules/python.mk

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
all:: lint test-coverage
22

3-
lint:
4-
make black ./src/application
5-
python -m flake8 ./src/application
3+
lint: black-check flake8
64

75
black-check:
8-
black --check .
6+
python -m black --check --exclude "digital-land" ./src ./tests
7+
8+
flake8:
9+
python -m flake8 --exclude="./src/digital-land" ./src ./tests
910

1011
black:
1112
python -m black .

request-processor/src/application/core/pipeline.py

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def fetch_add_data_response(
195195
output_path,
196196
specification_dir,
197197
cache_dir,
198-
url,
198+
endpoint,
199199
):
200200
try:
201201
specification = Specification(specification_dir)
@@ -210,6 +210,7 @@ def fetch_add_data_response(
210210

211211
existing_entities = []
212212
new_entities = []
213+
entity_org_mapping = []
213214
issues_log = None
214215

215216
for idx, resource_file in enumerate(files_in_resource):
@@ -227,7 +228,7 @@ def fetch_add_data_response(
227228
resource=resource_from_path(resource_file_path),
228229
valid_category_values=valid_category_values,
229230
disable_lookups=False,
230-
endpoints=[url],
231+
endpoints=[endpoint],
231232
)
232233

233234
existing_entities.extend(
@@ -253,13 +254,19 @@ def fetch_add_data_response(
253254
pipeline_dir=pipeline_dir,
254255
specification=specification,
255256
cache_dir=cache_dir,
256-
endpoints=[url] if url else None,
257+
endpoints=[endpoint] if endpoint else None,
257258
)
258259
logger.info(
259260
f"Found {len(new_lookups)} unidentified lookups in {resource_file}"
260261
)
261262
new_entities.extend(new_lookups)
262263

264+
# By default, create an entity-organisation mapping; the front end can use the 'authoritative' flag
265+
entity_org_mapping = create_entity_organisation(
266+
new_lookups, dataset, organisation_provider
267+
)
268+
# TODO, save to pipeline as well for rerun?
269+
263270
# Reload pipeline to pick up newly saved lookups
264271
pipeline = Pipeline(
265272
pipeline_dir, dataset, specification=specification
@@ -274,7 +281,7 @@ def fetch_add_data_response(
274281
resource=resource_from_path(resource_file_path),
275282
valid_category_values=valid_category_values,
276283
disable_lookups=False,
277-
endpoints=[url],
284+
endpoints=[endpoint],
278285
)
279286
else:
280287
logger.info(f"No unidentified lookups found in {resource_file}")
@@ -293,6 +300,7 @@ def fetch_add_data_response(
293300
"existing-in-resource": len(existing_entities),
294301
"new-entities": new_entities_breakdown,
295302
"existing-entities": existing_entities_breakdown,
303+
"entity-organisation": entity_org_mapping,
296304
"pipeline-issues": [dict(issue) for issue in issues_log.rows]
297305
if issues_log
298306
else [],
@@ -354,6 +362,38 @@ def _get_existing_entities_breakdown(existing_entities):
354362
return breakdown
355363

356364

365+
def create_entity_organisation(new_entities, dataset, organisation):
366+
"""
367+
Create entity-organisation mapping from new entities.
368+
369+
Args:
370+
new_entities: List of entity dicts with 'entity' key
371+
dataset: Dataset name
372+
organisation: Organisation identifier
373+
374+
Returns:
375+
List with single dict containing dataset, entity-minimum, entity-maximum, organisation
376+
"""
377+
if not new_entities:
378+
return []
379+
380+
entity_values = [
381+
entry.get("entity") for entry in new_entities if entry.get("entity")
382+
]
383+
384+
if not entity_values:
385+
return []
386+
387+
return [
388+
{
389+
"dataset": dataset,
390+
"entity-minimum": min(entity_values),
391+
"entity-maximum": max(entity_values),
392+
"organisation": organisation,
393+
}
394+
]
395+
396+
357397
def _map_transformed_entities(transformed_csv_path, pipeline_dir): # noqa: C901
358398
"""Extract unique entities from transformed CSV and lookup their details in lookup.csv."""
359399

request-processor/src/application/core/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,11 @@ def create_user_friendly_error_log(exception_detail):
251251
status_code = exception_detail.get("errCode")
252252
exception_type = exception_detail.get("exceptionType")
253253
content_type = exception_detail.get("contentType")
254+
plugin = exception_detail.get("plugin")
254255

255256
user_message = "An error occurred, please try again later."
256257

258+
# The ordering here has been considered to show the most relevant message to users in the front end
257259
if exception_type in ["SSLError", "SSLCertVerificationError"]:
258260
user_message = "SSL certificate verification failed"
259261
elif content_type and "text/html" in content_type:
@@ -264,6 +266,8 @@ def create_user_friendly_error_log(exception_detail):
264266
user_message = "The URL must be accessible"
265267
elif status_code == "404":
266268
user_message = "The URL does not exist. Check the URL you've entered is correct (HTTP 404 error)"
269+
elif plugin == "arcgis" and status_code == "200":
270+
user_message = "URL must be the data layer"
267271

268272
result = dict(exception_detail)
269273
result["message"] = user_message

request-processor/src/application/core/workflow.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import hashlib
23
import os
34
import csv
45
from pathlib import Path
@@ -463,6 +464,8 @@ def add_data_workflow(
463464
] = f"Unable to find lookups for collection '{collection}', dataset '{dataset}'"
464465
return response_data
465466

467+
endpoint_hash = hashlib.sha256(url.encode("utf-8")).hexdigest()
468+
466469
# All processes around transforming the data and generating the pipeline summary
467470
pipeline_summary = fetch_add_data_response(
468471
dataset=dataset,
@@ -472,7 +475,7 @@ def add_data_workflow(
472475
output_path=output_path,
473476
specification_dir=directories.SPECIFICATION_DIR,
474477
cache_dir=directories.CACHE_DIR,
475-
url=url,
478+
endpoint=endpoint_hash,
476479
)
477480

478481
# Create endpoint and source summaries in workflow

request-processor/src/application/exceptions/customExceptions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,13 @@ def __init__(self, log={}):
4444
# Content type of the response useful for text/html checks (when not using arcgis/wfs plugin)
4545
self.content_type = log.get("content-type")
4646
self.message_detail = log.get("user_message_detail")
47+
self.plugin = log.get("plugin")
4748

4849
self.load()
4950
super().__init__(self.detail)
5051

5152
def load(self):
53+
# This is not the best way to do this but keeps backward compatibility for now
5254
self.detail = {
5355
"errCode": str(self.status) if self.status is not None else None,
5456
"errType": "User Error",
@@ -60,4 +62,5 @@ def load(self):
6062
"fetchStatus": self.fetch_status,
6163
"exceptionType": self.exception_type,
6264
"contentType": self.content_type,
65+
"plugin": self.plugin,
6366
}

request-processor/src/tasks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def handle_check_file(request_schema, request_data, tmp_dir):
141141

142142

143143
@celery.task(base=CheckDataUrlTask, name=CheckDataUrlTask.name)
144-
def check_dataurl(request: Dict, directories=None):
144+
def check_dataurl(request: Dict, directories=None): # noqa
145145
logger.info(
146146
f"Started check_dataurl task for request_id = {request.get('id', 'unknown')}"
147147
)
@@ -513,6 +513,7 @@ def _fetch_resource(resource_dir, url):
513513
}
514514
)
515515
elif log.get("exception") or log.get("status", "").startswith("4"):
516+
log["plugin"] = plugin # Save plugin used for arcgis error context
516517
break
517518

518519
# All fetch attempts failed - include content-type if available

request-processor/tests/integration/src/test_tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def test_check_datafile(
9999
"https://example.com/arcgis/rest/services/MapServer",
100100
(
101101
None,
102-
'{"type":"FeatureCollection","features":[{"type":"Feature","properties":{"name":"Test Feature"}}]}'.encode(
102+
'{"type":"FeatureCollection","features":[{"type":"Feature","properties":{"name":"Test Feature"}}]}'.encode( # noqa: E501
103103
"utf-8"
104104
),
105105
),

request-processor/tests/unit/src/application/core/test_pipeline.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
import pytest
2-
import os
3-
import csv
42
from unittest.mock import MagicMock
53
from src.application.core.pipeline import (
64
fetch_add_data_response,
@@ -17,7 +15,7 @@ def test_fetch_add_data_response_success(monkeypatch, tmp_path):
1715
input_path = tmp_path / "resource"
1816
specification_dir = tmp_path / "specification"
1917
cache_dir = tmp_path / "cache"
20-
url = "http://example.com/endpoint"
18+
endpoint = "abc123hash"
2119

2220
input_path.mkdir(parents=True)
2321
pipeline_dir.mkdir(parents=True)
@@ -68,7 +66,7 @@ def test_fetch_add_data_response_success(monkeypatch, tmp_path):
6866
output_path=str(input_path / "output.csv"),
6967
specification_dir=str(specification_dir),
7068
cache_dir=str(cache_dir),
71-
url=url,
69+
endpoint=endpoint,
7270
)
7371

7472
assert "new-in-resource" in result
@@ -83,7 +81,7 @@ def test_fetch_add_data_response_no_files(monkeypatch, tmp_path):
8381
input_path = tmp_path / "resource"
8482
specification_dir = tmp_path / "specification"
8583
cache_dir = tmp_path / "cache"
86-
url = "http://example.com/endpoint"
84+
endpoint = "abc123hash"
8785

8886
input_path.mkdir(parents=True)
8987
pipeline_dir.mkdir(parents=True)
@@ -103,7 +101,7 @@ def test_fetch_add_data_response_no_files(monkeypatch, tmp_path):
103101
output_path=str(input_path / "output.csv"),
104102
specification_dir=str(specification_dir),
105103
cache_dir=str(cache_dir),
106-
url=url,
104+
endpoint=endpoint,
107105
)
108106

109107
assert "new-in-resource" in result
@@ -118,7 +116,7 @@ def test_fetch_add_data_response_file_not_found(monkeypatch, tmp_path):
118116
input_path = tmp_path / "nonexistent"
119117
specification_dir = tmp_path / "specification"
120118
cache_dir = tmp_path / "cache"
121-
url = "http://example.com/endpoint"
119+
endpoint = "abc123hash"
122120

123121
pipeline_dir.mkdir(parents=True)
124122

@@ -138,7 +136,7 @@ def test_fetch_add_data_response_file_not_found(monkeypatch, tmp_path):
138136
output_path=str(input_path / "output.csv"),
139137
specification_dir=str(specification_dir),
140138
cache_dir=str(cache_dir),
141-
url=url,
139+
endpoint=endpoint,
142140
)
143141

144142

@@ -150,7 +148,7 @@ def test_fetch_add_data_response_handles_processing_error(monkeypatch, tmp_path)
150148
input_path = tmp_path / "resource"
151149
specification_dir = tmp_path / "specification"
152150
cache_dir = tmp_path / "cache"
153-
url = "http://example.com/endpoint"
151+
endpoint = "abc123hash"
154152

155153
input_path.mkdir(parents=True)
156154
pipeline_dir.mkdir(parents=True)
@@ -178,7 +176,7 @@ def raise_exception(*args, **kwargs):
178176
output_path=str(input_path / "output.csv"),
179177
specification_dir=str(specification_dir),
180178
cache_dir=str(cache_dir),
181-
url=url,
179+
endpoint=endpoint,
182180
)
183181

184182
assert "new-in-resource" in result

request-processor/tests/unit/src/application/core/test_utils.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,6 @@ def test_validate_endpoint_csv_read_error(monkeypatch, tmp_path):
357357
"""Test validate_endpoint when reading CSV fails"""
358358
pipeline_dir = tmp_path / "pipeline"
359359
pipeline_dir.mkdir()
360-
url = "http://example.com/endpoint"
361-
362360
endpoint_csv_path = pipeline_dir / "endpoint.csv"
363361
endpoint_csv_path.write_bytes(b"\x00\x00\x00")
364362

@@ -380,7 +378,6 @@ def test_validate_source_creates_new_source(monkeypatch, tmp_path):
380378
"""Test validate_source creates new source entry when it doesn't exist"""
381379
pipeline_dir = tmp_path / "pipeline"
382380
pipeline_dir.mkdir()
383-
source_csv_path = pipeline_dir / "source.csv"
384381

385382
documentation_url = "http://example.com/doc"
386383
collection = "test-collection"

0 commit comments

Comments
 (0)