Skip to content

Commit a91d957

Browse files
committed
Pass endpoint, not URL, to pipeline
1 parent 6b6e4d4 commit a91d957

File tree

4 files changed

+21
-16
lines changed

4 files changed

+21
-16
lines changed

request-processor/src/application/core/pipeline.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def fetch_add_data_response(
195195
output_path,
196196
specification_dir,
197197
cache_dir,
198-
url,
198+
endpoint,
199199
):
200200
try:
201201
specification = Specification(specification_dir)
@@ -228,7 +228,7 @@ def fetch_add_data_response(
228228
resource=resource_from_path(resource_file_path),
229229
valid_category_values=valid_category_values,
230230
disable_lookups=False,
231-
endpoints=[url],
231+
endpoints=[endpoint],
232232
)
233233

234234
existing_entities.extend(
@@ -254,7 +254,7 @@ def fetch_add_data_response(
254254
pipeline_dir=pipeline_dir,
255255
specification=specification,
256256
cache_dir=cache_dir,
257-
endpoints=[url] if url else None,
257+
endpoints=[endpoint] if endpoint else None,
258258
)
259259
logger.info(
260260
f"Found {len(new_lookups)} unidentified lookups in {resource_file}"
@@ -281,7 +281,7 @@ def fetch_add_data_response(
281281
resource=resource_from_path(resource_file_path),
282282
valid_category_values=valid_category_values,
283283
disable_lookups=False,
284-
endpoints=[url],
284+
endpoints=[endpoint],
285285
)
286286
else:
287287
logger.info(f"No unidentified lookups found in {resource_file}")

request-processor/src/application/core/workflow.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import hashlib
23
import os
34
import csv
45
from pathlib import Path
@@ -463,6 +464,8 @@ def add_data_workflow(
463464
] = f"Unable to find lookups for collection '{collection}', dataset '{dataset}'"
464465
return response_data
465466

467+
endpoint_hash = hashlib.sha256(url.encode("utf-8")).hexdigest()
468+
466469
# All processes around transforming the data and generating pipeline summary
467470
pipeline_summary = fetch_add_data_response(
468471
dataset=dataset,
@@ -472,7 +475,7 @@ def add_data_workflow(
472475
output_path=output_path,
473476
specification_dir=directories.SPECIFICATION_DIR,
474477
cache_dir=directories.CACHE_DIR,
475-
url=url,
478+
endpoint=endpoint_hash,
476479
)
477480

478481
# Create endpoint and source summaries in workflow

request-processor/tests/unit/src/application/core/test_pipeline.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def test_fetch_add_data_response_success(monkeypatch, tmp_path):
1717
input_path = tmp_path / "resource"
1818
specification_dir = tmp_path / "specification"
1919
cache_dir = tmp_path / "cache"
20-
url = "http://example.com/endpoint"
20+
endpoint = "abc123hash"
2121

2222
input_path.mkdir(parents=True)
2323
pipeline_dir.mkdir(parents=True)
@@ -68,7 +68,7 @@ def test_fetch_add_data_response_success(monkeypatch, tmp_path):
6868
output_path=str(input_path / "output.csv"),
6969
specification_dir=str(specification_dir),
7070
cache_dir=str(cache_dir),
71-
url=url,
71+
endpoint=endpoint,
7272
)
7373

7474
assert "new-in-resource" in result
@@ -83,7 +83,7 @@ def test_fetch_add_data_response_no_files(monkeypatch, tmp_path):
8383
input_path = tmp_path / "resource"
8484
specification_dir = tmp_path / "specification"
8585
cache_dir = tmp_path / "cache"
86-
url = "http://example.com/endpoint"
86+
endpoint = "abc123hash"
8787

8888
input_path.mkdir(parents=True)
8989
pipeline_dir.mkdir(parents=True)
@@ -103,7 +103,7 @@ def test_fetch_add_data_response_no_files(monkeypatch, tmp_path):
103103
output_path=str(input_path / "output.csv"),
104104
specification_dir=str(specification_dir),
105105
cache_dir=str(cache_dir),
106-
url=url,
106+
endpoint=endpoint,
107107
)
108108

109109
assert "new-in-resource" in result
@@ -118,7 +118,7 @@ def test_fetch_add_data_response_file_not_found(monkeypatch, tmp_path):
118118
input_path = tmp_path / "nonexistent"
119119
specification_dir = tmp_path / "specification"
120120
cache_dir = tmp_path / "cache"
121-
url = "http://example.com/endpoint"
121+
endpoint = "abc123hash"
122122

123123
pipeline_dir.mkdir(parents=True)
124124

@@ -138,7 +138,7 @@ def test_fetch_add_data_response_file_not_found(monkeypatch, tmp_path):
138138
output_path=str(input_path / "output.csv"),
139139
specification_dir=str(specification_dir),
140140
cache_dir=str(cache_dir),
141-
url=url,
141+
endpoint=endpoint,
142142
)
143143

144144

@@ -150,7 +150,7 @@ def test_fetch_add_data_response_handles_processing_error(monkeypatch, tmp_path)
150150
input_path = tmp_path / "resource"
151151
specification_dir = tmp_path / "specification"
152152
cache_dir = tmp_path / "cache"
153-
url = "http://example.com/endpoint"
153+
endpoint = "abc123hash"
154154

155155
input_path.mkdir(parents=True)
156156
pipeline_dir.mkdir(parents=True)
@@ -178,7 +178,7 @@ def raise_exception(*args, **kwargs):
178178
output_path=str(input_path / "output.csv"),
179179
specification_dir=str(specification_dir),
180180
cache_dir=str(cache_dir),
181-
url=url,
181+
endpoint=endpoint,
182182
)
183183

184184
assert "new-in-resource" in result

request-processor/tests/unit/src/application/core/test_workflow.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
fetch_add_data_collection_csvs,
1010
)
1111
import csv
12+
import hashlib
1213
import os
1314
from pathlib import Path
1415
import urllib
@@ -409,7 +410,7 @@ def fake_fetch_add_data_response(
409410
output_path,
410411
specification_dir,
411412
cache_dir,
412-
url,
413+
endpoint,
413414
):
414415
called["fetch_add_data_response"] = {
415416
"dataset": dataset,
@@ -419,7 +420,7 @@ def fake_fetch_add_data_response(
419420
"output_path": output_path,
420421
"specification_dir": specification_dir,
421422
"cache_dir": cache_dir,
422-
"url": url,
423+
"endpoint": endpoint,
423424
}
424425
return {"result": "ok"}
425426

@@ -470,7 +471,8 @@ def fake_fetch_add_data_response(
470471
== directories.SPECIFICATION_DIR
471472
)
472473
assert called["fetch_add_data_response"]["cache_dir"] == directories.CACHE_DIR
473-
assert called["fetch_add_data_response"]["url"] == url
474+
expected_endpoint_hash = hashlib.sha256(url.encode("utf-8")).hexdigest()
475+
assert called["fetch_add_data_response"]["endpoint"] == expected_endpoint_hash
474476

475477

476478
def test_fetch_add_data_pipeline_csvs_from_url(monkeypatch, tmp_path):

0 commit comments

Comments (0)