44from digital_land .specification import Specification
55from digital_land .organisation import Organisation
66from digital_land .api import API
7- from collections import defaultdict
87
98from digital_land .pipeline import Pipeline , Lookups
109from digital_land .commands import get_resource_unidentified_lookups
11- from digital_land .api import API
1210from application .core .utils import append_endpoint , append_source
1311from datetime import datetime
1412from pathlib import Path
@@ -83,17 +81,27 @@ def fetch_response_data(
8381 output_path = os .path .join (
8482 transformed_dir , dataset , request_id , f"{ resource } .csv"
8583 ),
86- organisation = Organisation (os .path .join (cache_dir , "organisation.csv" ), Path (pipeline .path )),
84+ organisation = Organisation (
85+ os .path .join (cache_dir , "organisation.csv" ), Path (pipeline .path )
86+ ),
8787 resource = resource ,
88- valid_category_values = api .get_valid_category_values (dataset , pipeline ),
89- converted_path = os .path .join (converted_dir , request_id , f"{ resource } .csv" ),
88+ valid_category_values = api .get_valid_category_values (dataset , pipeline ),
89+ converted_path = os .path .join (
90+ converted_dir , request_id , f"{ resource } .csv"
91+ ),
9092 disable_lookups = True ,
9193 )
9294 # Issue log needs severity column added, so manually added and saved here
93- issue_log .add_severity_column (os .path .join (specification_dir , "issue-type.csv" ))
94- issue_log .save (os .path .join (issue_dir , dataset , request_id , resource + ".csv" ))
95+ issue_log .add_severity_column (
96+ os .path .join (specification_dir , "issue-type.csv" )
97+ )
98+ issue_log .save (
99+ os .path .join (issue_dir , dataset , request_id , resource + ".csv" )
100+ )
95101 pipeline .save_logs (
96- column_field_path = os .path .join (column_field_dir , dataset , request_id , resource + ".csv" ),
102+ column_field_path = os .path .join (
103+ column_field_dir , dataset , request_id , resource + ".csv"
104+ ),
97105 dataset_resource_path = os .path .join (
98106 dataset_resource_dir , dataset , request_id , resource + ".csv"
99107 ),
@@ -112,7 +120,13 @@ def default_output_path(command, input_path):
112120
113121
114122def assign_entries (
115- resource_path , dataset , organisation , pipeline_dir , specification , cache_dir , endpoints = None
123+ resource_path ,
124+ dataset ,
125+ organisation ,
126+ pipeline_dir ,
127+ specification ,
128+ cache_dir ,
129+ endpoints = None ,
116130):
117131 pipeline = Pipeline (pipeline_dir , dataset )
118132 resource_lookups = get_resource_unidentified_lookups (
@@ -138,7 +152,7 @@ def assign_entries(
138152 )
139153
140154 lookups .load_csv ()
141-
155+
142156 # Track which entries are new by checking before adding
143157 new_entries_added = []
144158 for new_lookup in unassigned_entries :
@@ -157,19 +171,20 @@ def assign_entries(
157171 )
158172
159173 newly_assigned = lookups .save_csv ()
160-
174+
161175 # Filter to return only the entries we just added
162176 if newly_assigned :
163177 new_lookups = [
164- lookup for lookup in newly_assigned
178+ lookup
179+ for lookup in newly_assigned
165180 if any (
166- lookup .get ("reference" ) == entry .get ("reference" )
181+ lookup .get ("reference" ) == entry .get ("reference" )
167182 and lookup .get ("organisation" ) == entry .get ("organisation" )
168183 for entry in new_entries_added
169184 )
170185 ]
171186 return new_lookups
172-
187+
173188 return []
174189
175190
@@ -185,15 +200,17 @@ def fetch_add_data_response(
185200 url ,
186201 documentation_url ,
187202):
188- try :
203+ try :
189204 specification = Specification (specification_dir )
190205 pipeline = Pipeline (pipeline_dir , dataset , specification = specification )
191- organisation = Organisation (os .path .join (cache_dir , "organisation.csv" ), Path (pipeline .path ))
206+ organisation = Organisation (
207+ os .path .join (cache_dir , "organisation.csv" ), Path (pipeline .path )
208+ )
192209 api = API (specification = specification )
193210
194211 # TODO: Need to load config class for correct transform?
195212 # TODO: Handling of column mapping?
196- valid_category_values = api .get_valid_category_values (dataset , pipeline )
213+ valid_category_values = api .get_valid_category_values (dataset , pipeline )
197214
198215 files_in_resource = os .listdir (input_dir )
199216
@@ -213,18 +230,21 @@ def fetch_add_data_response(
213230 organisation = organisation ,
214231 organisations = [organisation_provider ],
215232 resource = resource_from_path (resource_file_path ),
216- valid_category_values = valid_category_values ,
233+ valid_category_values = valid_category_values ,
217234 disable_lookups = False ,
218235 )
219236
220237 existing_entities .extend (
221- _map_existing_entities_from_transformed_csv (output_path , pipeline_dir )
238+ _map_transformed_entities (output_path , pipeline_dir )
222239 )
223240
224241 # Check if there are unknown entity issues in the log
225- unknown_issue_types = {'unknown entity' , 'unknown entity - missing reference' }
242+ unknown_issue_types = {
243+ "unknown entity" ,
244+ "unknown entity - missing reference" ,
245+ }
226246 has_unknown = any (
227- row .get (' issue-type' ) in unknown_issue_types
247+ row .get (" issue-type" ) in unknown_issue_types
228248 for row in issues_log .rows
229249 if isinstance (row , dict )
230250 )
@@ -246,8 +266,7 @@ def fetch_add_data_response(
246266 else :
247267 logger .info (f"No unidentified lookups found in { resource_file } " )
248268
249-
250- # TODO: Re-run to see if no unidentified lookups remain and if so create an error summary for add data command
269+ # TODO: Re-run to check whether any unidentified lookups remain; if so, create an error summary for the add data command
251270
252271 except Exception as err :
253272 logger .error (f"Error processing { resource_file } : { err } " )
@@ -340,11 +359,11 @@ def _get_existing_entities_breakdown(existing_entities):
340359 return breakdown
341360
342361
343- def _map_existing_entities_from_transformed_csv (transformed_csv_path , pipeline_dir ):
362+ def _map_transformed_entities (transformed_csv_path , pipeline_dir ): # noqa: C901
344363 """Extract unique entities from transformed CSV and look up their details in lookup.csv."""
345364
346365 mapped_entities = []
347-
366+
348367 if not os .path .exists (transformed_csv_path ):
349368 logger .warning (f"Transformed CSV not found: { transformed_csv_path } " )
350369 return mapped_entities
0 commit comments