|
1 | 1 | import re |
2 | 2 | import logging |
3 | | -import time |
4 | 3 | import pandas as pd |
5 | | -import urllib |
6 | 4 |
|
7 | 5 | from .phase import Phase |
8 | 6 |
|
@@ -36,13 +34,15 @@ def __init__( |
36 | 34 | issue_log=None, |
37 | 35 | operational_issue_log=None, |
38 | 36 | entity_range=[], |
| 37 | + provision_summary_dir=None, |
39 | 38 | ): |
40 | 39 | self.lookups = lookups |
41 | 40 | self.redirect_lookups = redirect_lookups |
42 | 41 | self.issues = issue_log |
43 | 42 | self.operational_issues = operational_issue_log |
44 | 43 | self.reverse_lookups = self.build_reverse_lookups() |
45 | 44 | self.entity_range = entity_range |
| 45 | + self.provision_summary_dir = provision_summary_dir |
46 | 46 |
|
47 | 47 | def build_reverse_lookups(self): |
48 | 48 | reverse_lookups = {} |
@@ -167,67 +167,43 @@ def process(self, stream): |
167 | 167 | row[self.entity_field] |
168 | 168 | ) |
169 | 169 |
|
| 170 | + linked_datasets = ["article-4-direction", "tree-preservation-order"] |
170 | 171 | if row[self.entity_field]: |
171 | | - if ( |
172 | | - row.get("article-4-direction", "") |
173 | | - or row.get("tree-preservation-order", "").strip() |
174 | | - ): |
175 | | - linked_dataset = ( |
176 | | - "article-4-direction" |
177 | | - if "article-4-direction" in row |
178 | | - else "tree-preservation-order" |
179 | | - ) |
180 | | - |
181 | | - # check applied for organisations that have provided a document dataset |
182 | | - if not hasattr( |
183 | | - self, "lpa_list" |
184 | | - ): # check if data fetched already |
185 | | - params = urllib.parse.urlencode( |
186 | | - { |
187 | | - "sql": f"""select organisation from provision_summary where active_endpoint_count > 0 and dataset == '{linked_dataset}'""", |
188 | | - "_size": "max", |
189 | | - } |
| 172 | + for linked_dataset in linked_datasets: |
| 173 | + if ( |
| 174 | + row.get(linked_dataset, "") |
| 175 | + or row.get(linked_dataset, "").strip() |
| 176 | + ): |
| 177 | + get_organisations = pd.read_csv( |
| 178 | + self.provision_summary_dir + linked_dataset + ".csv" |
190 | 179 | ) |
191 | | - base_url = f"https://datasette.planning.data.gov.uk/performance.csv?{params}" |
192 | | - |
193 | | - max_retries = 60 # Retry for an hour |
194 | | - for attempt in range(max_retries): |
195 | | - try: |
196 | | - get_lpa = pd.read_csv(base_url) |
197 | | - self.lpa_list = get_lpa["organisation"].to_list() |
198 | | - break |
199 | | - except urllib.error.HTTPError: |
200 | | - if attempt < max_retries - 1: |
201 | | - time.sleep(60) |
202 | | - else: |
203 | | - raise Exception( |
204 | | - "Failed to fetch datasette after multiple attempts" |
205 | | - ) |
206 | | - |
207 | | - if row.get("organisation", "") in self.lpa_list: |
208 | | - reference = row.get(linked_dataset, "") |
209 | 180 |
|
210 | | - find_entity = self.lookup( |
211 | | - prefix=linked_dataset, |
212 | | - organisation=row.get("organisation", ""), |
213 | | - reference=row.get(linked_dataset, ""), |
214 | | - ) |
215 | | - # raise issue if the found entity is retired in old-entity.csv |
216 | | - if not find_entity or ( |
217 | | - str(find_entity) in self.redirect_lookups |
218 | | - and int( |
219 | | - self.redirect_lookups[str(find_entity)].get( |
220 | | - "status", 0 |
221 | | - ) |
222 | | - ) |
223 | | - == 410 |
| 181 | + if ( |
| 182 | + row.get("organisation", "") |
| 183 | + in get_organisations["organisation"].values |
224 | 184 | ): |
225 | | - self.issues.log_issue( |
226 | | - linked_dataset, |
227 | | - "no associated documents found for this area", |
228 | | - reference, |
229 | | - line_number=line_number, |
| 185 | + reference = row.get(linked_dataset, "") |
| 186 | + find_entity = self.lookup( |
| 187 | + prefix=linked_dataset, |
| 188 | + organisation=row.get("organisation", ""), |
| 189 | + reference=reference, |
230 | 190 | ) |
| 191 | + # raise issue if the found entity is retired in old-entity.csv |
| 192 | + if not find_entity or ( |
| 193 | + str(find_entity) in self.redirect_lookups |
| 194 | + and int( |
| 195 | + self.redirect_lookups[str(find_entity)].get( |
| 196 | + "status", 0 |
| 197 | + ) |
| 198 | + ) |
| 199 | + == 410 |
| 200 | + ): |
| 201 | + self.issues.log_issue( |
| 202 | + linked_dataset, |
| 203 | + "no associated documents found for this area", |
| 204 | + reference, |
| 205 | + line_number=line_number, |
| 206 | + ) |
231 | 207 | yield block |
232 | 208 |
|
233 | 209 |
|
|
0 commit comments