Skip to content

Commit bd79196

Browse files
committed
Fix for non-standard OData v4 endpoints
1 parent 25d5924 commit bd79196

1 file changed

Lines changed: 5 additions & 10 deletions

File tree

airflow/knesset_data_pipelines/run_pipeline.py

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131

3232
PIPELINES_DEV_DOCKER_IMAGE = 'orihoch/knesset-data-pipelines@sha256:329c7619fbdb4603d485df327c17cec556bc1ece1db2f11bc64854e94a5ce88a'
3333

34-
3534
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
3635

3736

@@ -248,11 +247,9 @@ def add_dataservice_collection_resource_odata_v4(params, proxies, stats, limit_r
248247
odata_count = None
249248
skip = None
250249
while True:
251-
if odata_count is None:
252-
assert skip is None
250+
if skip is None:
253251
url = f'{url_base}?$count=true'
254252
else:
255-
assert skip is not None
256253
url = f'{url_base}?$skip={skip}'
257254
print(url)
258255
status_code, content = get_response_content(url, params, timeout, proxies)
@@ -262,21 +259,19 @@ def add_dataservice_collection_resource_odata_v4(params, proxies, stats, limit_r
262259
except Exception as e:
263260
raise Exception(f'failed to parse json response for url {url}\n{content}') from e
264261
num_entries = 0
265-
for entry in res['value']:
262+
for entry in (res if isinstance(res, list) else res['value']):
266263
stats['rows'] += 1
267264
yield get_row_from_entry(params, entry, v4=True)
268265
num_entries += 1
269-
if odata_count is None:
266+
skip = num_entries if skip is None else skip + num_entries
267+
if odata_count is None and isinstance(res, dict) and '@odata.count' in res:
270268
odata_count = res['@odata.count']
271269
assert odata_count > 0, f'invalid count: {odata_count} for url {url}\n{content}'
272-
skip = 100
273-
else:
274-
skip += 100
275270
if num_entries == 0:
276271
break
277272
if limit_rows and stats['rows'] >= limit_rows:
278273
break
279-
assert limit_rows or stats['rows'] == odata_count, f'invalid rows count: {stats["rows"]} != {odata_count} for url {url}'
274+
assert limit_rows or odata_count is None or stats['rows'] == odata_count, f'invalid rows count: {stats["rows"]} != {odata_count} for url {url}'
280275

281276

282277
def add_dataservice_collection_resource(params, proxies=None, stats=None, limit_rows=None, stop_on_throttled_error=False, start_url=None, load_from=None):

0 commit comments

Comments (0)