3131
3232PIPELINES_DEV_DOCKER_IMAGE = 'orihoch/knesset-data-pipelines@sha256:329c7619fbdb4603d485df327c17cec556bc1ece1db2f11bc64854e94a5ce88a'
3333
34-
3534warnings .filterwarnings ("ignore" , category = XMLParsedAsHTMLWarning )
3635
3736
@@ -248,11 +247,9 @@ def add_dataservice_collection_resource_odata_v4(params, proxies, stats, limit_r
248247 odata_count = None
249248 skip = None
250249 while True :
251- if odata_count is None :
252- assert skip is None
250+ if skip is None :
253251 url = f'{ url_base } ?$count=true'
254252 else :
255- assert skip is not None
256253 url = f'{ url_base } ?$skip={ skip } '
257254 print (url )
258255 status_code , content = get_response_content (url , params , timeout , proxies )
@@ -262,21 +259,19 @@ def add_dataservice_collection_resource_odata_v4(params, proxies, stats, limit_r
262259 except Exception as e :
263260 raise Exception (f'failed to parse json response for url { url } \n { content } ' ) from e
264261 num_entries = 0
265- for entry in res ['value' ]:
262+ for entry in ( res if isinstance ( res , list ) else res ['value' ]) :
266263 stats ['rows' ] += 1
267264 yield get_row_from_entry (params , entry , v4 = True )
268265 num_entries += 1
269- if odata_count is None :
266+ skip = num_entries if skip is None else skip + num_entries
267+ if odata_count is None and isinstance (res , dict ) and '@odata.count' in res :
270268 odata_count = res ['@odata.count' ]
271269 assert odata_count > 0 , f'invalid count: { odata_count } for url { url } \n { content } '
272- skip = 100
273- else :
274- skip += 100
275270 if num_entries == 0 :
276271 break
277272 if limit_rows and stats ['rows' ] >= limit_rows :
278273 break
279- assert limit_rows or stats ['rows' ] == odata_count , f'invalid rows count: { stats ["rows" ]} != { odata_count } for url { url } '
274+ assert limit_rows or odata_count is None or stats ['rows' ] == odata_count , f'invalid rows count: { stats ["rows" ]} != { odata_count } for url { url } '
280275
281276
282277def add_dataservice_collection_resource (params , proxies = None , stats = None , limit_rows = None , stop_on_throttled_error = False , start_url = None , load_from = None ):
0 commit comments