@@ -247,40 +247,33 @@ def _read_parquet_chunked(
247247 if pq_file is None :
248248 continue
249249
250- schema = pq_file .schema .to_arrow_schema ()
250+ metadata = pq_file .metadata
251+ schema = metadata .schema .to_arrow_schema ()
251252 if columns :
252253 schema = pa .schema ([schema .field (column ) for column in columns ], schema .metadata )
253254
254255 use_threads_flag : bool = use_threads if isinstance (use_threads , bool ) else bool (use_threads > 1 )
255- iterate_at_least_once = False
256- for chunk in pq_file .iter_batches (
257- batch_size = batch_size , columns = columns , use_threads = use_threads_flag , use_pandas_metadata = False
258- ):
259- iterate_at_least_once = True
260- table = _add_table_partitions (
261- table = pa .Table .from_batches ([chunk ], schema = schema ),
262- path = path ,
263- path_root = path_root ,
264- )
265- df = _table_to_df (table = table , kwargs = arrow_kwargs )
266- if chunked is True :
267- yield df
268- else :
269- if next_slice is not None :
270- df = pd .concat (objs = [next_slice , df ], sort = False , copy = False )
271- while len (df .index ) >= chunked :
272- yield df .iloc [:chunked , :].copy ()
273- df = df .iloc [chunked :, :]
274- if df .empty :
275- next_slice = None
256+ table_kwargs = {"path" : path , "path_root" : path_root }
257+ if metadata .num_rows > 0 :
258+ for chunk in pq_file .iter_batches (
259+ batch_size = batch_size , columns = columns , use_threads = use_threads_flag , use_pandas_metadata = False
260+ ):
261+ table = _add_table_partitions (table = pa .Table .from_batches ([chunk ], schema = schema ), ** table_kwargs )
262+ df = _table_to_df (table = table , kwargs = arrow_kwargs )
263+ if chunked is True :
264+ yield df
276265 else :
277- next_slice = df
278- if not iterate_at_least_once :
279- table = _add_table_partitions (
280- table = pa .Table .from_batches ([], schema = schema ),
281- path = path ,
282- path_root = path_root ,
283- )
266+ if next_slice is not None :
267+ df = pd .concat (objs = [next_slice , df ], sort = False , copy = False )
268+ while len (df .index ) >= chunked :
269+ yield df .iloc [:chunked , :].copy ()
270+ df = df .iloc [chunked :, :]
271+ if df .empty :
272+ next_slice = None
273+ else :
274+ next_slice = df
275+ else :
276+ table = _add_table_partitions (table = pa .Table .from_batches ([], schema = schema ), ** table_kwargs )
284277 df = _table_to_df (table = table , kwargs = arrow_kwargs )
285278 yield df
286279
0 commit comments