@@ -247,33 +247,36 @@ def _read_parquet_chunked(
         if pq_file is None:
             continue
 
-        use_threads_flag: bool = use_threads if isinstance(use_threads, bool) else bool(use_threads > 1)
-        chunks = pq_file.iter_batches(
-            batch_size=batch_size, columns=columns, use_threads=use_threads_flag, use_pandas_metadata=False
-        )
-
-        schema = pq_file.schema.to_arrow_schema()
+        metadata = pq_file.metadata
+        schema = metadata.schema.to_arrow_schema()
         if columns:
             schema = pa.schema([schema.field(column) for column in columns], schema.metadata)
 
-        table = _add_table_partitions(
-            table=pa.Table.from_batches(chunks, schema=schema),
-            path=path,
-            path_root=path_root,
-        )
-        df = _table_to_df(table=table, kwargs=arrow_kwargs)
-        if chunked is True:
-            yield df
+        use_threads_flag: bool = use_threads if isinstance(use_threads, bool) else bool(use_threads > 1)
+        table_kwargs = {"path": path, "path_root": path_root}
+        if metadata.num_rows > 0:
+            for chunk in pq_file.iter_batches(
+                batch_size=batch_size, columns=columns, use_threads=use_threads_flag, use_pandas_metadata=False
+            ):
+                table = _add_table_partitions(table=pa.Table.from_batches([chunk], schema=schema), **table_kwargs)
+                df = _table_to_df(table=table, kwargs=arrow_kwargs)
+                if chunked is True:
+                    yield df
+                else:
+                    if next_slice is not None:
+                        df = pd.concat(objs=[next_slice, df], sort=False, copy=False)
+                    while len(df.index) >= chunked:
+                        yield df.iloc[:chunked, :].copy()
+                        df = df.iloc[chunked:, :]
+                    if df.empty:
+                        next_slice = None
+                    else:
+                        next_slice = df
         else:
-            if next_slice is not None:
-                df = pd.concat(objs=[next_slice, df], sort=False, copy=False)
-            while len(df.index) >= chunked:
-                yield df.iloc[:chunked, :].copy()
-                df = df.iloc[chunked:, :]
-            if df.empty:
-                next_slice = None
-            else:
-                next_slice = df
+            table = _add_table_partitions(table=pa.Table.from_batches([], schema=schema), **table_kwargs)
+            df = _table_to_df(table=table, kwargs=arrow_kwargs)
+            yield df
+
     if next_slice is not None:
         yield next_slice
 
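For reference, a minimal sketch (not part of this diff) of the zero-row case the new `metadata.num_rows > 0` guard handles: `ParquetFile.iter_batches` yields no batches for an empty file, so the per-chunk loop above would never run and nothing would be yielded without the `else` branch, which instead emits an explicit empty table carrying the resolved schema. The file path and column name below are illustrative.

```python
# Sketch of the zero-row Parquet case; path and column name are illustrative.
import pyarrow as pa
import pyarrow.parquet as pq

schema = pa.schema([("c0", pa.int64())])
pq.write_table(pa.Table.from_batches([], schema=schema), "/tmp/empty.parquet")  # zero-row file

pq_file = pq.ParquetFile("/tmp/empty.parquet")
print(pq_file.metadata.num_rows)                  # 0
print(list(pq_file.iter_batches(batch_size=10)))  # [] -> the per-chunk loop body never runs

# Building an explicit zero-row table keeps the schema intact for the pandas conversion.
empty = pa.Table.from_batches([], schema=schema)
print(empty.num_rows, empty.schema)               # 0, c0: int64
```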