@@ -273,9 +273,10 @@ def _arrowtable2df(
 def _read_parquet_chunked(
     paths: List[str],
     chunked: Union[bool, int],
+    validate_schema: bool,
+    ignore_index: Optional[bool],
     columns: Optional[List[str]],
     categories: Optional[List[str]],
-    validate_schema: bool,
     safe: bool,
     boto3_session: boto3.Session,
     dataset: bool,
@@ -331,7 +332,7 @@ def _read_parquet_chunked(
                     yield df
                 elif isinstance(chunked, int) and chunked > 0:
                     if next_slice is not None:
-                        df = _union(dfs=[next_slice, df], ignore_index=None)
+                        df = _union(dfs=[next_slice, df], ignore_index=ignore_index)
                     while len(df.index) >= chunked:
                         yield df.iloc[:chunked]
                         df = df.iloc[chunked:]
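
The hunk above is where leftover rows carried over from the previous file (next_slice) are concatenated with the new frame, now honouring the caller-supplied ignore_index instead of a hard-coded None. As an aside, here is a minimal standalone sketch of that fixed-size slicing pattern in plain pandas; yield_chunks, frames, and leftover are illustrative names, not part of the library:

import pandas as pd

def yield_chunks(frames, chunked, ignore_index=None):
    """Yield DataFrames of exactly `chunked` rows, carrying leftover rows forward."""
    leftover = None
    for df in frames:
        if leftover is not None:
            # Same idea as _union(dfs=[next_slice, df], ignore_index=ignore_index)
            df = pd.concat([leftover, df], ignore_index=bool(ignore_index))
        while len(df.index) >= chunked:
            yield df.iloc[:chunked]
            df = df.iloc[chunked:]
        leftover = df if len(df.index) > 0 else None
    if leftover is not None:
        yield leftover  # final, possibly smaller, chunk

chunks = list(yield_chunks([pd.DataFrame({"x": range(5)}), pd.DataFrame({"x": range(3)})], chunked=4))

With ignore_index left as None or False, the row labels of the individual files survive each concatenation; with True every concatenation gets a fresh RangeIndex.
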
@@ -430,6 +431,7 @@ def read_parquet(
     path_suffix: Union[str, List[str], None] = None,
     path_ignore_suffix: Union[str, List[str], None] = None,
     ignore_empty: bool = True,
+    ignore_index: Optional[bool] = None,
     partition_filter: Optional[Callable[[Dict[str, str]], bool]] = None,
     columns: Optional[List[str]] = None,
     validate_schema: bool = False,
@@ -489,6 +491,8 @@ def read_parquet(
         If None, will try to read all files. (default)
     ignore_empty: bool
         Ignore files with 0 bytes.
+    ignore_index: Optional[bool]
+        Ignore index when combining multiple parquet files to one DataFrame.
     partition_filter: Optional[Callable[[Dict[str, str]], bool]]
         Callback Function filters to apply on PARTITION columns (PUSH-DOWN filter).
         This function MUST receive a single argument (Dict[str, str]) where keys are partitions
@@ -596,7 +600,9 @@ def read_parquet(
     }
     _logger.debug("args:\n%s", pprint.pformat(args))
     if chunked is not False:
-        return _read_parquet_chunked(paths=paths, chunked=chunked, validate_schema=validate_schema, **args)
+        return _read_parquet_chunked(
+            paths=paths, chunked=chunked, validate_schema=validate_schema, ignore_index=ignore_index, **args
+        )
     if len(paths) == 1:
         return _read_parquet(path=paths[0], **args)
     if validate_schema is True:
@@ -607,7 +613,7 @@ def read_parquet(
             boto3_session=boto3_session,
             s3_additional_kwargs=s3_additional_kwargs,
         )
-    return _union(dfs=[_read_parquet(path=p, **args) for p in paths], ignore_index=None)
+    return _union(dfs=[_read_parquet(path=p, **args) for p in paths], ignore_index=ignore_index)
 
 
 @apply_configs
@@ -657,10 +663,10 @@ def read_parquet_table(
         AWS Glue Catalog table name.
     database : str
         AWS Glue Catalog database name.
-    path_suffix : Union[str, List[str], None]
+    filename_suffix : Union[str, List[str], None]
         Suffix or List of suffixes to be read (e.g. [".gz.parquet", ".snappy.parquet"]).
         If None, will try to read all files. (default)
-    path_ignore_suffix : Union[str, List[str], None]
+    filename_ignore_suffix : Union[str, List[str], None]
         Suffix or List of suffixes for S3 keys to be ignored.(e.g. [".csv", "_SUCCESS"]).
         If None, will try to read all files. (default)
     catalog_id : str, optional
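
Taken together, the change threads the new ignore_index keyword from read_parquet down to _read_parquet_chunked and _union. A hedged usage sketch of the new option; the S3 path is a placeholder, not taken from this change:

import awswrangler as wr

# With ignore_index=True the DataFrames read from the individual Parquet
# files are concatenated with a fresh RangeIndex instead of keeping each
# file's original index values; leaving it as None/False keeps the old behaviour.
df = wr.s3.read_parquet(
    path="s3://example-bucket/example-prefix/",  # placeholder location
    dataset=True,
    ignore_index=True,
)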