@@ -265,6 +265,9 @@ class ParquetFile:
265265 If True, read Parquet logical types as Arrow extension types where possible,
266266 (e.g., read JSON as the canonical `arrow.json` extension type or UUID as
267267 the canonical `arrow.uuid` extension type).
268+ max_page_header_size : int, default None
269+ If not None, override the maximum size of a page header.
270+ Defaults to 16MB, which should be sufficient for most Parquet files.
268271
269272 Examples
270273 --------
@@ -314,7 +317,8 @@ def __init__(self, source, *, metadata=None, common_metadata=None,
314317 coerce_int96_timestamp_unit = None ,
315318 decryption_properties = None , thrift_string_size_limit = None ,
316319 thrift_container_size_limit = None , filesystem = None ,
317- page_checksum_verification = False , arrow_extensions_enabled = True ):
320+ page_checksum_verification = False , arrow_extensions_enabled = True ,
321+ max_page_header_size = None ):
318322
319323 self ._close_source = getattr (source , 'closed' , True )
320324
@@ -336,6 +340,7 @@ def __init__(self, source, *, metadata=None, common_metadata=None,
336340 thrift_container_size_limit = thrift_container_size_limit ,
337341 page_checksum_verification = page_checksum_verification ,
338342 arrow_extensions_enabled = arrow_extensions_enabled ,
343+ max_page_header_size = max_page_header_size ,
339344 )
340345 self .common_metadata = common_metadata
341346 self ._nested_paths_by_prefix = self ._build_nested_paths ()
0 commit comments