@@ -238,6 +238,7 @@ def _arrowtable2df(
     table: pa.Table,
     categories: Optional[List[str]],
     safe: bool,
+    map_types: bool,
     use_threads: bool,
     dataset: bool,
     path: str,
@@ -257,7 +258,7 @@ def _arrowtable2df(
             strings_to_categorical=False,
             safe=safe,
             categories=categories,
-            types_mapper=_data_types.pyarrow2pandas_extension,
+            types_mapper=_data_types.pyarrow2pandas_extension if map_types else None,
         ),
         dataset=dataset,
         path=path,
@@ -279,6 +280,7 @@ def _read_parquet_chunked(
     columns: Optional[List[str]],
     categories: Optional[List[str]],
     safe: bool,
+    map_types: bool,
     boto3_session: boto3.Session,
     dataset: bool,
     path_root: Optional[str],
@@ -325,6 +327,7 @@ def _read_parquet_chunked(
                 ),
                 categories=categories,
                 safe=safe,
+                map_types=map_types,
                 use_threads=use_threads,
                 dataset=dataset,
                 path=path,
@@ -404,6 +407,7 @@ def _read_parquet(
     columns: Optional[List[str]],
     categories: Optional[List[str]],
     safe: bool,
+    map_types: bool,
     boto3_session: boto3.Session,
     dataset: bool,
     path_root: Optional[str],
@@ -421,6 +425,7 @@ def _read_parquet(
         ),
         categories=categories,
         safe=safe,
+        map_types=map_types,
         use_threads=use_threads,
         dataset=dataset,
         path=path,
@@ -441,6 +446,7 @@ def read_parquet(
     dataset: bool = False,
     categories: Optional[List[str]] = None,
     safe: bool = True,
+    map_types: bool = True,
     use_threads: bool = True,
     last_modified_begin: Optional[datetime.datetime] = None,
     last_modified_end: Optional[datetime.datetime] = None,
@@ -524,6 +530,10 @@ def read_parquet(
         data in a pandas DataFrame or Series (e.g. timestamps are always
         stored as nanoseconds in pandas). This option controls whether it
         is a safe cast or not.
+    map_types : bool, default True
+        True to convert pyarrow DataTypes to pandas ExtensionDtypes. It is
+        used to override the default pandas type for conversion of built-in
+        pyarrow types or in absence of pandas_metadata in the Table schema.
     use_threads : bool
         True to enable concurrent requests, False to disable multiple threads.
         If enabled os.cpu_count() will be used as the max number of threads.
@@ -597,6 +607,7 @@ def read_parquet(
         "columns": columns,
         "categories": categories,
         "safe": safe,
+        "map_types": map_types,
         "boto3_session": session,
         "dataset": dataset,
         "path_root": path_root,
@@ -633,6 +644,7 @@ def read_parquet_table(
     validate_schema: bool = True,
     categories: Optional[List[str]] = None,
     safe: bool = True,
+    map_types: bool = True,
     chunked: Union[bool, int] = False,
     use_threads: bool = True,
     boto3_session: Optional[boto3.Session] = None,
@@ -699,6 +711,10 @@ def read_parquet_table(
         data in a pandas DataFrame or Series (e.g. timestamps are always
         stored as nanoseconds in pandas). This option controls whether it
         is a safe cast or not.
+    map_types : bool, default True
+        True to convert pyarrow DataTypes to pandas ExtensionDtypes. It is
+        used to override the default pandas type for conversion of built-in
+        pyarrow types or in absence of pandas_metadata in the Table schema.
     chunked : bool
         If True will break the data in smaller DataFrames (Non deterministic number of lines).
         Otherwise return a single DataFrame with the whole data.
@@ -767,6 +783,7 @@ def read_parquet_table(
         validate_schema=validate_schema,
         categories=categories,
         safe=safe,
+        map_types=map_types,
         chunked=chunked,
         dataset=True,
         use_threads=use_threads,