33from typing import TYPE_CHECKING
44import warnings
55
6- import numpy as np
7-
8- from pandas ._config import using_string_dtype
9-
106from pandas ._libs import lib
117from pandas .compat ._optional import import_optional_dependency
128from pandas .errors import (
1612from pandas .util ._exceptions import find_stack_level
1713
1814from pandas .core .dtypes .common import (
19- is_string_dtype ,
2015 pandas_dtype ,
2116)
22- from pandas .core .dtypes .dtypes import (
23- BaseMaskedDtype ,
24- )
2517from pandas .core .dtypes .inference import is_integer
2618
27- from pandas .core .arrays .string_ import StringDtype
28-
2919from pandas .io ._util import arrow_table_to_pandas
3020from pandas .io .parsers .base_parser import ParserBase
3121
3222if TYPE_CHECKING :
23+ import pyarrow as pa
24+
3325 from pandas ._typing import ReadBuffer
3426
3527 from pandas import DataFrame
@@ -174,8 +166,8 @@ def _get_convert_options(self):
174166
175167 return convert_options
176168
177- def _adjust_column_names (self , frame : DataFrame ) -> tuple [ DataFrame , bool ] :
178- num_cols = len (frame .columns )
169+ def _adjust_column_names (self , table : pa . Table ) -> bool :
170+ num_cols = len (table .columns )
179171 multi_index_named = True
180172 if self .header is None :
181173 if self .names is None :
@@ -188,8 +180,7 @@ def _adjust_column_names(self, frame: DataFrame) -> tuple[DataFrame, bool]:
188180 columns_prefix = [str (x ) for x in range (num_cols - len (self .names ))]
189181 self .names = columns_prefix + self .names
190182 multi_index_named = False
191- frame .columns = self .names
192- return frame , multi_index_named
183+ return multi_index_named
193184
194185 def _finalize_index (self , frame : DataFrame , multi_index_named : bool ) -> DataFrame :
195186 if self .index_col is not None :
@@ -312,13 +303,7 @@ def read(self) -> DataFrame:
312303
313304 table = table .cast (new_schema )
314305
315- workaround = False
316- pass_backend = dtype_backend
317- if self .dtype is not None and dtype_backend != "pyarrow" :
318- # We pass dtype_backend="pyarrow" and subsequently cast
319- # to avoid lossy conversion e.g. GH#56136
320- workaround = True
321- pass_backend = "numpy_nullable"
306+ multi_index_named = self ._adjust_column_names (table )
322307
323308 with warnings .catch_warnings ():
324309 warnings .filterwarnings (
@@ -327,49 +312,14 @@ def read(self) -> DataFrame:
327312 DeprecationWarning ,
328313 )
329314 frame = arrow_table_to_pandas (
330- table , dtype_backend = pass_backend , null_to_int64 = True
315+ table ,
316+ dtype_backend = dtype_backend ,
317+ null_to_int64 = True ,
318+ dtype = self .dtype ,
319+ names = self .names ,
331320 )
332321
333- frame , multi_index_named = self ._adjust_column_names (frame )
334-
335- if workaround and dtype_backend != "numpy_nullable" :
336- old_dtype = self .dtype
337- if not isinstance (old_dtype , dict ):
338- # e.g. test_categorical_dtype_utf16
339- old_dtype = dict .fromkeys (frame .columns , old_dtype )
340-
341- # _finalize_pandas_output will call astype, but we need to make
342- # sure all keys are populated appropriately.
343- new_dtype = {}
344- for key in frame .columns :
345- ser = frame [key ]
346- if isinstance (ser .dtype , BaseMaskedDtype ):
347- new_dtype [key ] = ser .dtype .numpy_dtype
348- if (
349- key in old_dtype
350- and not using_string_dtype ()
351- and is_string_dtype (old_dtype [key ])
352- and not isinstance (old_dtype [key ], StringDtype )
353- and ser .array ._hasna
354- ):
355- # Cast to make sure we get "NaN" string instead of "NA"
356- frame [key ] = ser .astype (old_dtype [key ])
357- frame .loc [ser .isna (), key ] = np .nan
358- old_dtype [key ] = object # Avoid re-casting
359- elif isinstance (ser .dtype , StringDtype ):
360- # We cast here in case the user passed "category" in
361- # order to get the correct dtype.categories.dtype
362- # e.g. test_categorical_dtype_utf16
363- if not using_string_dtype ():
364- sdt = np .dtype (object )
365- frame [key ] = ser .astype (sdt )
366- frame .loc [ser .isna (), key ] = np .nan
367- else :
368- sdt = StringDtype (na_value = np .nan ) # type: ignore[assignment]
369- frame [key ] = frame [key ].astype (sdt )
370- new_dtype [key ] = sdt
371-
372- new_dtype .update (old_dtype )
373- self .dtype = new_dtype
322+ if self .header is None :
323+ frame .columns = self .names
374324
375325 return self ._finalize_pandas_output (frame , multi_index_named )
0 commit comments