3
3
from typing import TYPE_CHECKING
4
4
import warnings
5
5
6
- import numpy as np
7
-
8
- from pandas ._config import using_string_dtype
9
-
10
6
from pandas ._libs import lib
11
7
from pandas .compat ._optional import import_optional_dependency
12
8
from pandas .errors import (
16
12
from pandas .util ._exceptions import find_stack_level
17
13
18
14
from pandas .core .dtypes .common import (
19
- is_string_dtype ,
20
15
pandas_dtype ,
21
16
)
22
- from pandas .core .dtypes .dtypes import (
23
- BaseMaskedDtype ,
24
- )
25
17
from pandas .core .dtypes .inference import is_integer
26
18
27
- from pandas .core .arrays .string_ import StringDtype
28
-
29
19
from pandas .io ._util import arrow_table_to_pandas
30
20
from pandas .io .parsers .base_parser import ParserBase
31
21
32
22
if TYPE_CHECKING :
23
+ import pyarrow as pa
24
+
33
25
from pandas ._typing import ReadBuffer
34
26
35
27
from pandas import DataFrame
@@ -174,8 +166,8 @@ def _get_convert_options(self):
174
166
175
167
return convert_options
176
168
177
- def _adjust_column_names (self , frame : DataFrame ) -> tuple [ DataFrame , bool ] :
178
- num_cols = len (frame .columns )
169
+ def _adjust_column_names (self , table : pa . Table ) -> bool :
170
+ num_cols = len (table .columns )
179
171
multi_index_named = True
180
172
if self .header is None :
181
173
if self .names is None :
@@ -188,8 +180,7 @@ def _adjust_column_names(self, frame: DataFrame) -> tuple[DataFrame, bool]:
188
180
columns_prefix = [str (x ) for x in range (num_cols - len (self .names ))]
189
181
self .names = columns_prefix + self .names
190
182
multi_index_named = False
191
- frame .columns = self .names
192
- return frame , multi_index_named
183
+ return multi_index_named
193
184
194
185
def _finalize_index (self , frame : DataFrame , multi_index_named : bool ) -> DataFrame :
195
186
if self .index_col is not None :
@@ -312,13 +303,7 @@ def read(self) -> DataFrame:
312
303
313
304
table = table .cast (new_schema )
314
305
315
- workaround = False
316
- pass_backend = dtype_backend
317
- if self .dtype is not None and dtype_backend != "pyarrow" :
318
- # We pass dtype_backend="pyarrow" and subsequently cast
319
- # to avoid lossy conversion e.g. GH#56136
320
- workaround = True
321
- pass_backend = "numpy_nullable"
306
+ multi_index_named = self ._adjust_column_names (table )
322
307
323
308
with warnings .catch_warnings ():
324
309
warnings .filterwarnings (
@@ -327,49 +312,14 @@ def read(self) -> DataFrame:
327
312
DeprecationWarning ,
328
313
)
329
314
frame = arrow_table_to_pandas (
330
- table , dtype_backend = pass_backend , null_to_int64 = True
315
+ table ,
316
+ dtype_backend = dtype_backend ,
317
+ null_to_int64 = True ,
318
+ dtype = self .dtype ,
319
+ names = self .names ,
331
320
)
332
321
333
- frame , multi_index_named = self ._adjust_column_names (frame )
334
-
335
- if workaround and dtype_backend != "numpy_nullable" :
336
- old_dtype = self .dtype
337
- if not isinstance (old_dtype , dict ):
338
- # e.g. test_categorical_dtype_utf16
339
- old_dtype = dict .fromkeys (frame .columns , old_dtype )
340
-
341
- # _finalize_pandas_output will call astype, but we need to make
342
- # sure all keys are populated appropriately.
343
- new_dtype = {}
344
- for key in frame .columns :
345
- ser = frame [key ]
346
- if isinstance (ser .dtype , BaseMaskedDtype ):
347
- new_dtype [key ] = ser .dtype .numpy_dtype
348
- if (
349
- key in old_dtype
350
- and not using_string_dtype ()
351
- and is_string_dtype (old_dtype [key ])
352
- and not isinstance (old_dtype [key ], StringDtype )
353
- and ser .array ._hasna
354
- ):
355
- # Cast to make sure we get "NaN" string instead of "NA"
356
- frame [key ] = ser .astype (old_dtype [key ])
357
- frame .loc [ser .isna (), key ] = np .nan
358
- old_dtype [key ] = object # Avoid re-casting
359
- elif isinstance (ser .dtype , StringDtype ):
360
- # We cast here in case the user passed "category" in
361
- # order to get the correct dtype.categories.dtype
362
- # e.g. test_categorical_dtype_utf16
363
- if not using_string_dtype ():
364
- sdt = np .dtype (object )
365
- frame [key ] = ser .astype (sdt )
366
- frame .loc [ser .isna (), key ] = np .nan
367
- else :
368
- sdt = StringDtype (na_value = np .nan ) # type: ignore[assignment]
369
- frame [key ] = frame [key ].astype (sdt )
370
- new_dtype [key ] = sdt
371
-
372
- new_dtype .update (old_dtype )
373
- self .dtype = new_dtype
322
+ if self .header is None :
323
+ frame .columns = self .names
374
324
375
325
return self ._finalize_pandas_output (frame , multi_index_named )
0 commit comments