Skip to content

Commit 323414c

Browse files
committed
not-infer-string compat
1 parent e5b752e commit 323414c

File tree

1 file changed

+24
-3
lines changed

1 file changed

+24
-3
lines changed

pandas/io/parsers/arrow_parser_wrapper.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
import numpy as np
77

8+
from pandas._config import using_string_dtype
9+
810
from pandas._libs import lib
911
from pandas.compat._optional import import_optional_dependency
1012
from pandas.errors import (
@@ -13,7 +15,10 @@
1315
)
1416
from pandas.util._exceptions import find_stack_level
1517

16-
from pandas.core.dtypes.common import pandas_dtype
18+
from pandas.core.dtypes.common import (
19+
is_string_dtype,
20+
pandas_dtype,
21+
)
1722
from pandas.core.dtypes.dtypes import (
1823
BaseMaskedDtype,
1924
)
@@ -326,13 +331,29 @@ def read(self) -> DataFrame:
326331
ser = frame[key]
327332
if isinstance(ser.dtype, BaseMaskedDtype):
328333
new_dtype[key] = ser.dtype.numpy_dtype
334+
if (
335+
key in old_dtype
336+
and not using_string_dtype()
337+
and is_string_dtype(old_dtype[key])
338+
and not isinstance(old_dtype[key], StringDtype)
339+
and ser.array._hasna
340+
):
341+
# Cast to make sure we get "NaN" string instead of "NA"
342+
frame[key] = ser.astype(old_dtype[key])
343+
frame.loc[ser.isna(), key] = np.nan
344+
old_dtype[key] = object # Avoid re-casting
329345
elif isinstance(ser.dtype, StringDtype):
330346
# We cast here in case the user passed "category" in
331347
# order to get the correct dtype.categories.dtype
332348
# e.g. test_categorical_dtype_utf16
333-
sdt = StringDtype(na_value=np.nan)
349+
if not using_string_dtype():
350+
sdt = np.dtype(object)
351+
frame[key] = ser.astype(sdt)
352+
frame.loc[ser.isna(), key] = np.nan
353+
else:
354+
sdt = StringDtype(na_value=np.nan)
355+
frame[key] = frame[key].astype(sdt)
334356
new_dtype[key] = sdt # type: ignore[assignment]
335-
frame[key] = frame[key].astype(new_dtype[key])
336357

337358
new_dtype.update(old_dtype)
338359
self.dtype = new_dtype

0 commit comments

Comments
 (0)