Skip to content

Commit 56e7a16

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-tests-io-dtype-backend-conversion
2 parents aed90a6 + 0fadaa9 commit 56e7a16

File tree

5 files changed

+38
-18
lines changed

5 files changed

+38
-18
lines changed

pandas/core/arrays/string_.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,16 @@ def __init__(
140140
# infer defaults
141141
if storage is None:
142142
if na_value is not libmissing.NA:
143-
if HAS_PYARROW:
144-
storage = "pyarrow"
145-
else:
146-
storage = "python"
143+
storage = get_option("mode.string_storage")
144+
if storage == "auto":
145+
if HAS_PYARROW:
146+
storage = "pyarrow"
147+
else:
148+
storage = "python"
147149
else:
148150
storage = get_option("mode.string_storage")
151+
if storage == "auto":
152+
storage = "python"
149153

150154
if storage == "pyarrow_numpy":
151155
# TODO raise a deprecation warning

pandas/core/config_init.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -452,13 +452,12 @@ def is_terminal() -> bool:
452452

453453
string_storage_doc = """
454454
: string
455-
The default storage for StringDtype. This option is ignored if
456-
``future.infer_string`` is set to True.
455+
The default storage for StringDtype.
457456
"""
458457

459458

460459
def is_valid_string_storage(value: Any) -> None:
461-
legal_values = ["python", "pyarrow"]
460+
legal_values = ["auto", "python", "pyarrow"]
462461
if value not in legal_values:
463462
msg = "Value must be one of python|pyarrow"
464463
if value == "pyarrow_numpy":
@@ -473,7 +472,7 @@ def is_valid_string_storage(value: Any) -> None:
473472
with cf.config_prefix("mode"):
474473
cf.register_option(
475474
"string_storage",
476-
"python",
475+
"auto",
477476
string_storage_doc,
478477
# validator=is_one_of_factory(["python", "pyarrow"]),
479478
validator=is_valid_string_storage,

pandas/tests/arrays/string_/test_string_arrow.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas.compat import HAS_PYARROW
87
import pandas.util._test_decorators as td
98

109
import pandas as pd
@@ -27,11 +26,10 @@ def test_eq_all_na():
2726
tm.assert_extension_array_equal(result, expected)
2827

2928

30-
def test_config(string_storage, request, using_infer_string):
31-
if using_infer_string and string_storage == "python" and HAS_PYARROW:
32-
# string storage with na_value=NaN always uses pyarrow if available
33-
# -> does not yet honor the option
34-
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
29+
def test_config(string_storage, using_infer_string):
30+
# with the default string_storage setting
31+
# always "python" at the moment
32+
assert StringDtype().storage == "python"
3533

3634
with pd.option_context("string_storage", string_storage):
3735
assert StringDtype().storage == string_storage

pandas/tests/dtypes/test_common.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.compat import HAS_PYARROW
67
import pandas.util._test_decorators as td
78

89
from pandas.core.dtypes.astype import astype_array
@@ -802,13 +803,17 @@ def test_pandas_dtype_ea_not_instance():
802803

803804

804805
def test_pandas_dtype_string_dtypes(string_storage):
805-
# TODO(infer_string) remove skip if "python" is supported
806-
pytest.importorskip("pyarrow")
806+
with pd.option_context("future.infer_string", True):
807+
# with the default string_storage setting
808+
result = pandas_dtype("str")
809+
assert result == pd.StringDtype(
810+
"pyarrow" if HAS_PYARROW else "python", na_value=np.nan
811+
)
812+
807813
with pd.option_context("future.infer_string", True):
808814
with pd.option_context("string_storage", string_storage):
809815
result = pandas_dtype("str")
810-
# TODO(infer_string) hardcoded to pyarrow until python is supported
811-
assert result == pd.StringDtype("pyarrow", na_value=np.nan)
816+
assert result == pd.StringDtype(string_storage, na_value=np.nan)
812817

813818
with pd.option_context("future.infer_string", False):
814819
with pd.option_context("string_storage", string_storage):

web/pandas/community/ecosystem.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,13 @@ Deltalake python package lets you access tables stored in
360360
JVM. It provides the ``delta_table.to_pyarrow_table().to_pandas()`` method to convert
361361
any Delta table into Pandas dataframe.
362362

363+
### [pandas-gbq](https://github.com/googleapis/python-bigquery-pandas)
364+
365+
pandas-gbq provides high performance reads and writes to and from
366+
[Google BigQuery](https://cloud.google.com/bigquery/). Previously (before version 2.2.0),
367+
these methods were exposed as `pandas.read_gbq` and `DataFrame.to_gbq`.
368+
Use `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq` instead.
369+
363370
## Out-of-core
364371

365372
### [Bodo](https://bodo.ai/)
@@ -513,6 +520,13 @@ Arrays](https://awkward-array.org/) inside pandas' Series and
513520
DataFrame. It also provides an accessor for using awkward functions
514521
on Series that are of awkward type.
515522

523+
### [db-dtypes](https://github.com/googleapis/python-db-dtypes-pandas)
524+
525+
db-dtypes provides extension types for working with types like
526+
DATE, TIME, and JSON from database systems. This package is used
527+
by pandas-gbq to provide natural dtypes for BigQuery data types without
528+
a natural numpy type.
529+
516530
### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/)
517531

518532
Pandas-Genomics provides an extension type and extension array for working

0 commit comments

Comments
 (0)