Skip to content

Commit a819973

Browse files
committed
Latest branch merge
2 parents b29e151 + 0e0814b commit a819973

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+152
-166
lines changed

.github/workflows/unit-tests.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ jobs:
5757
# Also install zh_CN (its encoding is gb2312) but do not activate it.
5858
# It will be temporarily activated during tests with locale.setlocale
5959
extra_loc: "zh_CN"
60+
- name: "Future infer strings"
61+
env_file: actions-311.yaml
62+
pattern: "not slow and not network and not single_cpu"
63+
pandas_future_infer_string: "1"
6064
- name: "Pypy"
6165
env_file: actions-pypy-39.yaml
6266
pattern: "not slow and not network and not single_cpu"
@@ -75,6 +79,7 @@ jobs:
7579
LANG: ${{ matrix.lang || 'C.UTF-8' }}
7680
LC_ALL: ${{ matrix.lc_all || '' }}
7781
PANDAS_CI: '1'
82+
PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '0' }}
7883
TEST_ARGS: ${{ matrix.test_args || '' }}
7984
PYTEST_WORKERS: 'auto'
8085
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ The source code is currently hosted on GitHub at:
9696
https://github.com/pandas-dev/pandas
9797

9898
Binary installers for the latest released version are available at the [Python
99-
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/).
99+
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://anaconda.org/conda-forge/pandas).
100100

101101
```sh
102102
# conda

ci/code_checks.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
306306
-i "pandas.api.types.is_sparse SA01" \
307307
-i "pandas.api.types.is_timedelta64_ns_dtype SA01" \
308308
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
309-
-i "pandas.api.types.union_categoricals RT03,SA01" \
310309
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
311310
-i "pandas.arrays.BooleanArray SA01" \
312311
-i "pandas.arrays.DatetimeArray SA01" \

ci/run_tests.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,11 @@ if [[ "$PATTERN" ]]; then
1616
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
1717
fi
1818

19+
# temporarily let pytest always succeed (many tests are not yet passing in the
20+
# build enabling the future string dtype)
21+
if [[ "$PANDAS_FUTURE_INFER_STRING" == "1" ]]; then
22+
PYTEST_CMD="$PYTEST_CMD || true"
23+
fi
24+
1925
echo $PYTEST_CMD
2026
sh -c "$PYTEST_CMD"

pandas/_config/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,6 @@
3030
from pandas._config.display import detect_console_encoding
3131

3232

33-
def using_pyarrow_string_dtype() -> bool:
33+
def using_string_dtype() -> bool:
3434
_mode_options = _global_config["future"]
3535
return _mode_options["infer_string"]

pandas/_libs/lib.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ from cython cimport (
3737
floating,
3838
)
3939

40-
from pandas._config import using_pyarrow_string_dtype
40+
from pandas._config import using_string_dtype
4141

4242
from pandas._libs.missing import check_na_tuples_nonequal
4343

@@ -2699,7 +2699,7 @@ def maybe_convert_objects(ndarray[object] objects,
26992699
seen.object_ = True
27002700

27012701
elif seen.str_:
2702-
if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
2702+
if using_string_dtype() and is_string_array(objects, skipna=True):
27032703
from pandas.core.arrays.string_ import StringDtype
27042704

27052705
dtype = StringDtype(storage="pyarrow_numpy")

pandas/core/config_init.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@ def register_converter_cb(key: str) -> None:
858858
with cf.config_prefix("future"):
859859
cf.register_option(
860860
"infer_string",
861-
False,
861+
True if os.environ.get("PANDAS_FUTURE_INFER_STRING", "0") == "1" else False,
862862
"Whether to infer sequence of str objects as pyarrow string "
863863
"dtype, which will be the default in pandas 3.0 "
864864
"(at which point this option will be deprecated).",

pandas/core/construction.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import numpy as np
1717
from numpy import ma
1818

19-
from pandas._config import using_pyarrow_string_dtype
19+
from pandas._config import using_string_dtype
2020

2121
from pandas._libs import lib
2222
from pandas._libs.tslibs import (
@@ -571,11 +571,7 @@ def sanitize_array(
571571
if not is_list_like(data):
572572
if index is None:
573573
raise ValueError("index must be specified when data is not list-like")
574-
if (
575-
isinstance(data, str)
576-
and using_pyarrow_string_dtype()
577-
and original_dtype is None
578-
):
574+
if isinstance(data, str) and using_string_dtype() and original_dtype is None:
579575
from pandas.core.arrays.string_ import StringDtype
580576

581577
dtype = StringDtype("pyarrow_numpy")
@@ -609,7 +605,7 @@ def sanitize_array(
609605
subarr = data
610606
if data.dtype == object and infer_object:
611607
subarr = maybe_infer_to_datetimelike(data)
612-
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
608+
elif data.dtype.kind == "U" and using_string_dtype():
613609
from pandas.core.arrays.string_ import StringDtype
614610

615611
dtype = StringDtype(storage="pyarrow_numpy")

pandas/core/dtypes/cast.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import numpy as np
2020

21-
from pandas._config import using_pyarrow_string_dtype
21+
from pandas._config import using_string_dtype
2222

2323
from pandas._libs import (
2424
Interval,
@@ -798,7 +798,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
798798
# coming out as np.str_!
799799

800800
dtype = _dtype_obj
801-
if using_pyarrow_string_dtype():
801+
if using_string_dtype():
802802
from pandas.core.arrays.string_ import StringDtype
803803

804804
dtype = StringDtype(storage="pyarrow_numpy")

pandas/core/dtypes/concat.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ def union_categoricals(
190190
Returns
191191
-------
192192
Categorical
193+
The union of categories being combined.
193194
194195
Raises
195196
------
@@ -201,6 +202,11 @@ def union_categoricals(
201202
ValueError
202203
Empty list of categoricals passed
203204
205+
See Also
206+
--------
207+
CategoricalDtype : Type for categorical data with the categories and orderedness.
208+
Categorical : Represent a categorical variable in classic R / S-plus fashion.
209+
204210
Notes
205211
-----
206212
To learn more about categories, see `link

0 commit comments

Comments
 (0)