Skip to content

Commit 445842e

Browse files
committed
Merge remote-tracking branch 'upstream/main' into deps/pytz/optional
2 parents e8b2c8c + 0e0814b commit 445842e

File tree

79 files changed

+264
-175
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

79 files changed

+264
-175
lines changed

.github/workflows/unit-tests.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ jobs:
5757
# Also install zh_CN (its encoding is gb2312) but do not activate it.
5858
# It will be temporarily activated during tests with locale.setlocale
5959
extra_loc: "zh_CN"
60+
- name: "Future infer strings"
61+
env_file: actions-311.yaml
62+
pattern: "not slow and not network and not single_cpu"
63+
pandas_future_infer_string: "1"
6064
- name: "Pypy"
6165
env_file: actions-pypy-39.yaml
6266
pattern: "not slow and not network and not single_cpu"
@@ -75,6 +79,7 @@ jobs:
7579
LANG: ${{ matrix.lang || 'C.UTF-8' }}
7680
LC_ALL: ${{ matrix.lc_all || '' }}
7781
PANDAS_CI: '1'
82+
PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '0' }}
7883
TEST_ARGS: ${{ matrix.test_args || '' }}
7984
PYTEST_WORKERS: 'auto'
8085
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ The source code is currently hosted on GitHub at:
9696
https://github.com/pandas-dev/pandas
9797

9898
Binary installers for the latest released version are available at the [Python
99-
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/).
99+
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://anaconda.org/conda-forge/pandas).
100100

101101
```sh
102102
# conda

ci/code_checks.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
306306
-i "pandas.api.types.is_sparse SA01" \
307307
-i "pandas.api.types.is_timedelta64_ns_dtype SA01" \
308308
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
309-
-i "pandas.api.types.union_categoricals RT03,SA01" \
310309
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
311310
-i "pandas.arrays.BooleanArray SA01" \
312311
-i "pandas.arrays.DatetimeArray SA01" \

ci/run_tests.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,11 @@ if [[ "$PATTERN" ]]; then
1616
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
1717
fi
1818

19+
# temporarily let pytest always succeed (many tests are not yet passing in the
20+
# build enabling the future string dtype)
21+
if [[ "$PANDAS_FUTURE_INFER_STRING" == "1" ]]; then
22+
PYTEST_CMD="$PYTEST_CMD || true"
23+
fi
24+
1925
echo $PYTEST_CMD
2026
sh -c "$PYTEST_CMD"

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,7 @@ I/O
616616
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
617617
- Bug in :meth:`read_json` not validating that the ``typ`` argument is exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
618618
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
619+
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
619620

620621
Period
621622
^^^^^^

pandas/_config/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,6 @@
3030
from pandas._config.display import detect_console_encoding
3131

3232

33-
def using_pyarrow_string_dtype() -> bool:
33+
def using_string_dtype() -> bool:
3434
_mode_options = _global_config["future"]
3535
return _mode_options["infer_string"]

pandas/_libs/lib.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ from cython cimport (
3737
floating,
3838
)
3939

40-
from pandas._config import using_pyarrow_string_dtype
40+
from pandas._config import using_string_dtype
4141

4242
from pandas._libs.missing import check_na_tuples_nonequal
4343

@@ -2699,7 +2699,7 @@ def maybe_convert_objects(ndarray[object] objects,
26992699
seen.object_ = True
27002700

27012701
elif seen.str_:
2702-
if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
2702+
if using_string_dtype() and is_string_array(objects, skipna=True):
27032703
from pandas.core.arrays.string_ import StringDtype
27042704

27052705
dtype = StringDtype(storage="pyarrow_numpy")

pandas/core/config_init.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@ def register_converter_cb(key: str) -> None:
858858
with cf.config_prefix("future"):
859859
cf.register_option(
860860
"infer_string",
861-
False,
861+
True if os.environ.get("PANDAS_FUTURE_INFER_STRING", "0") == "1" else False,
862862
"Whether to infer sequence of str objects as pyarrow string "
863863
"dtype, which will be the default in pandas 3.0 "
864864
"(at which point this option will be deprecated).",

pandas/core/construction.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import numpy as np
1717
from numpy import ma
1818

19-
from pandas._config import using_pyarrow_string_dtype
19+
from pandas._config import using_string_dtype
2020

2121
from pandas._libs import lib
2222
from pandas._libs.tslibs import (
@@ -571,11 +571,7 @@ def sanitize_array(
571571
if not is_list_like(data):
572572
if index is None:
573573
raise ValueError("index must be specified when data is not list-like")
574-
if (
575-
isinstance(data, str)
576-
and using_pyarrow_string_dtype()
577-
and original_dtype is None
578-
):
574+
if isinstance(data, str) and using_string_dtype() and original_dtype is None:
579575
from pandas.core.arrays.string_ import StringDtype
580576

581577
dtype = StringDtype("pyarrow_numpy")
@@ -609,7 +605,7 @@ def sanitize_array(
609605
subarr = data
610606
if data.dtype == object and infer_object:
611607
subarr = maybe_infer_to_datetimelike(data)
612-
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
608+
elif data.dtype.kind == "U" and using_string_dtype():
613609
from pandas.core.arrays.string_ import StringDtype
614610

615611
dtype = StringDtype(storage="pyarrow_numpy")

pandas/core/dtypes/cast.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import numpy as np
2020

21-
from pandas._config import using_pyarrow_string_dtype
21+
from pandas._config import using_string_dtype
2222

2323
from pandas._libs import (
2424
Interval,
@@ -798,7 +798,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
798798
# coming out as np.str_!
799799

800800
dtype = _dtype_obj
801-
if using_pyarrow_string_dtype():
801+
if using_string_dtype():
802802
from pandas.core.arrays.string_ import StringDtype
803803

804804
dtype = StringDtype(storage="pyarrow_numpy")

0 commit comments

Comments
 (0)