Skip to content

Commit c063298

Browse files
Merge remote-tracking branch 'upstream/main' into string-dtype-object
2 parents 79eb3b4 + 9c8c685 commit c063298

File tree

198 files changed

+972
-332
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

198 files changed

+972
-332
lines changed

.github/workflows/unit-tests.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ jobs:
5757
# Also install zh_CN (its encoding is gb2312) but do not activate it.
5858
# It will be temporarily activated during tests with locale.setlocale
5959
extra_loc: "zh_CN"
60+
- name: "Future infer strings"
61+
env_file: actions-311.yaml
62+
pandas_future_infer_string: "1"
6063
- name: "Pypy"
6164
env_file: actions-pypy-39.yaml
6265
pattern: "not slow and not network and not single_cpu"
@@ -75,6 +78,7 @@ jobs:
7578
LANG: ${{ matrix.lang || 'C.UTF-8' }}
7679
LC_ALL: ${{ matrix.lc_all || '' }}
7780
PANDAS_CI: '1'
81+
PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '0' }}
7882
TEST_ARGS: ${{ matrix.test_args || '' }}
7983
PYTEST_WORKERS: 'auto'
8084
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ The source code is currently hosted on GitHub at:
9696
https://github.com/pandas-dev/pandas
9797

9898
Binary installers for the latest released version are available at the [Python
99-
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/).
99+
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://anaconda.org/conda-forge/pandas).
100100

101101
```sh
102102
# conda

ci/code_checks.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
223223
-i "pandas.Timestamp.fromordinal SA01" \
224224
-i "pandas.Timestamp.fromtimestamp PR01,SA01" \
225225
-i "pandas.Timestamp.hour GL08" \
226-
-i "pandas.Timestamp.isoweekday SA01" \
227226
-i "pandas.Timestamp.max PR02" \
228227
-i "pandas.Timestamp.microsecond GL08" \
229228
-i "pandas.Timestamp.min PR02" \
@@ -306,7 +305,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
306305
-i "pandas.api.types.is_sparse SA01" \
307306
-i "pandas.api.types.is_timedelta64_ns_dtype SA01" \
308307
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
309-
-i "pandas.api.types.union_categoricals RT03,SA01" \
310308
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
311309
-i "pandas.arrays.BooleanArray SA01" \
312310
-i "pandas.arrays.DatetimeArray SA01" \
@@ -329,7 +327,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
329327
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
330328
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
331329
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
332-
-i "pandas.core.groupby.DataFrameGroupBy.median SA01" \
333330
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
334331
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
335332
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
@@ -348,7 +345,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
348345
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
349346
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
350347
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
351-
-i "pandas.core.groupby.SeriesGroupBy.median SA01" \
352348
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
353349
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
354350
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
@@ -363,7 +359,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
363359
-i "pandas.core.resample.Resampler.indices SA01" \
364360
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
365361
-i "pandas.core.resample.Resampler.mean SA01" \
366-
-i "pandas.core.resample.Resampler.median SA01" \
367362
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
368363
-i "pandas.core.resample.Resampler.ohlc SA01" \
369364
-i "pandas.core.resample.Resampler.prod SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,7 @@ I/O
584584
- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
585585
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
586586
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
587+
- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
587588

588589
Period
589590
^^^^^^

pandas/_config/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,6 @@
3030
from pandas._config.display import detect_console_encoding
3131

3232

33-
def using_pyarrow_string_dtype() -> bool:
33+
def using_string_dtype() -> bool:
3434
_mode_options = _global_config["future"]
3535
return _mode_options["infer_string"]

pandas/_libs/lib.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ from cython cimport (
3737
floating,
3838
)
3939

40-
from pandas._config import using_pyarrow_string_dtype
40+
from pandas._config import using_string_dtype
4141

4242
from pandas._libs.missing import check_na_tuples_nonequal
4343

@@ -2699,10 +2699,10 @@ def maybe_convert_objects(ndarray[object] objects,
26992699
seen.object_ = True
27002700

27012701
elif seen.str_:
2702-
if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
2702+
if using_string_dtype() and is_string_array(objects, skipna=True):
27032703
from pandas.core.arrays.string_ import StringDtype
27042704

2705-
dtype = StringDtype()
2705+
dtype = StringDtype(na_value=np.nan)
27062706
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27072707

27082708
elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):

pandas/_libs/tslibs/nattype.pyx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,13 @@ class NaTType(_NaT):
441441
442442
Monday == 1 ... Sunday == 7.
443443
444+
See Also
445+
--------
446+
Timestamp.weekday : Return the day of the week with Monday=0, Sunday=6.
447+
Timestamp.isocalendar : Return a tuple containing ISO year, week number
448+
and weekday.
449+
datetime.date.isoweekday : Equivalent method in datetime module.
450+
444451
Examples
445452
--------
446453
>>> ts = pd.Timestamp('2023-01-01 10:00:00')

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2775,6 +2775,13 @@ default 'raise'
27752775
27762776
Monday == 1 ... Sunday == 7.
27772777
2778+
See Also
2779+
--------
2780+
Timestamp.weekday : Return the day of the week with Monday=0, Sunday=6.
2781+
Timestamp.isocalendar : Return a tuple containing ISO year, week number
2782+
and weekday.
2783+
datetime.date.isoweekday : Equivalent method in datetime module.
2784+
27782785
Examples
27792786
--------
27802787
>>> ts = pd.Timestamp('2023-01-01 10:00:00')

pandas/_testing/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,14 +509,14 @@ def shares_memory(left, right) -> bool:
509509
if (
510510
isinstance(left, ExtensionArray)
511511
and is_string_dtype(left.dtype)
512-
and left.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined]
512+
and left.dtype.storage == "pyarrow" # type: ignore[attr-defined]
513513
):
514514
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
515515
left = cast("ArrowExtensionArray", left)
516516
if (
517517
isinstance(right, ExtensionArray)
518518
and is_string_dtype(right.dtype)
519-
and right.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined]
519+
and right.dtype.storage == "pyarrow" # type: ignore[attr-defined]
520520
):
521521
right = cast("ArrowExtensionArray", right)
522522
left_pa_data = left._pa_array

pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -575,10 +575,8 @@ def __getitem__(self, item: PositionalIndexer):
575575
if isinstance(item, np.ndarray):
576576
if not len(item):
577577
# Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
578-
if self._dtype.name == "string" and self._dtype.storage in (
579-
"pyarrow",
580-
"pyarrow_numpy",
581-
):
578+
if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
579+
# TODO(infer_string) should this be large_string?
582580
pa_dtype = pa.string()
583581
else:
584582
pa_dtype = self._dtype.pyarrow_dtype

0 commit comments

Comments
 (0)