Skip to content

Commit 247e9d8

Browse files
committed
Merge branch 'main' into BUG-57390/Identity-checking-NA-in-map-incorrect
2 parents 237926d + 9c8c685 commit 247e9d8

File tree

199 files changed

+974
-332
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

199 files changed

+974
-332
lines changed

.github/workflows/unit-tests.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ jobs:
5757
# Also install zh_CN (its encoding is gb2312) but do not activate it.
5858
# It will be temporarily activated during tests with locale.setlocale
5959
extra_loc: "zh_CN"
60+
- name: "Future infer strings"
61+
env_file: actions-311.yaml
62+
pandas_future_infer_string: "1"
6063
- name: "Pypy"
6164
env_file: actions-pypy-39.yaml
6265
pattern: "not slow and not network and not single_cpu"
@@ -75,6 +78,7 @@ jobs:
7578
LANG: ${{ matrix.lang || 'C.UTF-8' }}
7679
LC_ALL: ${{ matrix.lc_all || '' }}
7780
PANDAS_CI: '1'
81+
PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '0' }}
7882
TEST_ARGS: ${{ matrix.test_args || '' }}
7983
PYTEST_WORKERS: 'auto'
8084
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ The source code is currently hosted on GitHub at:
9696
https://github.com/pandas-dev/pandas
9797

9898
Binary installers for the latest released version are available at the [Python
99-
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/).
99+
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://anaconda.org/conda-forge/pandas).
100100

101101
```sh
102102
# conda

ci/code_checks.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
223223
-i "pandas.Timestamp.fromordinal SA01" \
224224
-i "pandas.Timestamp.fromtimestamp PR01,SA01" \
225225
-i "pandas.Timestamp.hour GL08" \
226-
-i "pandas.Timestamp.isoweekday SA01" \
227226
-i "pandas.Timestamp.max PR02" \
228227
-i "pandas.Timestamp.microsecond GL08" \
229228
-i "pandas.Timestamp.min PR02" \
@@ -306,7 +305,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
306305
-i "pandas.api.types.is_sparse SA01" \
307306
-i "pandas.api.types.is_timedelta64_ns_dtype SA01" \
308307
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
309-
-i "pandas.api.types.union_categoricals RT03,SA01" \
310308
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
311309
-i "pandas.arrays.BooleanArray SA01" \
312310
-i "pandas.arrays.DatetimeArray SA01" \
@@ -329,7 +327,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
329327
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
330328
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
331329
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
332-
-i "pandas.core.groupby.DataFrameGroupBy.median SA01" \
333330
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
334331
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
335332
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
@@ -348,7 +345,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
348345
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
349346
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
350347
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
351-
-i "pandas.core.groupby.SeriesGroupBy.median SA01" \
352348
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
353349
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
354350
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
@@ -363,7 +359,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
363359
-i "pandas.core.resample.Resampler.indices SA01" \
364360
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
365361
-i "pandas.core.resample.Resampler.mean SA01" \
366-
-i "pandas.core.resample.Resampler.median SA01" \
367362
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
368363
-i "pandas.core.resample.Resampler.ohlc SA01" \
369364
-i "pandas.core.resample.Resampler.prod SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,7 @@ I/O
585585
- Bug in :meth:`read_json` where the ``typ`` argument was not validated to be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
586586
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
587587
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
588+
- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
588589

589590
Period
590591
^^^^^^

pandas/_config/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,6 @@
3030
from pandas._config.display import detect_console_encoding
3131

3232

33-
def using_pyarrow_string_dtype() -> bool:
33+
def using_string_dtype() -> bool:
3434
_mode_options = _global_config["future"]
3535
return _mode_options["infer_string"]

pandas/_libs/lib.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ from cython cimport (
3737
floating,
3838
)
3939

40-
from pandas._config import using_pyarrow_string_dtype
40+
from pandas._config import using_string_dtype
4141

4242
from pandas._libs.missing import check_na_tuples_nonequal
4343

@@ -2772,10 +2772,10 @@ def maybe_convert_objects(ndarray[object] objects,
27722772
seen.object_ = True
27732773

27742774
elif seen.str_:
2775-
if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
2775+
if using_string_dtype() and is_string_array(objects, skipna=True):
27762776
from pandas.core.arrays.string_ import StringDtype
27772777

2778-
dtype = StringDtype(storage="pyarrow_numpy")
2778+
dtype = StringDtype(storage="pyarrow", na_value=np.nan)
27792779
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27802780

27812781
elif storage == "pyarrow" or storage == "python":

pandas/_libs/tslibs/nattype.pyx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,13 @@ class NaTType(_NaT):
441441
442442
Monday == 1 ... Sunday == 7.
443443
444+
See Also
445+
--------
446+
Timestamp.weekday : Return the day of the week with Monday=0, Sunday=6.
447+
Timestamp.isocalendar : Return a tuple containing ISO year, week number
448+
and weekday.
449+
datetime.date.isoweekday : Equivalent method in datetime module.
450+
444451
Examples
445452
--------
446453
>>> ts = pd.Timestamp('2023-01-01 10:00:00')

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2775,6 +2775,13 @@ default 'raise'
27752775
27762776
Monday == 1 ... Sunday == 7.
27772777
2778+
See Also
2779+
--------
2780+
Timestamp.weekday : Return the day of the week with Monday=0, Sunday=6.
2781+
Timestamp.isocalendar : Return a tuple containing ISO year, week number
2782+
and weekday.
2783+
datetime.date.isoweekday : Equivalent method in datetime module.
2784+
27782785
Examples
27792786
--------
27802787
>>> ts = pd.Timestamp('2023-01-01 10:00:00')

pandas/_testing/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,14 +509,14 @@ def shares_memory(left, right) -> bool:
509509
if (
510510
isinstance(left, ExtensionArray)
511511
and is_string_dtype(left.dtype)
512-
and left.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined]
512+
and left.dtype.storage == "pyarrow" # type: ignore[attr-defined]
513513
):
514514
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
515515
left = cast("ArrowExtensionArray", left)
516516
if (
517517
isinstance(right, ExtensionArray)
518518
and is_string_dtype(right.dtype)
519-
and right.dtype.storage in ("pyarrow", "pyarrow_numpy") # type: ignore[attr-defined]
519+
and right.dtype.storage == "pyarrow" # type: ignore[attr-defined]
520520
):
521521
right = cast("ArrowExtensionArray", right)
522522
left_pa_data = left._pa_array

pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -580,10 +580,8 @@ def __getitem__(self, item: PositionalIndexer):
580580
if isinstance(item, np.ndarray):
581581
if not len(item):
582582
# Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
583-
if self._dtype.name == "string" and self._dtype.storage in (
584-
"pyarrow",
585-
"pyarrow_numpy",
586-
):
583+
if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
584+
# TODO(infer_string) should this be large_string?
587585
pa_dtype = pa.string()
588586
else:
589587
pa_dtype = self._dtype.pyarrow_dtype

0 commit comments

Comments
 (0)