Skip to content

Commit 3467ecc

Browse files
committed
Merge branch 'main' into na_values_doc
2 parents 5f49114 + 0d12b44 commit 3467ecc

File tree

280 files changed

+2155
-647
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

280 files changed

+2155
-647
lines changed

.github/workflows/unit-tests.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ jobs:
5757
# Also install zh_CN (its encoding is gb2312) but do not activate it.
5858
# It will be temporarily activated during tests with locale.setlocale
5959
extra_loc: "zh_CN"
60+
- name: "Future infer strings"
61+
env_file: actions-311.yaml
62+
pandas_future_infer_string: "1"
6063
- name: "Pypy"
6164
env_file: actions-pypy-39.yaml
6265
pattern: "not slow and not network and not single_cpu"
@@ -75,6 +78,7 @@ jobs:
7578
LANG: ${{ matrix.lang || 'C.UTF-8' }}
7679
LC_ALL: ${{ matrix.lc_all || '' }}
7780
PANDAS_CI: '1'
81+
PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '0' }}
7882
TEST_ARGS: ${{ matrix.test_args || '' }}
7983
PYTEST_WORKERS: 'auto'
8084
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ The source code is currently hosted on GitHub at:
9696
https://github.com/pandas-dev/pandas
9797

9898
Binary installers for the latest released version are available at the [Python
99-
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://docs.conda.io/en/latest/).
99+
Package Index (PyPI)](https://pypi.org/project/pandas) and on [Conda](https://anaconda.org/conda-forge/pandas).
100100

101101
```sh
102102
# conda

ci/code_checks.sh

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7070
--format=actions \
7171
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
73-
-i "pandas.MultiIndex.append PR07,SA01" \
74-
-i "pandas.MultiIndex.copy PR07,RT03,SA01" \
7573
-i "pandas.MultiIndex.get_level_values SA01" \
76-
-i "pandas.MultiIndex.get_loc PR07" \
7774
-i "pandas.MultiIndex.get_loc_level PR07" \
78-
-i "pandas.MultiIndex.levshape SA01" \
7975
-i "pandas.MultiIndex.names SA01" \
80-
-i "pandas.MultiIndex.remove_unused_levels RT03,SA01" \
8176
-i "pandas.MultiIndex.reorder_levels RT03,SA01" \
82-
-i "pandas.MultiIndex.set_levels RT03,SA01" \
8377
-i "pandas.MultiIndex.sortlevel PR07,SA01" \
8478
-i "pandas.MultiIndex.to_frame RT03" \
8579
-i "pandas.NA SA01" \
@@ -223,7 +217,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
223217
-i "pandas.Timestamp.fromordinal SA01" \
224218
-i "pandas.Timestamp.fromtimestamp PR01,SA01" \
225219
-i "pandas.Timestamp.hour GL08" \
226-
-i "pandas.Timestamp.isoweekday SA01" \
227220
-i "pandas.Timestamp.max PR02" \
228221
-i "pandas.Timestamp.microsecond GL08" \
229222
-i "pandas.Timestamp.min PR02" \
@@ -304,10 +297,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
304297
-i "pandas.api.types.is_re PR07,SA01" \
305298
-i "pandas.api.types.is_re_compilable PR07,SA01" \
306299
-i "pandas.api.types.is_sparse SA01" \
307-
-i "pandas.api.types.is_string_dtype SA01" \
308300
-i "pandas.api.types.is_timedelta64_ns_dtype SA01" \
309301
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
310-
-i "pandas.api.types.union_categoricals RT03,SA01" \
311302
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
312303
-i "pandas.arrays.BooleanArray SA01" \
313304
-i "pandas.arrays.DatetimeArray SA01" \
@@ -330,7 +321,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
330321
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
331322
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
332323
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
333-
-i "pandas.core.groupby.DataFrameGroupBy.median SA01" \
334324
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
335325
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
336326
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
@@ -349,7 +339,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
349339
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
350340
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
351341
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
352-
-i "pandas.core.groupby.SeriesGroupBy.median SA01" \
353342
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
354343
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
355344
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
@@ -364,7 +353,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
364353
-i "pandas.core.resample.Resampler.indices SA01" \
365354
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
366355
-i "pandas.core.resample.Resampler.mean SA01" \
367-
-i "pandas.core.resample.Resampler.median SA01" \
368356
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
369357
-i "pandas.core.resample.Resampler.ohlc SA01" \
370358
-i "pandas.core.resample.Resampler.prod SA01" \

doc/source/getting_started/comparison/comparison_with_sql.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ DELETE
505505
DELETE FROM tips
506506
WHERE tip > 9;
507507
508-
In pandas we select the rows that should remain instead of deleting them:
508+
In pandas we select the rows that should remain instead of deleting the rows that should be removed:
509509

510510
.. ipython:: python
511511

doc/source/user_guide/pyarrow.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,11 @@ PyArrow also provides IO reading functionality that has been integrated into sev
159159
functions provide an ``engine`` keyword that can dispatch to PyArrow to accelerate reading from an IO source.
160160

161161
* :func:`read_csv`
162+
* :func:`read_feather`
162163
* :func:`read_json`
163164
* :func:`read_orc`
164-
* :func:`read_feather`
165+
* :func:`read_parquet`
166+
* :func:`read_table` (experimental)
165167

166168
.. ipython:: python
167169

doc/source/user_guide/style.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@
351351
"\n",
352352
"- Using [.set_table_styles()][table] to control broader areas of the table with specified internal CSS. Although table styles allow the flexibility to add CSS selectors and properties controlling all individual parts of the table, they are unwieldy for individual cell specifications. Also, note that table styles cannot be exported to Excel. \n",
353353
"- Using [.set_td_classes()][td_class] to directly link either external CSS classes to your data cells or link the internal CSS classes created by [.set_table_styles()][table]. See [here](#Setting-Classes-and-Linking-to-External-CSS). These cannot be used on column header rows or indexes, and also won't export to Excel. \n",
354-
"- Using the [.apply()][apply] and [.map()][map] functions to add direct internal CSS to specific data cells. See [here](#Styler-Functions). As of v1.4.0 there are also methods that work directly on column header rows or indexes; [.apply_index()][applyindex] and [.map_index()][mapindex]. Note that only these methods add styles that will export to Excel. These methods work in a similar way to [DataFrame.apply()][dfapply] and [DataFrame.map()][dfmap].\n",
354+
"- Using the [.apply()][apply] and [.map()][map] functions to add direct internal CSS to specific data cells. See [here](#Styler-Functions). As of v1.4.0 there are also methods that work directly on column header rows or indexes: [.apply_index()][applyindex] and [.map_index()][mapindex]. Note that only these methods add styles that will export to Excel. These methods work in a similar way to [DataFrame.apply()][dfapply] and [DataFrame.map()][dfmap].\n",
355355
"\n",
356356
"[table]: ../reference/api/pandas.io.formats.style.Styler.set_table_styles.rst\n",
357357
"[styler]: ../reference/api/pandas.io.formats.style.Styler.rst\n",

doc/source/whatsnew/v3.0.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Other enhancements
3535
- :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).
3636
- :meth:`Index.get_loc` now also accepts subclasses of ``tuple`` as keys (:issue:`57922`)
3737
- :meth:`Styler.set_tooltips` provides an alternative method of storing tooltips by using the ``title`` attribute of ``td`` elements (:issue:`56981`)
38+
- Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`)
3839
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
3940
- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
4041
- Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`)
@@ -583,6 +584,8 @@ I/O
583584
- Bug in :meth:`read_excel` raising ``ValueError`` when passing an array of boolean values when ``dtype="boolean"`` (:issue:`58159`)
584585
- Bug in :meth:`read_json` not validating that the ``typ`` argument is exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
585586
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
587+
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
588+
- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
586589

587590
Period
588591
^^^^^^

pandas/_config/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,6 @@
3030
from pandas._config.display import detect_console_encoding
3131

3232

33-
def using_pyarrow_string_dtype() -> bool:
33+
def using_string_dtype() -> bool:
3434
_mode_options = _global_config["future"]
3535
return _mode_options["infer_string"]

pandas/_libs/lib.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ from cython cimport (
3737
floating,
3838
)
3939

40-
from pandas._config import using_pyarrow_string_dtype
40+
from pandas._config import using_string_dtype
4141

4242
from pandas._libs.missing import check_na_tuples_nonequal
4343

@@ -2699,10 +2699,10 @@ def maybe_convert_objects(ndarray[object] objects,
26992699
seen.object_ = True
27002700

27012701
elif seen.str_:
2702-
if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
2702+
if using_string_dtype() and is_string_array(objects, skipna=True):
27032703
from pandas.core.arrays.string_ import StringDtype
27042704

2705-
dtype = StringDtype(storage="pyarrow_numpy")
2705+
dtype = StringDtype(storage="pyarrow", na_value=np.nan)
27062706
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27072707

27082708
elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):

pandas/_libs/tslibs/nattype.pyx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,13 @@ class NaTType(_NaT):
441441
442442
Monday == 1 ... Sunday == 7.
443443
444+
See Also
445+
--------
446+
Timestamp.weekday : Return the day of the week with Monday=0, Sunday=6.
447+
Timestamp.isocalendar : Return a tuple containing ISO year, week number
448+
and weekday.
449+
datetime.date.isoweekday : Equivalent method in datetime module.
450+
444451
Examples
445452
--------
446453
>>> ts = pd.Timestamp('2023-01-01 10:00:00')

0 commit comments

Comments
 (0)