Merge branch 'main' into fix/group_by_agg_pyarrow_bool_numpy_same_type

Kei · Kei · commit 3a3f2a283b7d · 2024-04-21T16:43:09.000+08:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -108,7 +108,7 @@ repos:
         types: [python]
         stages: [manual]
         additional_dependencies: &pyright_dependencies
-        - pyright@1.1.351
+        - pyright@1.1.352
     -   id: pyright
         # note: assumes python env is setup and activated
         name: pyright reportGeneralTypeIssues
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -99,11 +99,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.reorder_levels SA01" \
         -i "pandas.DataFrame.sem PR01,RT03,SA01" \
         -i "pandas.DataFrame.skew RT03,SA01" \
-        -i "pandas.DataFrame.sparse PR01,SA01" \
-        -i "pandas.DataFrame.sparse.density SA01" \
-        -i "pandas.DataFrame.sparse.from_spmatrix SA01" \
-        -i "pandas.DataFrame.sparse.to_coo SA01" \
-        -i "pandas.DataFrame.sparse.to_dense SA01" \
+        -i "pandas.DataFrame.sparse PR01" \
         -i "pandas.DataFrame.std PR01,RT03,SA01" \
         -i "pandas.DataFrame.sum RT03" \
         -i "pandas.DataFrame.swaplevel SA01" \
@@ -138,8 +134,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.DatetimeTZDtype.unit SA01" \
         -i "pandas.Grouper PR02" \
-        -i "pandas.HDFStore.append PR01,SA01" \
-        -i "pandas.HDFStore.get SA01" \
         -i "pandas.HDFStore.groups SA01" \
         -i "pandas.HDFStore.info RT03,SA01" \
         -i "pandas.HDFStore.keys SA01" \
@@ -178,13 +172,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.name SA01" \
         -i "pandas.Index.names GL08" \
         -i "pandas.Index.nbytes SA01" \
-        -i "pandas.Index.ndim SA01" \
         -i "pandas.Index.nunique RT03" \
         -i "pandas.Index.putmask PR01,RT03" \
         -i "pandas.Index.ravel PR01,RT03" \
         -i "pandas.Index.reindex PR07" \
-        -i "pandas.Index.shape SA01" \
-        -i "pandas.Index.size SA01" \
         -i "pandas.Index.slice_indexer PR07,RT03,SA01" \
         -i "pandas.Index.slice_locs RT03" \
         -i "pandas.Index.str PR01,SA01" \
@@ -361,7 +352,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.mode SA01" \
         -i "pandas.Series.mul PR07" \
         -i "pandas.Series.nbytes SA01" \
-        -i "pandas.Series.ndim SA01" \
         -i "pandas.Series.ne PR07,SA01" \
         -i "pandas.Series.nunique RT03" \
         -i "pandas.Series.pad PR01,SA01" \
@@ -381,7 +371,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.rtruediv PR07" \
         -i "pandas.Series.sem PR01,RT03,SA01" \
         -i "pandas.Series.shape SA01" \
-        -i "pandas.Series.size SA01" \
         -i "pandas.Series.skew RT03,SA01" \
         -i "pandas.Series.sparse PR01,SA01" \
         -i "pandas.Series.sparse.density SA01" \
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -142,6 +142,8 @@ Optional libraries below the lowest tested version may still work, but are not c
 +------------------------+---------------------+
 | adbc-driver-postgresql | 0.10.0              |
 +------------------------+---------------------+
+| mypy (dev)             | 1.9.0               |
++------------------------+---------------------+
 
 See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
 
diff --git a/environment.yml b/environment.yml
@@ -77,7 +77,7 @@ dependencies:
 
   # code checks
   - flake8=6.1.0  # run in subprocess over docstring examples
-  - mypy=1.8.0  # pre-commit uses locally installed mypy
+  - mypy=1.9.0  # pre-commit uses locally installed mypy
   - tokenize-rt  # scripts/check_for_inconsistent_pandas_namespace.py
   - pre-commit>=3.6.0
 
diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -239,6 +239,7 @@ def __reversed__(self) -> Iterator[_T_co]: ...
 # see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
 FuncType = Callable[..., Any]
 F = TypeVar("F", bound=FuncType)
+TypeT = TypeVar("TypeT", bound=type)
 
 # types of vectorized key functions for DataFrame::sort_values and
 # DataFrame::sort_index, among others
diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
@@ -8,6 +8,7 @@
 from __future__ import annotations
 
 from typing import (
+    TYPE_CHECKING,
     Callable,
     final,
 )
@@ -16,6 +17,12 @@
 from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 
+if TYPE_CHECKING:
+    from pandas._typing import TypeT
+
+    from pandas import Index
+    from pandas.core.generic import NDFrame
+
 
 class DirNamesMixin:
     _accessors: set[str] = set()
@@ -232,7 +239,9 @@ def __get__(self, obj, cls):
 
 
 @doc(klass="", examples="", others="")
-def _register_accessor(name: str, cls):
+def _register_accessor(
+    name: str, cls: type[NDFrame | Index]
+) -> Callable[[TypeT], TypeT]:
     """
     Register a custom accessor on {klass} objects.
 
@@ -277,7 +286,7 @@ def _register_accessor(name: str, cls):
     {examples}
     """
 
-    def decorator(accessor):
+    def decorator(accessor: TypeT) -> TypeT:
         if hasattr(cls, name):
             warnings.warn(
                 f"registration of accessor {accessor!r} under name "
@@ -320,7 +329,7 @@ def decorator(accessor):
 
 
 @doc(_register_accessor, klass="DataFrame", examples=_register_df_examples)
-def register_dataframe_accessor(name: str):
+def register_dataframe_accessor(name: str) -> Callable[[TypeT], TypeT]:
     from pandas import DataFrame
 
     return _register_accessor(name, DataFrame)
@@ -351,7 +360,7 @@ def register_dataframe_accessor(name: str):
 
 
 @doc(_register_accessor, klass="Series", examples=_register_series_examples)
-def register_series_accessor(name: str):
+def register_series_accessor(name: str) -> Callable[[TypeT], TypeT]:
     from pandas import Series
 
     return _register_accessor(name, Series)
@@ -385,7 +394,7 @@ def register_series_accessor(name: str):
 
 
 @doc(_register_accessor, klass="Index", examples=_register_index_examples)
-def register_index_accessor(name: str):
+def register_index_accessor(name: str) -> Callable[[TypeT], TypeT]:
     from pandas import Index
 
     return _register_accessor(name, Index)
diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
@@ -243,6 +243,10 @@ class SparseFrameAccessor(BaseAccessor, PandasDelegate):
     """
     DataFrame accessor for sparse data.
 
+    See Also
+    --------
+    DataFrame.sparse.density : Ratio of non-sparse points to total (dense) data points.
+
     Examples
     --------
     >>> df = pd.DataFrame({"a": [1, 2, 0, 0], "b": [3, 0, 0, 4]}, dtype="Sparse[int]")
@@ -274,6 +278,11 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
             Each column of the DataFrame is stored as a
             :class:`arrays.SparseArray`.
 
+        See Also
+        --------
+        DataFrame.sparse.to_coo : Return the contents of the frame as a
+            sparse SciPy COO matrix.
+
         Examples
         --------
         >>> import scipy.sparse
@@ -319,6 +328,11 @@ def to_dense(self) -> DataFrame:
         DataFrame
             A DataFrame with the same values stored as dense arrays.
 
+        See Also
+        --------
+        DataFrame.sparse.density : Ratio of non-sparse points to total
+            (dense) data points.
+
         Examples
         --------
         >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])})
@@ -343,6 +357,10 @@ def to_coo(self) -> spmatrix:
             If the caller is heterogeneous and contains booleans or objects,
             the result will be of dtype=object. See Notes.
 
+        See Also
+        --------
+        DataFrame.sparse.to_dense : Convert a DataFrame with sparse values to dense.
+
         Notes
         -----
         The dtype will be the lowest-common-denominator type (implicit
@@ -388,6 +406,11 @@ def density(self) -> float:
         """
         Ratio of non-sparse points to total (dense) data points.
 
+        See Also
+        --------
+        DataFrame.sparse.from_spmatrix : Create a new DataFrame from a
+            scipy sparse matrix.
+
         Examples
         --------
         >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0, 1])})
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -355,6 +355,13 @@ def ndim(self) -> Literal[1]:
         """
         Number of dimensions of the underlying data, by definition 1.
 
+        See Also
+        --------
+        Series.size : Return the number of elements in the underlying data.
+        Series.shape : Return a tuple of the shape of the underlying data.
+        Series.dtype : Return the dtype object of the underlying data.
+        Series.values : Return Series as ndarray or ndarray-like depending on the dtype.
+
         Examples
         --------
         >>> s = pd.Series(["Ant", "Bear", "Cow"])
@@ -440,6 +447,13 @@ def size(self) -> int:
         """
         Return the number of elements in the underlying data.
 
+        See Also
+        --------
+        Series.ndim : Number of dimensions of the underlying data, by definition 1.
+        Series.shape : Return a tuple of the shape of the underlying data.
+        Series.dtype : Return the dtype object of the underlying data.
+        Series.values : Return Series as ndarray or ndarray-like depending on the dtype.
+
         Examples
         --------
         For Series:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -7104,6 +7104,13 @@ def shape(self) -> Shape:
         """
         Return a tuple of the shape of the underlying data.
 
+        See Also
+        --------
+        Index.size : Return the number of elements in the underlying data.
+        Index.ndim : Number of dimensions of the underlying data, by definition 1.
+        Index.dtype : Return the dtype object of the underlying data.
+        Index.values : Return an array representing the data in the Index.
+
         Examples
         --------
         >>> idx = pd.Index([1, 2, 3])
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -786,6 +786,11 @@ def get(self, key: str):
         object
             Same type as object stored in file.
 
+        See Also
+        --------
+        HDFStore.get_node : Returns the node with the key.
+        HDFStore.get_storer : Returns the storer object for a key.
+
         Examples
         --------
         >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
@@ -1261,15 +1266,19 @@ def append(
                 Table format. Write as a PyTables Table structure which may perform
                 worse but allow more flexible operations like searching / selecting
                 subsets of the data.
+        axes : default None
+            This parameter is currently not accepted.
         index : bool, default True
             Write DataFrame index as a column.
         append : bool, default True
             Append the input data to the existing.
-        data_columns : list of columns, or True, default None
-            List of columns to create as indexed data columns for on-disk
-            queries, or True to use all columns. By default only the axes
-            of the object are indexed. See `here
-            <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.
+        complib : default None
+            This parameter is currently not accepted.
+        complevel : int, 0-9, default None
+            Specifies a compression level for data.
+            A value of 0 or None disables compression.
+        columns : default None
+            This parameter is currently not accepted; use data_columns instead.
         min_itemsize : int, dict, or None
             Dict of columns that specify minimum str sizes.
         nan_rep : str
@@ -1278,11 +1287,26 @@ def append(
             Size to chunk the writing.
         expectedrows : int
             Expected TOTAL row size of this table.
-        encoding : default None
-            Provide an encoding for str.
         dropna : bool, default False, optional
             Do not write an ALL nan row to the store settable
             by the option 'io.hdf.dropna_table'.
+        data_columns : list of columns, or True, default None
+            List of columns to create as indexed data columns for on-disk
+            queries, or True to use all columns. By default only the axes
+            of the object are indexed. See `here
+            <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.
+        encoding : default None
+            Provide an encoding for str.
+        errors : str, default 'strict'
+            The error handling scheme to use for encoding errors.
+            The default is 'strict', meaning that encoding errors raise a
+            UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
+            'xmlcharrefreplace' as well as any other name registered with
+            codecs.register_error that can handle UnicodeEncodeErrors.
+
+        See Also
+        --------
+        HDFStore.append_to_multiple : Append to multiple tables.
 
         Notes
         -----
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -54,7 +54,7 @@ moto
 flask
 asv>=0.6.1
 flake8==6.1.0
-mypy==1.8.0
+mypy==1.9.0
 tokenize-rt
 pre-commit>=3.6.0
 gitpython