diff --git a/pandas-stubs/core/indexes/datetimes.pyi b/pandas-stubs/core/indexes/datetimes.pyi
index 3a506349..1d08f0a8 100644
--- a/pandas-stubs/core/indexes/datetimes.pyi
+++ b/pandas-stubs/core/indexes/datetimes.pyi
@@ -23,6 +23,7 @@ from pandas import (
 from pandas.core.indexes.accessors import DatetimeIndexProperties
 from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
 from pandas.core.series import (
+    Series,
     TimedeltaSeries,
     TimestampSeries,
 )
@@ -56,6 +57,19 @@ class DatetimeIndex(DatetimeTimedeltaMixin[Timestamp], DatetimeIndexProperties):
         copy: bool = ...,
         name: Hashable = ...,
     ) -> None: ...
+    @classmethod
+    def __new__(
+        cls,
+        data: AxesData | Series,
+        freq: Frequency = ...,
+        tz: TimeZones = ...,
+        ambiguous: str = ...,
+        dayfirst: bool = ...,
+        yearfirst: bool = ...,
+        dtype: Dtype = ...,
+        copy: bool = ...,
+        name: Hashable = ...,
+    ) -> Self: ...
     def __reduce__(self): ...
     # various ignores needed for mypy, as we do want to restrict what can be used in
     # arithmetic for these types
diff --git a/tests/test_frame.py b/tests/test_frame.py
index 90d5626e..6219ebd0 100644
--- a/tests/test_frame.py
+++ b/tests/test_frame.py
@@ -109,17 +109,44 @@ def getSeriesData() -> dict[str, pd.Series]:
 
 
 def test_types_init() -> None:
-    pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
-    pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}, index=[2, 1])
-    pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]])
-    pd.DataFrame(data=itertools.repeat([1, 2, 3], 3))
-    pd.DataFrame(data=(range(i) for i in range(5)))
-    pd.DataFrame(data=[1, 2, 3, 4], dtype=np.int8)
-    pd.DataFrame(
-        np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
-        columns=["a", "b", "c"],
-        dtype=np.int8,
-        copy=True,
+    check(
+        assert_type(pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(
+            pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}, index=[2, 1]),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]]), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(pd.DataFrame(data=itertools.repeat([1, 2, 3], 3)), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(pd.DataFrame(data=(range(i) for i in range(5))), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(pd.DataFrame(data=[1, 2, 3, 4], dtype=np.int8), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(
+            pd.DataFrame(
+                np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
+                columns=["a", "b", "c"],
+                dtype=np.int8,
+                copy=True,
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
     )
     check(
         assert_type(pd.DataFrame(0, index=[0, 1], columns=[0, 1]), pd.DataFrame),
@@ -213,14 +240,14 @@ def test_types_getitem() -> None:
     s = pd.Series(["col1", "col2"])
     select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]})
     a = np.array(["col1", "col2"])
-    df["col1"]
-    df[5]
-    df[["col1", "col2"]]
-    df[1:]
-    df[s]
-    df[a]
-    df[select_df]
-    df[i]
+    check(assert_type(df["col1"], pd.Series), pd.Series)
+    check(assert_type(df[5], pd.Series), pd.Series)
+    check(assert_type(df[["col1", "col2"]], pd.DataFrame), pd.DataFrame)
+    check(assert_type(df[1:], pd.DataFrame), pd.DataFrame)
+    check(assert_type(df[s], pd.DataFrame), pd.DataFrame)
+    check(assert_type(df[a], pd.DataFrame), pd.DataFrame)
+    check(assert_type(df[select_df], pd.DataFrame), pd.DataFrame)
+    check(assert_type(df[i], pd.DataFrame), pd.DataFrame)
 
 
 def test_types_getitem_with_hashable() -> None:
@@ -269,16 +296,22 @@ def test_types_setitem_mask() -> None:
 
 def test_types_iloc_iat() -> None:
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
-    df.iloc[1, 1]
-    df.iloc[[1], [1]]
+    check(assert_type(df.iloc[1, 1], Scalar), np.integer)
+    check(assert_type(df.iloc[[1], [1]], pd.DataFrame), pd.DataFrame)
+
+    # TODO the below should pass
+    # check(assert_type(df.iat[0, 0], Scalar), np.integer)
     df.iat[0, 0]
 
 
 def test_types_loc_at() -> None:
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
-    df.loc[[0], "col1"]
+    check(assert_type(df.loc[[0], "col1"], pd.Series), pd.Series)
+    check(assert_type(df.loc[0, "col1"], Scalar), np.integer)
+
+    # TODO the below should pass
+    # check(assert_type(df.at[0, "col1"], Scalar), np.integer)
     df.at[0, "col1"]
-    df.loc[0, "col1"]
 
 
 def test_types_boolean_indexing() -> None:
@@ -308,8 +341,8 @@ def test_types_df_to_df_comparison() -> None:
 
 def test_types_head_tail() -> None:
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
-    df.head(1)
-    df.tail(1)
+    check(assert_type(df.head(1), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.tail(1), pd.DataFrame), pd.DataFrame)
 
 
 def test_types_assign() -> None:
@@ -389,17 +422,20 @@ def test_types_sample() -> None:
 
 def test_types_nlargest_nsmallest() -> None:
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
-    df.nlargest(1, "col1")
-    df.nsmallest(1, "col2")
+    check(assert_type(df.nlargest(1, "col1"), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.nsmallest(1, "col2"), pd.DataFrame), pd.DataFrame)
 
 
 def test_types_filter() -> None:
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
-    df.filter(items=["col1"])
-    df.filter(regex="co.*")
-    df.filter(like="1")
-    # [PR 964] Docs state `items` is `list-like`
-    df.filter(items=("col2", "col2", 1, tuple([4])))
+    check(assert_type(df.filter(items=["col1"]), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.filter(regex="co.*"), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.filter(like="1"), pd.DataFrame), pd.DataFrame)
+    # GH964 Docs state `items` is `list-like`
+    check(
+        assert_type(df.filter(items=("col2", "col2", 1, tuple([4]))), pd.DataFrame),
+        pd.DataFrame,
+    )
 
 
 def test_types_setting() -> None:
@@ -447,18 +483,18 @@ def test_types_dropna() -> None:
     check(assert_type(df.dropna(), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.dropna(ignore_index=True), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.dropna(axis=1, thresh=1), pd.DataFrame), pd.DataFrame)
-    assert (
-        assert_type(df.dropna(axis=0, how="all", subset=["col1"], inplace=True), None)
-        is None
+    check(
+        assert_type(df.dropna(axis=0, how="all", subset=["col1"], inplace=True), None),
+        type(None),
     )
-    assert (
+    check(
         assert_type(
             df.dropna(
                 axis=0, how="all", subset=["col1"], inplace=True, ignore_index=False
            ),
             None,
-        )
-        is None
+        ),
+        type(None),
     )
 
 
@@ -476,7 +512,7 @@ def test_types_drop_duplicates() -> None:
     check(assert_type(df.drop_duplicates(["AAA"]), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.drop_duplicates(("AAA",)), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.drop_duplicates("AAA"), pd.DataFrame), pd.DataFrame)
-    assert assert_type(df.drop_duplicates("AAA", inplace=True), None) is None
+    check(assert_type(df.drop_duplicates("AAA", inplace=True), None), type(None))
     check(
         assert_type(
             df.drop_duplicates("AAA", inplace=False, ignore_index=True), pd.DataFrame
@@ -619,10 +655,15 @@ def test_types_shift() -> None:
 
 def test_types_rank() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.rank(axis=0, na_option="bottom")
-    df.rank(method="min", pct=True)
-    df.rank(method="dense", ascending=True)
-    df.rank(method="first", numeric_only=True)
+    check(assert_type(df.rank(axis=0, na_option="bottom"), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.rank(method="min", pct=True), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(df.rank(method="dense", ascending=True), pd.DataFrame), pd.DataFrame
+    )
+    check(
+        assert_type(df.rank(method="first", numeric_only=True), pd.DataFrame),
+        pd.DataFrame,
+    )
 
 
 def test_types_mean() -> None:
@@ -728,35 +769,35 @@ def test_frame_iterator() -> None:
 
 def test_types_sum() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.sum()
-    df.sum(axis=1)
+    check(assert_type(df.sum(), pd.Series), pd.Series)
+    check(assert_type(df.sum(axis=1), pd.Series), pd.Series)
 
 
 def test_types_cumsum() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.cumsum()
-    df.sum(axis=0)
+    check(assert_type(df.cumsum(), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.sum(axis=0), pd.Series), pd.Series)
 
 
 def test_types_min() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.min()
-    df.min(axis=0)
+    check(assert_type(df.min(), pd.Series), pd.Series)
+    check(assert_type(df.min(axis=0), pd.Series), pd.Series)
 
 
 def test_types_max() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.max()
-    df.max(axis=0)
+    check(assert_type(df.max(), pd.Series), pd.Series)
+    check(assert_type(df.max(axis=0), pd.Series), pd.Series)
 
 
 def test_types_quantile() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.quantile([0.25, 0.5])
-    df.quantile(0.75)
-    df.quantile()
+    check(assert_type(df.quantile([0.25, 0.5]), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.quantile(0.75), pd.Series), pd.Series)
+    check(assert_type(df.quantile(), pd.Series), pd.Series)
     # GH 81
-    df.quantile(np.array([0.25, 0.75]))
+    check(assert_type(df.quantile(np.array([0.25, 0.75])), pd.DataFrame), pd.DataFrame)
 
 
 def test_dataframe_clip() -> None:
@@ -975,33 +1016,33 @@ def test_dataframe_clip() -> None:
 
 def test_types_abs() -> None:
     df = pd.DataFrame(data={"col1": [-5, 1], "col2": [3, -14]})
-    df.abs()
+    check(assert_type(df.abs(), pd.DataFrame), pd.DataFrame)
 
 
 def test_types_var() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]})
-    df.var()
-    df.var(axis=1, ddof=1)
-    df.var(skipna=True, numeric_only=False)
+    check(assert_type(df.var(), pd.Series), pd.Series)
+    check(assert_type(df.var(axis=1, ddof=1), pd.Series), pd.Series)
+    check(assert_type(df.var(skipna=True, numeric_only=False), pd.Series), pd.Series)
 
 
 def test_types_std() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]})
-    df.std()
-    df.std(axis=1, ddof=1)
-    df.std(skipna=True, numeric_only=False)
+    check(assert_type(df.std(), pd.Series), pd.Series)
+    check(assert_type(df.std(axis=1, ddof=1), pd.Series), pd.Series)
+    check(assert_type(df.std(skipna=True, numeric_only=False), pd.Series), pd.Series)
 
 
 def test_types_idxmin() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.idxmin()
-    df.idxmin(axis=0)
+    check(assert_type(df.idxmin(), pd.Series), pd.Series)
+    check(assert_type(df.idxmin(axis=0), pd.Series), pd.Series)
 
 
 def test_types_idxmax() -> None:
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.idxmax()
-    df.idxmax(axis=0)
+    check(assert_type(df.idxmax(), pd.Series), pd.Series)
+    check(assert_type(df.idxmax(axis=0), pd.Series), pd.Series)
 
 
 # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
@@ -1018,7 +1059,7 @@ def test_types_value_counts() -> None:
 
 def test_types_unique() -> None:
     # This is really more for of a Series test
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [1, 4]})
-    df["col1"].unique()
+    check(assert_type(df["col1"].unique(), np.ndarray), np.ndarray)
 
 
 def test_types_apply() -> None:
@@ -1269,12 +1310,12 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
 def test_types_map() -> None:
     # GH774
     df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
-    df.map(lambda x: x**2)
-    df.map(np.exp)
-    df.map(str)
+    check(assert_type(df.map(lambda x: x**2), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.map(np.exp), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.map(str), pd.DataFrame), pd.DataFrame)
     # na_action parameter was added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
-    df.map(np.exp, na_action="ignore")
-    df.map(str, na_action=None)
+    check(assert_type(df.map(np.exp, na_action="ignore"), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.map(str, na_action=None), pd.DataFrame), pd.DataFrame)
 
 
 def test_types_element_wise_arithmetic() -> None:
@@ -1353,23 +1394,41 @@ def test_types_scalar_arithmetic() -> None:
 
 def test_types_melt() -> None:
     df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
-    df.melt()
-    df.melt(id_vars=["col1"], value_vars=["col2"])
-    df.melt(
-        id_vars=["col1"],
-        value_vars=["col2"],
-        var_name="someVariable",
-        value_name="someValue",
+    check(assert_type(df.melt(), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(df.melt(id_vars=["col1"], value_vars=["col2"]), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(
+            df.melt(
+                id_vars=["col1"],
+                value_vars=["col2"],
+                var_name="someVariable",
+                value_name="someValue",
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
     )
 
-    pd.melt(df)
-    pd.melt(df, id_vars=["col1"], value_vars=["col2"])
-    pd.melt(
-        df,
-        id_vars=["col1"],
-        value_vars=["col2"],
-        var_name="someVariable",
-        value_name="someValue",
+    check(assert_type(pd.melt(df), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(pd.melt(df, id_vars=["col1"], value_vars=["col2"]), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(
+            pd.melt(
+                df,
+                id_vars=["col1"],
+                value_vars=["col2"],
+                var_name="someVariable",
+                value_name="someValue",
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
     )
 
 
@@ -1694,9 +1753,15 @@ def test_types_group_by_with_dropna_keyword() -> None:
     df = pd.DataFrame(
         data={"col1": [1, 1, 2, 1], "col2": [2, None, 1, 2], "col3": [3, 4, 3, 2]}
     )
-    df.groupby(by="col2", dropna=True).sum()
-    df.groupby(by="col2", dropna=False).sum()
-    df.groupby(by="col2").sum()
+    check(
+        assert_type(df.groupby(by="col2", dropna=True).sum(), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(df.groupby(by="col2", dropna=False).sum(), pd.DataFrame),
+        pd.DataFrame,
+    )
+    check(assert_type(df.groupby(by="col2").sum(), pd.DataFrame), pd.DataFrame)
 
 
 def test_types_groupby_any() -> None:
@@ -1753,17 +1818,41 @@ def test_types_groupby_level() -> None:
 def test_types_merge() -> None:
     df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
     df2 = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [0, 1, 0]})
-    df.merge(df2)
-    df.merge(df2, on="col1")
-    df.merge(df2, on="col1", how="left")
-    df.merge(df2, on=["col1", "col2"], how="left")
-    df.merge(df2, on=("col1", "col2"), how="left")
-    df.merge(df2, on=("col1", "col2"), how="left", suffixes=(None, "s"))
-    df.merge(df2, on=("col1", "col2"), how="left", suffixes=("t", "s"))
-    df.merge(df2, on=("col1", "col2"), how="left", suffixes=("a", None))
-    df.merge(df2, how="cross")  # GH 289
+    check(assert_type(df.merge(df2), pd.DataFrame), pd.DataFrame)
on="col1"), pd.DataFrame), pd.DataFrame) + check(assert_type(df.merge(df2, on="col1", how="left"), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.merge(df2, on=["col1", "col2"], how="left"), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type(df.merge(df2, on=("col1", "col2"), how="left"), pd.DataFrame), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, on=("col1", "col2"), how="left", suffixes=(None, "s")), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, on=("col1", "col2"), how="left", suffixes=("t", "s")), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + df.merge(df2, on=("col1", "col2"), how="left", suffixes=("a", None)), + pd.DataFrame, + ), + pd.DataFrame, + ) + check(assert_type(df.merge(df2, how="cross"), pd.DataFrame), pd.DataFrame) # GH 289 columns = ["col1", "col2"] - df.merge(df2, on=columns) + check(assert_type(df.merge(df2, on=columns), pd.DataFrame), pd.DataFrame) def test_types_plot() -> None: @@ -1825,10 +1914,10 @@ def test_types_window() -> None: def test_types_cov() -> None: df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) - df.cov() - df.cov(min_periods=1) + check(assert_type(df.cov(), pd.DataFrame), pd.DataFrame) + check(assert_type(df.cov(min_periods=1), pd.DataFrame), pd.DataFrame) # ddof param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - df.cov(ddof=2) + check(assert_type(df.cov(ddof=2), pd.DataFrame), pd.DataFrame) def test_types_to_numpy() -> None: @@ -1887,8 +1976,14 @@ def test_types_compare() -> None: df2 = pd.DataFrame( data={"col1": [1, 2, 5, 6], "col2": [3, 4, 1, 1], "col3": [3, 4, 3, 2]} ) - df1.compare(df2) - df2.compare(df1, align_axis=0, keep_shape=True, keep_equal=True) + check(assert_type(df1.compare(df2), pd.DataFrame), pd.DataFrame) + check( + assert_type( + df2.compare(df1, align_axis=0, keep_shape=True, keep_equal=True), + pd.DataFrame, + ), + pd.DataFrame, + ) def test_types_agg() -> None: @@ -1972,9 +2067,12 @@ def test_types_describe() -> None: ], } ) - df.describe() - df.describe(percentiles=[0.5], include="all") - df.describe(exclude=[np.number]) + check(assert_type(df.describe(), pd.DataFrame), pd.DataFrame) + check( + assert_type(df.describe(percentiles=[0.5], include="all"), pd.DataFrame), + pd.DataFrame, + ) + check(assert_type(df.describe(exclude=[np.number]), pd.DataFrame), pd.DataFrame) def test_types_to_string() -> None: @@ -1988,21 +2086,27 @@ def test_types_to_string() -> None: ], } ) - df.to_string( - index=True, - col_space=2, - header=["a", "b"], - na_rep="0", - justify="left", - max_rows=2, - min_rows=0, - max_cols=2, - show_dimensions=True, - line_width=3, + check( + assert_type( + df.to_string( + index=True, + col_space=2, + header=["a", "b"], + na_rep="0", + justify="left", + max_rows=2, + min_rows=0, + max_cols=2, + show_dimensions=True, + line_width=3, + ), + str, + ), + str, ) # col_space accepting list or dict added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - df.to_string(col_space=[1, 2]) - df.to_string(col_space={"col1": 1, "col2": 3}) + check(assert_type(df.to_string(col_space=[1, 2]), str), str) + check(assert_type(df.to_string(col_space={"col1": 1, "col2": 3}), str), str) def test_dataframe_to_string_float_fmt() -> None: @@ -2037,19 +2141,25 @@ def test_types_to_html() -> None: ], } ) - df.to_html( - index=True, - col_space=2, - header=True, - na_rep="0", - justify="left", - max_rows=2, - max_cols=2, - show_dimensions=True, + check( + assert_type( + df.to_html( + index=True, + 
+                col_space=2,
+                header=True,
+                na_rep="0",
+                justify="left",
+                max_rows=2,
+                max_cols=2,
+                show_dimensions=True,
+            ),
+            str,
+        ),
+        str,
     )
     # col_space accepting list or dict added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
-    df.to_html(col_space=[1, 2])
-    df.to_html(col_space={"col1": 1, "col2": 3})
+    check(assert_type(df.to_html(col_space=[1, 2]), str), str)
+    check(assert_type(df.to_html(col_space={"col1": 1, "col2": 3}), str), str)
 
 
 def test_types_resample() -> None:
@@ -2071,15 +2181,15 @@ def test_types_resample() -> None:
 
 def test_types_to_dict() -> None:
     data = pd.DataFrame({"a": [1], "b": [2]})
-    data.to_dict(orient="records")
-    data.to_dict(orient="dict")
-    data.to_dict(orient="list")
-    data.to_dict(orient="series")
-    data.to_dict(orient="split")
-    data.to_dict(orient="index")
+    check(assert_type(data.to_dict(orient="records"), list[dict[Hashable, Any]]), list)
+    check(assert_type(data.to_dict(orient="dict"), dict[Hashable, Any]), dict)
+    check(assert_type(data.to_dict(orient="list"), dict[Hashable, Any]), dict)
+    check(assert_type(data.to_dict(orient="series"), dict[Hashable, Any]), dict)
+    check(assert_type(data.to_dict(orient="split"), dict[Hashable, Any]), dict)
+    check(assert_type(data.to_dict(orient="index"), dict[Hashable, Any]), dict)
 
     # orient param accepting "tight" added in 1.4.0 https://pandas.pydata.org/docs/whatsnew/v1.4.0.html
-    data.to_dict(orient="tight")
+    check(assert_type(data.to_dict(orient="tight"), dict[Hashable, Any]), dict)
 
 
 def test_types_from_dict() -> None:
@@ -2353,13 +2463,28 @@ def dataframe_not_first_arg(_: int, df: pd.DataFrame) -> pd.DataFrame:
 
 # set_flags() method added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
 def test_types_set_flags() -> None:
-    pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags(
-        allows_duplicate_labels=False
+    check(
+        assert_type(
+            pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags(
+                allows_duplicate_labels=False
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
+    )
+    check(
+        assert_type(
+            pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]).set_flags(
+                allows_duplicate_labels=True
+            ),
+            pd.DataFrame,
+        ),
+        pd.DataFrame,
     )
-    pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]).set_flags(
-        allows_duplicate_labels=True
+    check(
+        assert_type(pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]), pd.DataFrame),
+        pd.DataFrame,
     )
-    pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"])
 
 
 def test_types_to_parquet() -> None:
@@ -2376,13 +2501,21 @@ def test_types_to_parquet() -> None:
 
 def test_types_to_latex() -> None:
     df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"])
-    df.to_latex(
-        columns=["A"], label="some_label", caption="some_caption", multirow=True
+    check(
+        assert_type(
+            df.to_latex(
+                columns=["A"], label="some_label", caption="some_caption", multirow=True
+            ),
+            str,
+        ),
+        str,
+    )
+    check(
+        assert_type(df.to_latex(escape=False, decimal=",", column_format="r"), str), str
     )
-    df.to_latex(escape=False, decimal=",", column_format="r")
     # position param was added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
-    df.to_latex(position="some")
-    df.to_latex(caption=("cap1", "cap2"))
+    check(assert_type(df.to_latex(position="some"), str), str)
+    check(assert_type(df.to_latex(caption=("cap1", "cap2")), str), str)
 
 
 def test_types_explode() -> None:
@@ -2396,14 +2529,16 @@ def test_types_explode() -> None:
 def test_types_rename() -> None:
     df = pd.DataFrame(columns=["a"])
     col_map = {"a": "b"}
-    df.rename(columns=col_map)
-    df.rename(columns={"a": "b"})
-    df.rename(columns={1: "b"})
+    check(assert_type(df.rename(columns=col_map), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.rename(columns={"a": "b"}), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.rename(columns={1: "b"}), pd.DataFrame), pd.DataFrame)
     # Apparently all of these calls are accepted by pandas
-    df.rename(columns={None: "b"})
-    df.rename(columns={"": "b"})
-    df.rename(columns={(2, 1): "b"})
-    df.rename(columns=lambda s: s.upper())
+    check(assert_type(df.rename(columns={None: "b"}), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.rename(columns={"": "b"}), pd.DataFrame), pd.DataFrame)
+    check(assert_type(df.rename(columns={(2, 1): "b"}), pd.DataFrame), pd.DataFrame)
+    check(
+        assert_type(df.rename(columns=lambda s: s.upper()), pd.DataFrame), pd.DataFrame
+    )
 
 
 def test_types_rename_axis() -> None:
@@ -2567,11 +2702,14 @@ def test_types_regressions() -> None:
 
     # https://github.com/microsoft/python-type-stubs/issues/115
     df = pd.DataFrame({"A": [1, 2, 3], "B": [5, 6, 7]})
-    pd.DatetimeIndex(
-        data=df["A"],
-        tz=None,
-        ambiguous="NaT",
-        copy=True,
+    pd.DatetimeIndex(data=df["A"], tz=None, ambiguous="NaT", copy=True),
+
+    check(
+        assert_type(
+            pd.DatetimeIndex(data=df["A"], tz=None, ambiguous="NaT", copy=True),
+            pd.DatetimeIndex,
+        ),
+        pd.DatetimeIndex,
     )
 
 
@@ -2743,24 +2881,24 @@ def test_groupby_series_methods() -> None:
     df = pd.DataFrame({"x": [1, 2, 2, 3, 3], "y": [10, 20, 30, 40, 50]})
     gb = df.groupby("x")["y"]
     check(assert_type(gb.describe(), pd.DataFrame), pd.DataFrame)
-    gb.count().loc[2]
-    gb.pct_change().loc[2]
-    gb.bfill().loc[2]
-    gb.cummax().loc[2]
-    gb.cummin().loc[2]
-    gb.cumprod().loc[2]
-    gb.cumsum().loc[2]
-    gb.ffill().loc[2]
-    gb.first().loc[2]
-    gb.head().loc[2]
-    gb.last().loc[2]
-    gb.max().loc[2]
-    gb.mean().loc[2]
-    gb.median().loc[2]
-    gb.min().loc[2]
-    gb.nlargest().loc[2]
-    gb.nsmallest().loc[2]
-    gb.nth(0).loc[1]
+    check(assert_type(gb.count().loc[2], int), np.integer)
+    check(assert_type(gb.pct_change(), pd.Series), pd.Series)
+    check(assert_type(gb.bfill(), pd.Series), pd.Series)
+    check(assert_type(gb.cummax(), pd.Series), pd.Series)
+    check(assert_type(gb.cummin(), pd.Series), pd.Series)
+    check(assert_type(gb.cumprod(), pd.Series), pd.Series)
+    check(assert_type(gb.cumsum(), pd.Series), pd.Series)
+    check(assert_type(gb.ffill(), pd.Series), pd.Series)
+    check(assert_type(gb.first(), pd.Series), pd.Series)
+    check(assert_type(gb.head(), pd.Series), pd.Series)
+    check(assert_type(gb.last(), pd.Series), pd.Series)
+    check(assert_type(gb.max(), pd.Series), pd.Series)
+    check(assert_type(gb.mean(), pd.Series), pd.Series)
+    check(assert_type(gb.median(), pd.Series), pd.Series)
+    check(assert_type(gb.min(), pd.Series), pd.Series)
+    check(assert_type(gb.nlargest(), pd.Series), pd.Series)
+    check(assert_type(gb.nsmallest(), pd.Series), pd.Series)
+    check(assert_type(gb.nth(0), Union[pd.DataFrame, pd.Series]), pd.Series)
 
 
 def test_dataframe_pct_change() -> None:
@@ -2837,10 +2975,11 @@ def test_sum_get_add() -> None:
 
 
 def test_getset_untyped() -> None:
-    result: int = 10
+    """Test that Dataframe.__getitem__ needs to return untyped series."""
     df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [10, 20, 30, 40, 50]})
-    # Tests that Dataframe.__getitem__ needs to return untyped series.
+    result: int = 10
     result = df["x"].max()
+    check(assert_type(df["x"].max(), Any), np.integer)
 
 
 def test_getmultiindex_columns() -> None: