Skip to content

Commit bd3f584

Browse files
authored
feat: (df|s).hist(), (df|s).line(), (df|s).area(), (df|s).bar(), df.scatter() (#1320)
1 parent b503355 commit bd3f584

File tree

3 files changed

+180
-22
lines changed

3 files changed

+180
-22
lines changed

bigframes/dataframe.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4313,6 +4313,56 @@ def get_right_id(id):
43134313
def plot(self):
43144314
return plotting.PlotAccessor(self)
43154315

4316+
def hist(
    self,
    by: typing.Optional[typing.Sequence[str]] = None,
    bins: int = 10,
    **kwargs,
):
    # Shortcut for ``self.plot.hist``: ``by``, ``bins`` and any extra keyword
    # arguments are forwarded unchanged and the accessor's result is returned.
    # No docstring is written here because ``hist.__doc__`` is assigned from
    # ``plotting.PlotAccessor.hist`` right after this definition.
    return self.plot.hist(by=by, bins=bins, **kwargs)
4320+
4321+
hist.__doc__ = inspect.getdoc(plotting.PlotAccessor.hist)
4322+
4323+
def line(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Shortcut for ``self.plot.line``: ``x``, ``y`` and any extra keyword
    # arguments are forwarded unchanged and the accessor's result is returned.
    # No docstring is written here because ``line.__doc__`` is assigned from
    # ``plotting.PlotAccessor.line`` right after this definition.
    return self.plot.line(x=x, y=y, **kwargs)
4330+
4331+
line.__doc__ = inspect.getdoc(plotting.PlotAccessor.line)
4332+
4333+
def area(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    stacked: bool = True,
    **kwargs,
):
    # Shortcut for ``self.plot.area``: ``x``, ``y``, ``stacked`` and any extra
    # keyword arguments are forwarded unchanged and the accessor's result is
    # returned.
    # No docstring is written here because ``area.__doc__`` is assigned from
    # ``plotting.PlotAccessor.area`` right after this definition.
    return self.plot.area(x=x, y=y, stacked=stacked, **kwargs)
4341+
4342+
area.__doc__ = inspect.getdoc(plotting.PlotAccessor.area)
4343+
4344+
def bar(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Shortcut for ``self.plot.bar``: ``x``, ``y`` and any extra keyword
    # arguments are forwarded unchanged and the accessor's result is returned.
    # No docstring is written here because ``bar.__doc__`` is assigned from
    # ``plotting.PlotAccessor.bar`` right after this definition.
    return self.plot.bar(x=x, y=y, **kwargs)
4351+
4352+
bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar)
4353+
4354+
def scatter(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    s: typing.Optional[
        typing.Union[typing.Hashable, typing.Sequence[typing.Hashable]]
    ] = None,
    c: typing.Optional[
        typing.Union[typing.Hashable, typing.Sequence[typing.Hashable]]
    ] = None,
    **kwargs,
):
    # Shortcut for ``self.plot.scatter``: ``x``, ``y``, ``s``, ``c`` and any
    # extra keyword arguments are forwarded unchanged and the accessor's result
    # is returned.
    # ``s`` and ``c`` default to None, so their annotations are spelled as
    # explicit ``Optional[...]`` rather than relying on implicit Optional.
    # No docstring is written here because ``scatter.__doc__`` is assigned from
    # ``plotting.PlotAccessor.scatter`` right after this definition.
    return self.plot.scatter(x=x, y=y, s=s, c=c, **kwargs)
4363+
4364+
scatter.__doc__ = inspect.getdoc(plotting.PlotAccessor.scatter)
4365+
43164366
def __matmul__(self, other) -> DataFrame:
43174367
return self.dot(other)
43184368

bigframes/series.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1984,16 +1984,48 @@ def __array_ufunc__(
19841984

19851985
return NotImplemented
19861986

1987-
# Keep this at the bottom of the Series class to avoid
1988-
# confusing type checker by overriding str
1989-
@property
1990-
def str(self) -> strings.StringMethods:
1991-
return strings.StringMethods(self._block)
1992-
19931987
@property
def plot(self):
    """Return a ``plotting.PlotAccessor`` wrapping this object.

    A new accessor is constructed on every attribute access.
    """
    return plotting.PlotAccessor(self)
19961990

1991+
def hist(
    self,
    by: typing.Optional[typing.Sequence[str]] = None,
    bins: int = 10,
    **kwargs,
):
    # Shortcut for ``self.plot.hist``: ``by``, ``bins`` and any extra keyword
    # arguments are forwarded unchanged and the accessor's result is returned.
    # No docstring is written here because ``hist.__doc__`` is assigned from
    # ``plotting.PlotAccessor.hist`` right after this definition.
    return self.plot.hist(by=by, bins=bins, **kwargs)
1995+
1996+
hist.__doc__ = inspect.getdoc(plotting.PlotAccessor.hist)
1997+
1998+
def line(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Shortcut for ``self.plot.line``: ``x``, ``y`` and any extra keyword
    # arguments are forwarded unchanged and the accessor's result is returned.
    # No docstring is written here because ``line.__doc__`` is assigned from
    # ``plotting.PlotAccessor.line`` right after this definition.
    return self.plot.line(x=x, y=y, **kwargs)
2005+
2006+
line.__doc__ = inspect.getdoc(plotting.PlotAccessor.line)
2007+
2008+
def area(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    stacked: bool = True,
    **kwargs,
):
    # Shortcut for ``self.plot.area``: ``x``, ``y``, ``stacked`` and any extra
    # keyword arguments are forwarded unchanged and the accessor's result is
    # returned.
    # No docstring is written here because ``area.__doc__`` is assigned from
    # ``plotting.PlotAccessor.area`` right after this definition.
    return self.plot.area(x=x, y=y, stacked=stacked, **kwargs)
2016+
2017+
area.__doc__ = inspect.getdoc(plotting.PlotAccessor.area)
2018+
2019+
def bar(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    # Shortcut for ``self.plot.bar``: ``x``, ``y`` and any extra keyword
    # arguments are forwarded unchanged and the accessor's result is returned.
    # No docstring is written here because ``bar.__doc__`` is assigned from
    # ``plotting.PlotAccessor.bar`` right after this definition.
    return self.plot.bar(x=x, y=y, **kwargs)
2026+
2027+
bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar)
2028+
19972029
def _slice(
19982030
self,
19992031
start: typing.Optional[int] = None,
@@ -2022,6 +2054,12 @@ def _cached(self, *, force: bool = True, session_aware: bool = True) -> Series:
20222054
self._block.cached(force=force, session_aware=session_aware)
20232055
return self
20242056

2057+
# Keep this at the bottom of the Series class to avoid
# confusing type checker by overriding str
@property
def str(self) -> strings.StringMethods:
    """Return a ``strings.StringMethods`` accessor built from ``self._block``.

    A new accessor is constructed on every attribute access.
    """
    return strings.StringMethods(self._block)
2062+
20252063

20262064
def _is_list_like(obj: typing.Any) -> typing_extensions.TypeGuard[typing.Sequence]:
20272065
return pandas.api.types.is_list_like(obj)

tests/system/small/operations/test_plotting.py

Lines changed: 86 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,20 @@ def _check_legend_labels(ax, labels):
3434
assert label == e
3535

3636

37-
def test_series_hist_bins(scalars_dfs):
37+
@pytest.mark.parametrize(
38+
("alias"),
39+
[
40+
pytest.param(True),
41+
pytest.param(False),
42+
],
43+
)
44+
def test_series_hist_bins(scalars_dfs, alias):
3845
scalars_df, scalars_pandas_df = scalars_dfs
3946
bins = 5
40-
ax = scalars_df["int64_col"].plot.hist(bins=bins)
47+
if alias:
48+
ax = scalars_df["int64_col"].hist(bins=bins)
49+
else:
50+
ax = scalars_df["int64_col"].plot.hist(bins=bins)
4151
pd_ax = scalars_pandas_df["int64_col"].plot.hist(bins=bins)
4252

4353
# Compares axis values and height between bigframes and pandas histograms.
@@ -49,11 +59,21 @@ def test_series_hist_bins(scalars_dfs):
4959
assert ax.patches[i]._height == pd_ax.patches[i]._height
5060

5161

52-
def test_dataframes_hist_bins(scalars_dfs):
62+
@pytest.mark.parametrize(
63+
("alias"),
64+
[
65+
pytest.param(True),
66+
pytest.param(False),
67+
],
68+
)
69+
def test_dataframes_hist_bins(scalars_dfs, alias):
5370
scalars_df, scalars_pandas_df = scalars_dfs
5471
bins = 7
5572
columns = ["int64_col", "int64_too", "float64_col"]
56-
ax = scalars_df[columns].plot.hist(bins=bins)
73+
if alias:
74+
ax = scalars_df[columns].hist(bins=bins)
75+
else:
76+
ax = scalars_df[columns].plot.hist(bins=bins)
5777
pd_ax = scalars_pandas_df[columns].plot.hist(bins=bins)
5878

5979
# Compares axis values and height between bigframes and pandas histograms.
@@ -171,10 +191,25 @@ def test_hist_kwargs_ticks_props(scalars_dfs):
171191
tm.assert_almost_equal(ylabels[i].get_rotation(), pd_ylables[i].get_rotation())
172192

173193

174-
def test_line(scalars_dfs):
194+
@pytest.mark.parametrize(
195+
("col_names", "alias"),
196+
[
197+
pytest.param(
198+
["int64_col", "float64_col", "int64_too", "bool_col"], True, id="df_alias"
199+
),
200+
pytest.param(
201+
["int64_col", "float64_col", "int64_too", "bool_col"], False, id="df"
202+
),
203+
pytest.param(["int64_col"], True, id="series_alias"),
204+
pytest.param(["int64_col"], False, id="series"),
205+
],
206+
)
207+
def test_line(scalars_dfs, col_names, alias):
175208
scalars_df, scalars_pandas_df = scalars_dfs
176-
col_names = ["int64_col", "float64_col", "int64_too", "bool_col"]
177-
ax = scalars_df[col_names].plot.line()
209+
if alias:
210+
ax = scalars_df[col_names].line()
211+
else:
212+
ax = scalars_df[col_names].plot.line()
178213
pd_ax = scalars_pandas_df[col_names].plot.line()
179214
tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
180215
tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())
@@ -183,10 +218,21 @@ def test_line(scalars_dfs):
183218
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])
184219

185220

186-
def test_area(scalars_dfs):
221+
@pytest.mark.parametrize(
222+
("col_names", "alias"),
223+
[
224+
pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"),
225+
pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"),
226+
pytest.param(["int64_col"], True, id="series_alias"),
227+
pytest.param(["int64_col"], False, id="series"),
228+
],
229+
)
230+
def test_area(scalars_dfs, col_names, alias):
187231
scalars_df, scalars_pandas_df = scalars_dfs
188-
col_names = ["int64_col", "float64_col", "int64_too"]
189-
ax = scalars_df[col_names].plot.area(stacked=False)
232+
if alias:
233+
ax = scalars_df[col_names].area(stacked=False)
234+
else:
235+
ax = scalars_df[col_names].plot.area(stacked=False)
190236
pd_ax = scalars_pandas_df[col_names].plot.area(stacked=False)
191237
tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
192238
tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())
@@ -195,10 +241,21 @@ def test_area(scalars_dfs):
195241
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])
196242

197243

198-
def test_bar(scalars_dfs):
244+
@pytest.mark.parametrize(
245+
("col_names", "alias"),
246+
[
247+
pytest.param(["int64_col", "float64_col", "int64_too"], True, id="df_alias"),
248+
pytest.param(["int64_col", "float64_col", "int64_too"], False, id="df"),
249+
pytest.param(["int64_col"], True, id="series_alias"),
250+
pytest.param(["int64_col"], False, id="series"),
251+
],
252+
)
253+
def test_bar(scalars_dfs, col_names, alias):
199254
scalars_df, scalars_pandas_df = scalars_dfs
200-
col_names = ["int64_col", "float64_col", "int64_too"]
201-
ax = scalars_df[col_names].plot.bar()
255+
if alias:
256+
ax = scalars_df[col_names].bar()
257+
else:
258+
ax = scalars_df[col_names].plot.bar()
202259
pd_ax = scalars_pandas_df[col_names].plot.bar()
203260
tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
204261
tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())
@@ -207,10 +264,23 @@ def test_bar(scalars_dfs):
207264
tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])
208265

209266

210-
def test_scatter(scalars_dfs):
267+
@pytest.mark.parametrize(
268+
("col_names", "alias"),
269+
[
270+
pytest.param(
271+
["int64_col", "float64_col", "int64_too", "bool_col"], True, id="df_alias"
272+
),
273+
pytest.param(
274+
["int64_col", "float64_col", "int64_too", "bool_col"], False, id="df"
275+
),
276+
],
277+
)
278+
def test_scatter(scalars_dfs, col_names, alias):
211279
scalars_df, scalars_pandas_df = scalars_dfs
212-
col_names = ["int64_col", "float64_col", "int64_too", "bool_col"]
213-
ax = scalars_df[col_names].plot.scatter(x="int64_col", y="float64_col")
280+
if alias:
281+
ax = scalars_df[col_names].scatter(x="int64_col", y="float64_col")
282+
else:
283+
ax = scalars_df[col_names].plot.scatter(x="int64_col", y="float64_col")
214284
pd_ax = scalars_pandas_df[col_names].plot.scatter(x="int64_col", y="float64_col")
215285
tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
216286
tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())

0 commit comments

Comments
 (0)