Skip to content

Commit 0318764

Browse files
fix: fix read_gbq_function issue in dataframe apply method (#1174)
* fix: fix read_gbq_function issue in dataframe apply method
* add a test
* fix the test
* resolve the comments
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* quick fix
* resolve comments
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* 🦉 Updates from OwlBot post-processor
  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* minor fix
* resolve comments
* resolve comments
* Update dataframe.py
* Update test_remote_function.py

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent aa9af88 commit 0318764

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

bigframes/dataframe.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3923,6 +3923,10 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
39233923
)
39243924

39253925
def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
3926+
# With BigFrames remote functions, the DataFrame '.apply' method is
3927+
# specifically designed for row-wise or column-wise operations, where the
3928+
# input to the applied function should be a Series, not a scalar.
3929+
39263930
if utils.get_axis_number(axis) == 1:
39273931
msg = "axis=1 scenario is in preview."
39283932
warnings.warn(msg, category=bfe.PreviewWarning)
@@ -4030,8 +4034,19 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
40304034

40314035
return result_series
40324036

4037+
# At this point a column-wise or element-wise remote function operation
4038+
# would be performed, which is not supported.
4039+
if hasattr(func, "bigframes_remote_function"):
4040+
raise NotImplementedError(
4041+
"BigFrames DataFrame '.apply()' does not support remote function "
4042+
"for column-wise (i.e. with axis=0) operations, please use a "
4043+
"regular python function instead. For element-wise operations of "
4044+
"the remote function, please use '.map()'."
4045+
)
4046+
40334047
# Per-column apply
40344048
results = {name: func(col, *args, **kwargs) for name, col in self.items()}
4049+
40354050
if all(
40364051
[
40374052
isinstance(val, bigframes.series.Series) or utils.is_list_like(val)

tests/system/small/test_remote_function.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,30 @@ def test_read_gbq_function_supported_python_output_type(
968968
rf.read_gbq_function(str(sql_routine.reference), session=session)
969969

970970

971+
@pytest.mark.flaky(retries=2, delay=120)
972+
def test_df_apply_scalar_func(session, scalars_dfs):
973+
scalars_df, _ = scalars_dfs
974+
bdf = bigframes.pandas.DataFrame(
975+
{
976+
"Column1": scalars_df["string_col"],
977+
"Column2": scalars_df["string_col"],
978+
}
979+
)
980+
981+
# The "cw_lower_case_ascii_only" is a scalar function.
982+
func_ref = session.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only")
983+
984+
# DataFrame '.apply()' only supports Series-level application.
985+
with pytest.raises(NotImplementedError) as context:
986+
bdf.apply(func_ref)
987+
assert str(context.value) == (
988+
"BigFrames DataFrame '.apply()' does not support remote function for "
989+
"column-wise (i.e. with axis=0) operations, please use a regular python "
990+
"function instead. For element-wise operations of the remote function, "
991+
"please use '.map()'."
992+
)
993+
994+
971995
@pytest.mark.flaky(retries=2, delay=120)
972996
def test_read_gbq_function_multiple_inputs_not_a_row_processor(session):
973997
with pytest.raises(ValueError) as context:

0 commit comments

Comments
 (0)