Skip to content

Commit f137122

Browse files
committed
resolve the comments
1 parent 4e76f53 commit f137122

File tree

3 files changed

+23
-30
lines changed

3 files changed

+23
-30
lines changed

bigframes/series.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1904,6 +1904,15 @@ def apply(
19041904
*,
19051905
args: typing.Tuple = (),
19061906
) -> Series:
1907+
# Note: This signature differs from pandas.Series.apply. Specifically,
1908+
# `args` is keyword-only and `by_row` is a custom parameter here. Full
1909+
# alignment would involve breaking changes. However, given that `by_row`
1910+
# is not frequently used, we defer any such changes until there is a
1911+
# clear need based on user feedback.
1912+
#
1913+
# See pandas docs for reference:
1914+
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.apply.html
1915+
19071916
# TODO(shobs, b/274645634): Support convert_dtype, **kwargs
19081917
# is actually a ternary op
19091918

@@ -1952,6 +1961,9 @@ def apply(
19521961
result_series = self._apply_nary_op(
19531962
ops.NaryRemoteFunctionOp(function_def=func.udf_def), args
19541963
)
1964+
# TODO(jialuo): Investigate why `_apply_nary_op` drops the series
1965+
# `name`. Until that is fixed, reassign it here as a temporary workaround.
1966+
result_series.name = self.name
19551967
else:
19561968
result_series = self._apply_unary_op(
19571969
ops.RemoteFunctionOp(function_def=func.udf_def, apply_on_null=True)

tests/system/large/functions/test_managed_function.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,22 +1124,17 @@ def foo_list(x: int, y0: float, y1: bytes, y2: bool) -> list[str]:
11241124

11251125
scalars_df, scalars_pandas_df = scalars_dfs
11261126

1127-
bf_result_col = scalars_df["int64_too"].apply(
1128-
foo_list, args=(12.34, b"hello world", False)
1129-
)
11301127
bf_result = (
1131-
scalars_df["int64_too"].to_frame().assign(result=bf_result_col).to_pandas()
1128+
scalars_df["int64_too"]
1129+
.apply(foo_list, args=(12.34, b"hello world", False))
1130+
.to_pandas()
11321131
)
1133-
1134-
pd_result_col = scalars_pandas_df["int64_too"].apply(
1132+
pd_result = scalars_pandas_df["int64_too"].apply(
11351133
foo_list, args=(12.34, b"hello world", False)
11361134
)
1137-
pd_result = (
1138-
scalars_pandas_df["int64_too"].to_frame().assign(result=pd_result_col)
1139-
)
11401135

11411136
# Ignore any dtype difference.
1142-
pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
1137+
pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
11431138

11441139
finally:
11451140
# Clean up the gcp assets created for the managed function.

tests/system/large/functions/test_remote_function.py

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2990,34 +2990,20 @@ def foo(x: int, y: bool, z: float) -> str:
29902990
scalars_df, scalars_pandas_df = scalars_dfs
29912991

29922992
args1 = (True, 10.0)
2993-
bf_result_col = scalars_df["int64_too"].apply(foo, args=args1)
2994-
bf_result = (
2995-
scalars_df["int64_too"].to_frame().assign(result=bf_result_col).to_pandas()
2996-
)
2997-
2998-
pd_result_col = scalars_pandas_df["int64_too"].apply(foo, args=args1)
2999-
pd_result = (
3000-
scalars_pandas_df["int64_too"].to_frame().assign(result=pd_result_col)
3001-
)
2993+
bf_result = scalars_df["int64_too"].apply(foo, args=args1).to_pandas()
2994+
pd_result = scalars_pandas_df["int64_too"].apply(foo, args=args1)
30022995

30032996
# Ignore any dtype difference.
3004-
pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
2997+
pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
30052998

30062999
args2 = (False, -10.0)
30073000
foo_ref = session.read_gbq_function(foo.bigframes_bigquery_function)
30083001

3009-
bf_result_col = scalars_df["int64_too"].apply(foo_ref, args=args2)
3010-
bf_result = (
3011-
scalars_df["int64_too"].to_frame().assign(result=bf_result_col).to_pandas()
3012-
)
3013-
3014-
pd_result_col = scalars_pandas_df["int64_too"].apply(foo, args=args2)
3015-
pd_result = (
3016-
scalars_pandas_df["int64_too"].to_frame().assign(result=pd_result_col)
3017-
)
3002+
bf_result = scalars_df["int64_too"].apply(foo_ref, args=args2).to_pandas()
3003+
pd_result = scalars_pandas_df["int64_too"].apply(foo, args=args2)
30183004

30193005
# Ignore any dtype difference.
3020-
pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
3006+
pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
30213007

30223008
finally:
30233009
# Clean up the gcp assets created for the remote function.

0 commit comments

Comments
 (0)