From c1207fa016d86d8be029221ae0350717c5dcdcb6 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 20 Sep 2024 08:09:31 -0400 Subject: [PATCH 1/7] Test no longer hangs, and updated error string to match latest --- python/datafusion/tests/test_udaf.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/datafusion/tests/test_udaf.py b/python/datafusion/tests/test_udaf.py index 76488e19b..1c1ef8c2f 100644 --- a/python/datafusion/tests/test_udaf.py +++ b/python/datafusion/tests/test_udaf.py @@ -71,7 +71,6 @@ def df(): return ctx.create_dataframe([[batch]]) -@pytest.mark.skip(reason="df.collect() will hang, need more investigations") def test_errors(df): with pytest.raises(TypeError): udaf( @@ -92,8 +91,8 @@ def test_errors(df): df = df.aggregate([], [accum(column("a"))]) msg = ( - "Can't instantiate abstract class MissingMethods with abstract " - "methods evaluate, merge, update" + "Execution error: TypeError: Can't instantiate abstract class MissingMethods " + "without an implementation for abstract methods 'evaluate', 'merge', 'update'" ) with pytest.raises(Exception, match=msg): df.collect() From 47b2c8bd2c2a601603900a1b0e2afeb59c1e4d0b Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 20 Sep 2024 08:53:15 -0400 Subject: [PATCH 2/7] Add unit tests for registering udf and udaf --- python/datafusion/tests/test_udaf.py | 41 +++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/python/datafusion/tests/test_udaf.py b/python/datafusion/tests/test_udaf.py index 1c1ef8c2f..e076496e8 100644 --- a/python/datafusion/tests/test_udaf.py +++ b/python/datafusion/tests/test_udaf.py @@ -21,7 +21,7 @@ import pyarrow.compute as pc import pytest -from datafusion import Accumulator, SessionContext, column, udaf +from datafusion import Accumulator, column, udaf, udf class Summarize(Accumulator): @@ -60,15 +60,15 @@ def state(self) -> List[pa.Scalar]: @pytest.fixture -def df(): - ctx = SessionContext() +def df(ctx): + # ctx = SessionContext() # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 4, 6])], names=["a", "b"], ) - return ctx.create_dataframe([[batch]]) + return ctx.create_dataframe([[batch]], name="test_table") def test_errors(df): @@ -131,3 +131,36 @@ def test_group_by(df): arrays = [batch.column(1) for batch in batches] joined = pa.concat_arrays(arrays) assert joined == pa.array([1.0 + 2.0, 3.0]) + + +def test_register_udaf(ctx, df) -> None: + summarize = udaf( + Summarize, + pa.float64(), + pa.float64(), + [pa.float64()], + volatility="immutable", + ) + + ctx.register_udaf(summarize) + + df_result = ctx.sql("select summarize(b) from test_table") + + assert df_result.collect()[0][0][0].as_py() == 14.0 + + +def test_register_udf(ctx, df) -> None: + is_null = udf( + lambda x: x.is_null(), + [pa.float64()], + pa.bool_(), + volatility="immutable", + name="is_null", + ) + + ctx.register_udf(is_null) + + df_result = ctx.sql("select is_null(a) from test_table") + result = df_result.collect()[0].column(0) + + assert result == pa.array([False, False, False]) From 2ba0b9a65f5bf371e18c293e7ba4104db2935fc5 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 20 Sep 2024 08:53:47 -0400 Subject: [PATCH 3/7] Resolve error on registering udaf #874 --- python/datafusion/udf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index a3b74bb11..f74d675e3 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -192,7 +192,7 @@ def __init__( See :py:func:`udaf` for a convenience function and argument descriptions. """ - self._udf = df_internal.AggregateUDF( + self._udaf = df_internal.AggregateUDF( name, accumulator, input_types, return_type, state_type, str(volatility) ) @@ -203,7 +203,7 @@ def __call__(self, *args: Expr) -> Expr: occur during the evaluation of the dataframe. """ args = [arg.expr for arg in args] - return Expr(self._udf.__call__(*args)) + return Expr(self._udaf.__call__(*args)) @staticmethod def udaf( From 3791819d3a0f4e50d393781ec8e2ab0fa91a0ec5 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 20 Sep 2024 08:56:09 -0400 Subject: [PATCH 4/7] remove stale comment --- python/datafusion/tests/test_udaf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/datafusion/tests/test_udaf.py b/python/datafusion/tests/test_udaf.py index e076496e8..d3d79b2e7 100644 --- a/python/datafusion/tests/test_udaf.py +++ b/python/datafusion/tests/test_udaf.py @@ -61,8 +61,6 @@ def state(self) -> List[pa.Scalar]: @pytest.fixture def df(ctx): - # ctx = SessionContext() - # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 4, 6])], From 46d1e8c0ddff3615e7f9a13b4fbb40a8dedc3986 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 20 Sep 2024 21:04:32 -0400 Subject: [PATCH 5/7] Update unit test text to match in multiple versions of python --- python/datafusion/tests/test_udaf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/datafusion/tests/test_udaf.py b/python/datafusion/tests/test_udaf.py index d3d79b2e7..89a22b90d 100644 --- a/python/datafusion/tests/test_udaf.py +++ b/python/datafusion/tests/test_udaf.py @@ -89,8 +89,8 @@ def test_errors(df): df = df.aggregate([], [accum(column("a"))]) msg = ( - "Execution error: TypeError: Can't instantiate abstract class MissingMethods " - "without an implementation for abstract methods 'evaluate', 'merge', 'update'" + "Can't instantiate abstract class MissingMethods without an implementation " + "for abstract methods 'evaluate', 'merge', 'update'" ) with pytest.raises(Exception, match=msg): df.collect() From 2619634f9843493d07c069b50c4289f53661558e Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 21 Sep 2024 07:07:38 -0400 Subject: [PATCH 6/7] Regex for exception that is compatible with python 3.10 and 3.12 --- python/datafusion/tests/test_udaf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/datafusion/tests/test_udaf.py b/python/datafusion/tests/test_udaf.py index 89a22b90d..ac22fd8af 100644 --- a/python/datafusion/tests/test_udaf.py +++ b/python/datafusion/tests/test_udaf.py @@ -90,7 +90,7 @@ def test_errors(df): msg = ( "Can't instantiate abstract class MissingMethods without an implementation " - "for abstract methods 'evaluate', 'merge', 'update'" + "for abstract methods (evaluate, merge, update|'evaluate', 'merge', 'update')" ) with pytest.raises(Exception, match=msg): df.collect() From ba57eb9b30903d61b1c8a978de76ef9fc7e61214 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 21 Sep 2024 09:15:57 -0400 Subject: [PATCH 7/7] Regex for exception that is compatible with python 3.10 and 3.12 --- python/datafusion/tests/test_udaf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/datafusion/tests/test_udaf.py b/python/datafusion/tests/test_udaf.py index ac22fd8af..6f2525b0f 100644 --- a/python/datafusion/tests/test_udaf.py +++ b/python/datafusion/tests/test_udaf.py @@ -89,8 +89,9 @@ def test_errors(df): df = df.aggregate([], [accum(column("a"))]) msg = ( - "Can't instantiate abstract class MissingMethods without an implementation " - "for abstract methods (evaluate, merge, update|'evaluate', 'merge', 'update')" + "Can't instantiate abstract class MissingMethods (without an implementation " + "for abstract methods 'evaluate', 'merge', 'update'|with abstract methods " + "evaluate, merge, update)" ) with pytest.raises(Exception, match=msg): df.collect()