daft/udf/udf_v2.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -230,6 +230,11 @@ def __call__(self, *args: Any, **kwargs: Any) -> Expression | T:
  
                bound_method = self._cls._daft_bind_method(self._method)

                return bound_method(*args, **kwargs)

            # Give every call site its own id (func_id plus a per-instance sequence number) so that expression-based UDFs built from the same function do not share call-site state such as bound kwargs.

            call_seq = getattr(self, "_daft_call_seq", 0)

            setattr(self, "_daft_call_seq", call_seq + 1)

            call_id = f"{self.func_id}-{call_seq}"

            check_serializable(

                self._method,

                "Daft functions must be serializable. If your function accesses a non-serializable global or nonlocal variable to avoid reinitialization, use `@daft.cls` with a setup method instead.",

    @@ -247,7 +252,7 @@ def method(s: C, *args: P.args, **kwargs: P.kwargs) -> list[Any]:
  
                expr = Expression._from_pyexpr(

                    row_wise_udf(

                        self.func_id,

                        call_id,

                        self.name,

                        self._cls,

                        method,

    @@ -266,7 +271,7 @@ def method(s: C, *args: P.args, **kwargs: P.kwargs) -> list[Any]:
  
            elif self.is_batch:

                expr = Expression._from_pyexpr(

                    batch_udf(

                        self.func_id,

                        call_id,

                        self.name,

                        self._cls,

                        self._method,

    @@ -286,7 +291,7 @@ def method(s: C, *args: P.args, **kwargs: P.kwargs) -> list[Any]:
  
            else:

                expr = Expression._from_pyexpr(

                    row_wise_udf(

                        self.func_id,

                        call_id,

                        self.name,

                        self._cls,

                        self._method,

tests/udf/test_row_wise_udf.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -469,3 +469,26 @@ def stringify_and_sum(a: int, b: int) -> str: @@
             dynamic_batching_df = df.select("*", stringify_and_sum(col("x"), col("y")).alias("sum"))
             dynamic_batching_df = dynamic_batching_df.collect().sort("id")
             assert non_dynamic_batching_df.to_pydict() == dynamic_batching_df.to_pydict()
+    def test_row_wise_udf_kwargs_prefix_suffix_literals_and_exprs():  # one @daft.func used at three call sites, each with different keyword arguments
+        @daft.func
+        def format_number(value: int, prefix: str = "$", suffix: str = "") -> str:
+            return f"{prefix}{value}{suffix}"
+        df = daft.from_pydict({"amount": [10, 20, 30]})
+        df = df.with_column("dollar", format_number(df["amount"]))  # defaults only: prefix="$", suffix=""
+        df = df.with_column("euro", format_number(df["amount"], prefix="€", suffix=" EUR"))  # both keywords overridden with string literals
+        df = df.with_column(
+            "customized",
+            format_number(df["amount"], suffix=df["amount"].cast(DataType.string())),  # suffix is an expression (amount cast to string); prefix keeps its default
+        )
+        result = df.to_pydict()
+        expected = {
+            "amount": [10, 20, 30],
+            "dollar": ["$10", "$20", "$30"],
+            "euro": ["€10 EUR", "€20 EUR", "€30 EUR"],
+            "customized": ["$1010", "$2020", "$3030"],  # "$" + value + value rendered as a string
+        }
+        assert result == expected  # each column must reflect the kwargs of its own call

fix(udf): ensure per-call kwargs in udf v2 are uniquely bound per call site #6079

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

huleilei wants to merge 1 commit into Eventual-Inc:main from huleilei:fix-udf-kwargs-binding

-Original file line number
+Diff line change
@@ Expand Up / @@ -469,3 +469,26 @@ def stringify_and_sum(a: int, b: int) -> str: @@
             dynamic_batching_df = df.select("*", stringify_and_sum(col("x"), col("y")).alias("sum"))
             dynamic_batching_df = dynamic_batching_df.collect().sort("id")
             assert non_dynamic_batching_df.to_pydict() == dynamic_batching_df.to_pydict()
+    def test_row_wise_udf_kwargs_prefix_suffix_literals_and_exprs():  # one @daft.func used at three call sites, each with different keyword arguments
+        @daft.func
+        def format_number(value: int, prefix: str = "$", suffix: str = "") -> str:
+            return f"{prefix}{value}{suffix}"
+        df = daft.from_pydict({"amount": [10, 20, 30]})
+        df = df.with_column("dollar", format_number(df["amount"]))  # defaults only: prefix="$", suffix=""
+        df = df.with_column("euro", format_number(df["amount"], prefix="€", suffix=" EUR"))  # both keywords overridden with string literals
+        df = df.with_column(
+            "customized",
+            format_number(df["amount"], suffix=df["amount"].cast(DataType.string())),  # suffix is an expression (amount cast to string); prefix keeps its default
+        )
+        result = df.to_pydict()
+        expected = {
+            "amount": [10, 20, 30],
+            "dollar": ["$10", "$20", "$30"],
+            "euro": ["€10 EUR", "€20 EUR", "€30 EUR"],
+            "customized": ["$1010", "$2020", "$3030"],  # "$" + value + value rendered as a string
+        }
+        assert result == expected  # each column must reflect the kwargs of its own call

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix(udf): ensure per-call kwargs in udf v2 are uniquely bound per call site #6079

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

fix(udf): ensure per-call kwargs in udf v2 are uniquely bound per call site #6079

Are you sure you want to change the base?

Uh oh!

fix(udf): ensure per-call kwargs in udf v2 are uniquely bound per call site #6079

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!