1919
2020from __future__ import annotations
2121
22+ import functools
2223from abc import ABCMeta , abstractmethod
2324from enum import Enum
2425from typing import TYPE_CHECKING , Callable , List , Optional , TypeVar
25- import functools
2626
2727import pyarrow
2828
@@ -117,15 +117,17 @@ class udf:
117117 This class can be used both as a **function** and as a **decorator**.
118118
119119 Usage:
120- - **As a function**: Call `udf(func, input_types, return_type, volatility, name)`.
121- - **As a decorator**: Use `@udf(input_types, return_type, volatility, name)`.
122- In this case, do **not** pass `func` explicitly.
120+ - **As a function**: Call `udf(func, input_types, return_type, volatility,
121+ name)`.
122+ - **As a decorator**: Use `@udf(input_types, return_type, volatility,
123+ name)`. In this case, do **not** pass `func` explicitly.
123124
124125 Args:
125126 func (Callable, optional): **Only needed when calling as a function.**
126127 Skip this argument when using `udf` as a decorator.
127128 input_types (list[pyarrow.DataType]): The data types of the arguments
128- to `func`. This list must be of the same length as the number of arguments.
129+ to `func`. This list must be of the same length as the number of
130+ arguments.
129131 return_type (_R): The data type of the return value from the function.
130132 volatility (Volatility | str): See `Volatility` for allowed values.
131133 name (Optional[str]): A descriptive name for the function.
@@ -139,7 +141,8 @@ class udf:
139141 ```
140142 def double_func(x):
141143 return x * 2
142- double_udf = udf(double_func, [pyarrow.int32()], pyarrow.int32(), "volatile", "double_it")
144+ double_udf = udf(double_func, [pyarrow.int32()], pyarrow.int32(),
145+ "volatile", "double_it")
143146 ```
144147
145148 **Using `udf` as a decorator:**
@@ -149,8 +152,13 @@ def double_udf(x):
149152 return x * 2
150153 ```
151154 """
155+
152156 def __new__ (cls , * args , ** kwargs ):
153- if args and callable (args [0 ]):
157+ """Create a new UDF.
158+
159+ Act as a function or a decorator depending on whether args[0] is callable.
160+ """
161+ if args and callable (args [0 ]):
154162 # Case 1: Used as a function, require the first parameter to be callable
155163 return cls ._function (* args , ** kwargs )
156164 else :
@@ -185,23 +193,22 @@ def _decorator(
185193 input_types : list [pyarrow .DataType ],
186194 return_type : _R ,
187195 volatility : Volatility | str ,
188- name : Optional [str ] = None
196+ name : Optional [str ] = None ,
189197 ):
190198 def decorator (func ):
191199 udf_caller = ScalarUDF .udf (
192- func ,
193- input_types ,
194- return_type ,
195- volatility ,
196- name
200+ func , input_types , return_type , volatility , name
197201 )
198-
202+
199203 @functools .wraps (func )
200204 def wrapper (* args , ** kwargs ):
201205 return udf_caller (* args , ** kwargs )
206+
202207 return wrapper
208+
203209 return decorator
204210
211+
205212class Accumulator (metaclass = ABCMeta ):
206213 """Defines how an :py:class:`AggregateUDF` accumulates values."""
207214
@@ -268,8 +275,8 @@ def __call__(self, *args: Expr) -> Expr:
268275 class udaf :
269276 """Create a new User-Defined Aggregate Function (UDAF).
270277
271- This class allows you to define an **aggregate function** that can be used in data
272- aggregation or window function calls.
278+ This class allows you to define an **aggregate function** that can be used in
279+ data aggregation or window function calls.
273280
274281 Usage:
275282 - **As a function**: Call `udaf(accum, input_types, return_type, state_type,
@@ -279,12 +286,12 @@ class udaf:
279286 When using `udaf` as a decorator, **do not pass `accum` explicitly**.
280287
281288 **Function example:**
282-
283- If your `:py:class:Accumulator` can be instantiated with no arguments, you can
284- simply pass it's type as `accum`. If you need to pass additional arguments to
285- it's constructor, you can define a lambda or a factory method. During runtime the
286- `:py:class:Accumulator` will be constructed for every instance in which this UDAF is
287- used. The following examples are all valid.
289+
290+ If your :py:class:`Accumulator` can be instantiated with no arguments, you
291+ can simply pass its type as `accum`. If you need to pass additional
292+ arguments to its constructor, you can define a lambda or a factory method.
293+ During runtime the :py:class:`Accumulator` will be constructed for every
294+ instance in which this UDAF is used. The following examples are all valid.
288295 ```
289296 import pyarrow as pa
290297 import pyarrow.compute as pc
@@ -308,11 +315,14 @@ def evaluate(self) -> pa.Scalar:
308315 def sum_bias_10() -> Summarize:
309316 return Summarize(10.0)
310317
311- udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], "immutable")
312- udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], "immutable")
313- udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), [pa.float64()], "immutable")
318+ udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()],
319+ "immutable")
320+ udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()],
321+ "immutable")
322+ udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(),
323+ [pa.float64()], "immutable")
314324 ```
315-
325+
316326 **Decorator example:**
317327 ```
318328 @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable")
@@ -321,7 +331,8 @@ def udf4() -> Summarize:
321331 ```
322332
323333 Args:
324- accum: The accumulator python function. **Only needed when calling as a function. Skip this argument when using `udaf` as a decorator.**
334+ accum: The accumulator python function. **Only needed when calling as a
335+ function. Skip this argument when using `udaf` as a decorator.**
325336 input_types: The data types of the arguments to ``accum``.
326337 return_type: The data type of the return value.
327338 state_type: The data types of the intermediate accumulation.
@@ -334,13 +345,18 @@ def udf4() -> Summarize:
334345 """
335346
336347 def __new__ (cls , * args , ** kwargs ):
337- if args and callable (args [0 ]):
348+ """Create a new UDAF.
349+
350+ Act as a function or a decorator depending on whether args[0] is
351+ callable.
352+ """
353+ if args and callable (args [0 ]):
338354 # Case 1: Used as a function, require the first parameter to be callable
339355 return cls ._function (* args , ** kwargs )
340356 else :
341357 # Case 2: Used as a decorator with parameters
342358 return cls ._decorator (* args , ** kwargs )
343-
359+
344360 @staticmethod
345361 def _function (
346362 accum : Callable [[], Accumulator ],
@@ -368,31 +384,27 @@ def _function(
368384 state_type = state_type ,
369385 volatility = volatility ,
370386 )
371-
387+
372388 @staticmethod
373389 def _decorator (
374390 input_types : pyarrow .DataType | list [pyarrow .DataType ],
375391 return_type : pyarrow .DataType ,
376392 state_type : list [pyarrow .DataType ],
377393 volatility : Volatility | str ,
378- name : Optional [str ] = None
394+ name : Optional [str ] = None ,
379395 ):
380396 def decorator (accum : Callable [[], Accumulator ]):
381397 udaf_caller = AggregateUDF .udaf (
382- accum ,
383- input_types ,
384- return_type ,
385- state_type ,
386- volatility ,
387- name
398+ accum , input_types , return_type , state_type , volatility , name
388399 )
389-
400+
390401 @functools .wraps (accum )
391402 def wrapper (* args , ** kwargs ):
392403 return udaf_caller (* args , ** kwargs )
404+
393405 return wrapper
394- return decorator
395406
407+ return decorator
396408
397409
398410class WindowEvaluator (metaclass = ABCMeta ):
0 commit comments