Skip to content

Commit 25cb812

Browse files
joseph-isaacsalamb
andauthored
Move many udf implementations from invoke to invoke_batch (#13491)
* Added support for `ScalarUDFImpl::invoke_with_return_type` where the invoke is passed the return type created for the udf instance * Move from invoke to invoke batch * ex * of * docs * fx * fx * fx * fix * fix * Do not yet deprecate invoke_batch, add docs to invoke_with_args * add ticket reference * fix * fix * fix * fix * fmt * fmt * remove invoke * fix agg * unused * update func docs * update tests and remove deprecation * remove dep * oops * internal as vec * dep * fixup * fixup * fix * fix --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent ed227c0 commit 25cb812

File tree

144 files changed

+1079
-410
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

144 files changed

+1079
-410
lines changed

datafusion-examples/examples/advanced_udf.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,11 @@ impl ScalarUDFImpl for PowUdf {
9191
///
9292
/// However, it also means the implementation is more complex than when
9393
/// using `create_udf`.
94-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
94+
fn invoke_batch(
95+
&self,
96+
args: &[ColumnarValue],
97+
_number_rows: usize,
98+
) -> Result<ColumnarValue> {
9599
// DataFusion has arranged for the correct inputs to be passed to this
96100
// function, but we check again to make sure
97101
assert_eq!(args.len(), 2);

datafusion-examples/examples/function_factory.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ use datafusion_common::tree_node::{Transformed, TreeNode};
2626
use datafusion_common::{exec_err, internal_err, DataFusionError};
2727
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
2828
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
29-
use datafusion_expr::{CreateFunction, Expr, ScalarUDF, ScalarUDFImpl, Signature};
29+
use datafusion_expr::{
30+
CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature,
31+
};
3032

3133
/// This example shows how to utilize [FunctionFactory] to implement simple
3234
/// SQL-macro like functions using a `CREATE FUNCTION` statement. The same
@@ -132,9 +134,9 @@ impl ScalarUDFImpl for ScalarFunctionWrapper {
132134
Ok(self.return_type.clone())
133135
}
134136

135-
fn invoke(
137+
fn invoke_with_args(
136138
&self,
137-
_args: &[datafusion_expr::ColumnarValue],
139+
_args: ScalarFunctionArgs,
138140
) -> Result<datafusion_expr::ColumnarValue> {
139141
// Since this function is always simplified to another expression, it
140142
// should never actually be invoked

datafusion-examples/examples/optimizer_rule.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,11 @@ impl ScalarUDFImpl for MyEq {
205205
Ok(DataType::Boolean)
206206
}
207207

208-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
208+
fn invoke_batch(
209+
&self,
210+
_args: &[ColumnarValue],
211+
_number_rows: usize,
212+
) -> Result<ColumnarValue> {
209213
// this example simply returns "true" which is not what a real
210214
// implementation would do.
211215
Ok(ColumnarValue::Scalar(ScalarValue::from(true)))

datafusion/core/src/physical_optimizer/projection_pushdown.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,11 @@ mod tests {
13821382
Ok(DataType::Int32)
13831383
}
13841384

1385-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
1385+
fn invoke_batch(
1386+
&self,
1387+
_args: &[ColumnarValue],
1388+
_number_rows: usize,
1389+
) -> Result<ColumnarValue> {
13861390
unimplemented!("DummyUDF::invoke")
13871391
}
13881392
}

datafusion/core/tests/fuzz_cases/equivalence/utils.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,11 @@ impl ScalarUDFImpl for TestScalarUDF {
581581
Ok(input[0].sort_properties)
582582
}
583583

584-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
584+
fn invoke_batch(
585+
&self,
586+
args: &[ColumnarValue],
587+
_number_rows: usize,
588+
) -> Result<ColumnarValue> {
585589
let args = ColumnarValue::values_to_arrays(args)?;
586590

587591
let arr: ArrayRef = match args[0].data_type() {

datafusion/core/tests/user_defined/user_defined_scalar_functions.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -520,10 +520,6 @@ impl ScalarUDFImpl for AddIndexToStringVolatileScalarUDF {
520520
Ok(self.return_type.clone())
521521
}
522522

523-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
524-
not_impl_err!("index_with_offset function does not accept arguments")
525-
}
526-
527523
fn invoke_batch(
528524
&self,
529525
args: &[ColumnarValue],
@@ -720,7 +716,11 @@ impl ScalarUDFImpl for CastToI64UDF {
720716
Ok(ExprSimplifyResult::Simplified(new_expr))
721717
}
722718

723-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
719+
fn invoke_batch(
720+
&self,
721+
_args: &[ColumnarValue],
722+
_number_rows: usize,
723+
) -> Result<ColumnarValue> {
724724
unimplemented!("Function should have been simplified prior to evaluation")
725725
}
726726
}
@@ -848,7 +848,11 @@ impl ScalarUDFImpl for TakeUDF {
848848
}
849849

850850
// The actual implementation
851-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
851+
fn invoke_batch(
852+
&self,
853+
args: &[ColumnarValue],
854+
_number_rows: usize,
855+
) -> Result<ColumnarValue> {
852856
let take_idx = match &args[2] {
853857
ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) if v < &2 => *v as usize,
854858
_ => unreachable!(),
@@ -956,7 +960,11 @@ impl ScalarUDFImpl for ScalarFunctionWrapper {
956960
Ok(self.return_type.clone())
957961
}
958962

959-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
963+
fn invoke_batch(
964+
&self,
965+
_args: &[ColumnarValue],
966+
_number_rows: usize,
967+
) -> Result<ColumnarValue> {
960968
internal_err!("This function should not get invoked!")
961969
}
962970

@@ -1240,7 +1248,11 @@ impl ScalarUDFImpl for MyRegexUdf {
12401248
}
12411249
}
12421250

1243-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
1251+
fn invoke_batch(
1252+
&self,
1253+
args: &[ColumnarValue],
1254+
_number_rows: usize,
1255+
) -> Result<ColumnarValue> {
12441256
match args {
12451257
[ColumnarValue::Scalar(ScalarValue::Utf8(value))] => {
12461258
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(

datafusion/expr/src/expr.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2389,7 +2389,7 @@ mod test {
23892389
use crate::expr_fn::col;
23902390
use crate::{
23912391
case, lit, qualified_wildcard, wildcard, wildcard_with_options, ColumnarValue,
2392-
ScalarUDF, ScalarUDFImpl, Volatility,
2392+
ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility,
23932393
};
23942394
use sqlparser::ast;
23952395
use sqlparser::ast::{Ident, IdentWithAlias};
@@ -2518,7 +2518,10 @@ mod test {
25182518
Ok(DataType::Utf8)
25192519
}
25202520

2521-
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
2521+
fn invoke_with_args(
2522+
&self,
2523+
_args: ScalarFunctionArgs,
2524+
) -> Result<ColumnarValue> {
25222525
Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
25232526
}
25242527
}

datafusion/expr/src/expr_fn.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,11 @@ impl ScalarUDFImpl for SimpleScalarUDF {
462462
Ok(self.return_type.clone())
463463
}
464464

465-
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
465+
fn invoke_batch(
466+
&self,
467+
args: &[ColumnarValue],
468+
_number_rows: usize,
469+
) -> Result<ColumnarValue> {
466470
(self.fun)(args)
467471
}
468472
}

datafusion/expr/src/udf.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,13 +213,11 @@ impl ScalarUDF {
213213
self.inner.is_nullable(args, schema)
214214
}
215215

216-
#[deprecated(since = "43.0.0", note = "Use `invoke_with_args` instead")]
217216
pub fn invoke_batch(
218217
&self,
219218
args: &[ColumnarValue],
220219
number_rows: usize,
221220
) -> Result<ColumnarValue> {
222-
#[allow(deprecated)]
223221
self.inner.invoke_batch(args, number_rows)
224222
}
225223

@@ -544,7 +542,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
544542
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
545543
/// to arrays, which will likely be simpler code, but be slower.
546544
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
547-
#[allow(deprecated)]
548545
self.invoke_batch(args.args, args.number_rows)
549546
}
550547

datafusion/functions-nested/benches/map.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,9 @@ fn criterion_benchmark(c: &mut Criterion) {
9696

9797
b.iter(|| {
9898
black_box(
99-
#[allow(deprecated)] // TODO use invoke_batch
99+
// TODO use invoke_with_args
100100
map_udf()
101-
.invoke(&[keys.clone(), values.clone()])
101+
.invoke_batch(&[keys.clone(), values.clone()], 1)
102102
.expect("map should work on valid values"),
103103
);
104104
});

0 commit comments

Comments
 (0)