@@ -21,10 +21,9 @@ use datafusion::common::{
2121 not_impl_err, plan_err, substrait_err, DFSchema , DataFusionError , ScalarValue ,
2222} ;
2323use datafusion:: execution:: FunctionRegistry ;
24- use datafusion:: logical_expr:: { expr, BinaryExpr , Expr , Like , Operator } ;
24+ use datafusion:: logical_expr:: { expr, Between , BinaryExpr , Expr , Like , Operator } ;
2525use std:: vec:: Drain ;
2626use substrait:: proto:: expression:: ScalarFunction ;
27- use substrait:: proto:: function_argument:: ArgType ;
2827
2928pub async fn from_scalar_function (
3029 consumer : & impl SubstraitConsumer ,
@@ -70,7 +69,7 @@ pub async fn from_scalar_function(
7069 // In those cases we build a balanced tree of BinaryExprs
7170 arg_list_to_binary_op_tree ( op, args)
7271 } else if let Some ( builder) = BuiltinExprBuilder :: try_from_name ( fn_name) {
73- builder. build ( consumer, f, input_schema ) . await
72+ builder. build ( consumer, f, args ) . await
7473 } else {
7574 not_impl_err ! ( "Unsupported function name: {fn_name:?}" )
7675 }
@@ -180,7 +179,8 @@ impl BuiltinExprBuilder {
180179 match name {
181180 "not" | "like" | "ilike" | "is_null" | "is_not_null" | "is_true"
182181 | "is_false" | "is_not_true" | "is_not_false" | "is_unknown"
183- | "is_not_unknown" | "negative" | "negate" => Some ( Self {
182+ | "is_not_unknown" | "negative" | "negate" | "and_not" | "xor"
183+ | "between" | "logb" => Some ( Self {
184184 expr_name : name. to_string ( ) ,
185185 } ) ,
186186 _ => None ,
@@ -191,37 +191,30 @@ impl BuiltinExprBuilder {
191191 self ,
192192 consumer : & impl SubstraitConsumer ,
193193 f : & ScalarFunction ,
194- input_schema : & DFSchema ,
194+ args : Vec < Expr > ,
195195 ) -> Result < Expr > {
196196 match self . expr_name . as_str ( ) {
197- "like" => Self :: build_like_expr ( consumer , false , f, input_schema ) . await ,
198- "ilike" => Self :: build_like_expr ( consumer , true , f, input_schema ) . await ,
197+ "like" => Self :: build_like_expr ( false , f, args ) . await ,
198+ "ilike" => Self :: build_like_expr ( true , f, args ) . await ,
199199 "not" | "negative" | "negate" | "is_null" | "is_not_null" | "is_true"
200200 | "is_false" | "is_not_true" | "is_not_false" | "is_unknown"
201- | "is_not_unknown" => {
202- Self :: build_unary_expr ( consumer, & self . expr_name , f, input_schema) . await
201+ | "is_not_unknown" => Self :: build_unary_expr ( & self . expr_name , args) . await ,
202+ "and_not" | "xor" => Self :: build_binary_expr ( & self . expr_name , args) . await ,
203+ "between" => Self :: build_between_expr ( & self . expr_name , args) . await ,
204+ "logb" => {
205+ Self :: build_custom_handling_expr ( consumer, & self . expr_name , args) . await
203206 }
204207 _ => {
205208 not_impl_err ! ( "Unsupported builtin expression: {}" , self . expr_name)
206209 }
207210 }
208211 }
209212
210- async fn build_unary_expr (
211- consumer : & impl SubstraitConsumer ,
212- fn_name : & str ,
213- f : & ScalarFunction ,
214- input_schema : & DFSchema ,
215- ) -> Result < Expr > {
216- if f. arguments . len ( ) != 1 {
217- return substrait_err ! ( "Expect one argument for {fn_name} expr" ) ;
218- }
219- let Some ( ArgType :: Value ( expr_substrait) ) = & f. arguments [ 0 ] . arg_type else {
220- return substrait_err ! ( "Invalid arguments type for {fn_name} expr" ) ;
213+ async fn build_unary_expr ( fn_name : & str , args : Vec < Expr > ) -> Result < Expr > {
214+ let [ arg] = match args. try_into ( ) {
215+ Ok ( args_arr) => args_arr,
216+ Err ( _) => return substrait_err ! ( "Expected one argument for {fn_name} expr" ) ,
221217 } ;
222- let arg = consumer
223- . consume_expression ( expr_substrait, input_schema)
224- . await ?;
225218 let arg = Box :: new ( arg) ;
226219
227220 let expr = match fn_name {
@@ -242,40 +235,29 @@ impl BuiltinExprBuilder {
242235 }
243236
244237 async fn build_like_expr (
245- consumer : & impl SubstraitConsumer ,
246238 case_insensitive : bool ,
247239 f : & ScalarFunction ,
248- input_schema : & DFSchema ,
240+ args : Vec < Expr > ,
249241 ) -> Result < Expr > {
250242 let fn_name = if case_insensitive { "ILIKE" } else { "LIKE" } ;
251- if f . arguments . len ( ) != 2 && f . arguments . len ( ) != 3 {
243+ if args . len ( ) != 2 && args . len ( ) != 3 {
252244 return substrait_err ! ( "Expect two or three arguments for `{fn_name}` expr" ) ;
253245 }
254246
255- let Some ( ArgType :: Value ( expr_substrait) ) = & f. arguments [ 0 ] . arg_type else {
256- return substrait_err ! ( "Invalid arguments type for `{fn_name}` expr" ) ;
247+ let mut args_iter = args. into_iter ( ) ;
248+ let Some ( expr) = args_iter. next ( ) else {
249+ return substrait_err ! ( "Missing first argument for {fn_name} expression" ) ;
257250 } ;
258- let expr = consumer
259- . consume_expression ( expr_substrait, input_schema)
260- . await ?;
261- let Some ( ArgType :: Value ( pattern_substrait) ) = & f. arguments [ 1 ] . arg_type else {
262- return substrait_err ! ( "Invalid arguments type for `{fn_name}` expr" ) ;
251+ let Some ( pattern) = args_iter. next ( ) else {
252+ return substrait_err ! ( "Missing second argument for {fn_name} expression" ) ;
263253 } ;
264- let pattern = consumer
265- . consume_expression ( pattern_substrait, input_schema)
266- . await ?;
267254
268255 // Default case: escape character is Literal(Utf8(None))
269256 let escape_char = if f. arguments . len ( ) == 3 {
270- let Some ( ArgType :: Value ( escape_char_substrait) ) = & f. arguments [ 2 ] . arg_type
271- else {
272- return substrait_err ! ( "Invalid arguments type for `{fn_name}` expr" ) ;
257+ let Some ( escape_char_expr) = args_iter. next ( ) else {
258+ return substrait_err ! ( "Missing third argument for {fn_name} expression" ) ;
273259 } ;
274260
275- let escape_char_expr = consumer
276- . consume_expression ( escape_char_substrait, input_schema)
277- . await ?;
278-
279261 match escape_char_expr {
280262 Expr :: Literal ( ScalarValue :: Utf8 ( escape_char_string) , _) => {
281263 // Convert Option<String> to Option<char>
@@ -299,6 +281,80 @@ impl BuiltinExprBuilder {
299281 case_insensitive,
300282 } ) )
301283 }
284+
285+ async fn build_binary_expr ( fn_name : & str , args : Vec < Expr > ) -> Result < Expr > {
286+ let [ a, b] = match args. try_into ( ) {
287+ Ok ( args_arr) => args_arr,
288+ Err ( _) => {
289+ return substrait_err ! ( "Expected two arguments for `{fn_name}` expr" )
290+ }
291+ } ;
292+ match fn_name {
293+ "and_not" => Ok ( Self :: build_and_not_expr ( a, b) ) ,
294+ "xor" => Ok ( Self :: build_xor_expr ( a, b) ) ,
295+ _ => not_impl_err ! ( "Unsupported builtin expression: {}" , fn_name) ,
296+ }
297+ }
298+
299+ fn build_and_not_expr ( a : Expr , b : Expr ) -> Expr {
300+ a. and ( Expr :: Not ( Box :: new ( b) ) )
301+ }
302+
303+ fn build_xor_expr ( a : Expr , b : Expr ) -> Expr {
304+ let or_expr = a. clone ( ) . or ( b. clone ( ) ) ;
305+ let and_expr = a. and ( b) ;
306+ Self :: build_and_not_expr ( or_expr, and_expr)
307+ }
308+
309+ async fn build_between_expr ( fn_name : & str , args : Vec < Expr > ) -> Result < Expr > {
310+ let [ expression, low, high] = match args. try_into ( ) {
311+ Ok ( args_arr) => args_arr,
312+ Err ( _) => {
313+ return substrait_err ! ( "Expected three arguments for `{fn_name}` expr" )
314+ }
315+ } ;
316+
317+ Ok ( Expr :: Between ( Between {
318+ expr : Box :: new ( expression) ,
319+ negated : false ,
320+ low : Box :: new ( low) ,
321+ high : Box :: new ( high) ,
322+ } ) )
323+ }
324+
325+ //This handles any functions that require custom handling
326+ async fn build_custom_handling_expr (
327+ consumer : & impl SubstraitConsumer ,
328+ fn_name : & str ,
329+ args : Vec < Expr > ,
330+ ) -> Result < Expr > {
331+ match fn_name {
332+ "logb" => Self :: build_logb_expr ( consumer, args) . await ,
333+ _ => not_impl_err ! ( "Unsupported custom handled expression: {}" , fn_name) ,
334+ }
335+ }
336+
337+ async fn build_logb_expr (
338+ consumer : & impl SubstraitConsumer ,
339+ args : Vec < Expr > ,
340+ ) -> Result < Expr > {
341+ if args. len ( ) != 2 {
342+ return substrait_err ! ( "Expect two arguments for logb function" ) ;
343+ }
344+
345+ let mut args = args;
346+ args. swap ( 0 , 1 ) ;
347+
348+ //The equivalent of logb in DataFusion is the log function (which has its arguments in reverse order)
349+ if let Ok ( func) = consumer. get_function_registry ( ) . udf ( "log" ) {
350+ Ok ( Expr :: ScalarFunction ( expr:: ScalarFunction :: new_udf (
351+ func. to_owned ( ) ,
352+ args,
353+ ) ) )
354+ } else {
355+ not_impl_err ! ( "Unsupported function name: logb" )
356+ }
357+ }
302358}
303359
304360#[ cfg( test) ]
0 commit comments