@@ -3087,10 +3087,18 @@ pub fn create_cube_regclass_cast_udf() -> ScalarUDF {
30873087 Some ( as_str) => {
30883088 match PgType :: get_all ( ) . iter ( ) . find ( |e| e. typname == as_str) {
30893089 None => {
3090- return Err ( DataFusionError :: Execution ( format ! (
3091- "Unable to cast expression to Regclass: Unknown type: {}" ,
3092- as_str
3093- ) ) )
3090+ // If the type name contains a dot, it's a schema-qualified name
3091+ // and we should return the approprate RegClass to be converted to OID
3092+ // For now, we'll return 0 so metabase can sync without failing
3093+ // TODO actually read `pg_type`
3094+ if as_str. contains ( '.' ) {
3095+ builder. append_value ( 0 ) ?;
3096+ } else {
3097+ return Err ( DataFusionError :: Execution ( format ! (
3098+ "Unable to cast expression to Regclass: Unknown type: {}" ,
3099+ as_str
3100+ ) ) ) ;
3101+ }
30943102 }
30953103 Some ( ty) => {
30963104 builder. append_value ( ty. oid as i64 ) ?;
@@ -3148,6 +3156,171 @@ pub fn create_pg_get_serial_sequence_udf() -> ScalarUDF {
31483156 )
31493157}
31503158
3159+ // Return a NOOP for this so metabase can sync without failing
3160+ // See https://www.postgresql.org/docs/17/functions-info.html#FUNCTIONS-INFO-COMMENT here
3161+ // TODO: Implement this
3162+ pub fn create_col_description_udf ( ) -> ScalarUDF {
3163+ let fun = make_scalar_function ( move |args : & [ ArrayRef ] | {
3164+ // Ensure the output array has the same length as the input
3165+ let input_length = args[ 0 ] . len ( ) ;
3166+ let mut builder = StringBuilder :: new ( input_length) ;
3167+
3168+ for _ in 0 ..input_length {
3169+ builder. append_null ( ) ?;
3170+ }
3171+
3172+ Ok ( Arc :: new ( builder. finish ( ) ) as ArrayRef )
3173+ } ) ;
3174+
3175+ let return_type: ReturnTypeFunction = Arc :: new ( move |_| Ok ( Arc :: new ( DataType :: Utf8 ) ) ) ;
3176+
3177+ ScalarUDF :: new (
3178+ "col_description" ,
3179+ // Correct signature for col_description should be `(oid, integer) → text`
3180+ // We model oid as UInt32, so [DataType::UInt32, DataType::Int32] is a proper arguments
3181+ // However, it seems that coercion rules in DF differs from PostgreSQL at the moment
3182+ // And metabase uses col_description(CAST(CAST(... AS regclass) AS oid), cardinal_number)
3183+ // And we model regclass as Int64, and cardinal_number as UInt32
3184+ // Which is why second signature is necessary
3185+ & Signature :: one_of (
3186+ vec ! [
3187+ TypeSignature :: Exact ( vec![ DataType :: UInt32 , DataType :: Int32 ] ) ,
3188+ // TODO remove this signature in favor of proper model/coercion
3189+ TypeSignature :: Exact ( vec![ DataType :: Int64 , DataType :: UInt32 ] ) ,
3190+ ] ,
3191+ Volatility :: Stable ,
3192+ ) ,
3193+ & return_type,
3194+ & fun,
3195+ )
3196+ }
3197+
3198+ // See https://www.postgresql.org/docs/17/functions-string.html#FUNCTIONS-STRING-FORMAT
3199+ pub fn create_format_udf ( ) -> ScalarUDF {
3200+ let fun = make_scalar_function ( move |args : & [ ArrayRef ] | {
3201+ // Ensure at least one argument is provided
3202+ if args. is_empty ( ) {
3203+ return Err ( DataFusionError :: Execution (
3204+ "format() requires at least one argument" . to_string ( ) ,
3205+ ) ) ;
3206+ }
3207+
3208+ // Ensure the first argument is a Utf8 (string)
3209+ if args[ 0 ] . data_type ( ) != & DataType :: Utf8 {
3210+ return Err ( DataFusionError :: Execution (
3211+ "format() first argument must be a string" . to_string ( ) ,
3212+ ) ) ;
3213+ }
3214+
3215+ let format_strings = downcast_string_arg ! ( & args[ 0 ] , "format_str" , i32 ) ;
3216+ let mut builder = StringBuilder :: new ( format_strings. len ( ) ) ;
3217+
3218+ for i in 0 ..format_strings. len ( ) {
3219+ if format_strings. is_null ( i) {
3220+ builder. append_null ( ) ?;
3221+ continue ;
3222+ }
3223+
3224+ let format_str = format_strings. value ( i) ;
3225+ let mut result = String :: new ( ) ;
3226+ let mut format_chars = format_str. chars ( ) . peekable ( ) ;
3227+ let mut arg_index = 1 ; // Start from first argument after format string
3228+
3229+ while let Some ( c) = format_chars. next ( ) {
3230+ if c != '%' {
3231+ result. push ( c) ;
3232+ continue ;
3233+ }
3234+
3235+ match format_chars. next ( ) {
3236+ Some ( 'I' ) => {
3237+ // Handle %I - SQL identifier
3238+ if arg_index >= args. len ( ) {
3239+ return Err ( DataFusionError :: Execution (
3240+ "Not enough arguments for format string" . to_string ( ) ,
3241+ ) ) ;
3242+ }
3243+
3244+ let arg = & args[ arg_index] ;
3245+ let value = match arg. data_type ( ) {
3246+ DataType :: Utf8 => {
3247+ let str_arr = downcast_string_arg ! ( arg, "arg" , i32 ) ;
3248+ if str_arr. is_null ( i) {
3249+ return Err ( DataFusionError :: Execution (
3250+ "NULL values cannot be formatted as identifiers"
3251+ . to_string ( ) ,
3252+ ) ) ;
3253+ }
3254+ str_arr. value ( i) . to_string ( )
3255+ }
3256+ _ => {
3257+ // For other types, try to convert to string
3258+ let str_arr = cast ( & arg, & DataType :: Utf8 ) ?;
3259+ let str_arr =
3260+ str_arr. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
3261+ if str_arr. is_null ( i) {
3262+ return Err ( DataFusionError :: Execution (
3263+ "NULL values cannot be formatted as identifiers"
3264+ . to_string ( ) ,
3265+ ) ) ;
3266+ }
3267+ str_arr. value ( i) . to_string ( )
3268+ }
3269+ } ;
3270+
3271+ // Quote any identifier for now
3272+ // That's a safety-first approach: it would quote too much, but every edge-case would be covered
3273+ // Like `1` or `1a` or `select`
3274+ // TODO Quote identifier only if necessary
3275+ let needs_quoting = true ;
3276+
3277+ if needs_quoting {
3278+ result. push ( '"' ) ;
3279+ result. push_str ( & value. replace ( '"' , "\" \" " ) ) ;
3280+ result. push ( '"' ) ;
3281+ } else {
3282+ result. push_str ( & value) ;
3283+ }
3284+ arg_index += 1 ;
3285+ }
3286+ Some ( '%' ) => {
3287+ // %% is escaped to single %
3288+ result. push ( '%' ) ;
3289+ }
3290+ Some ( c) => {
3291+ return Err ( DataFusionError :: Execution ( format ! (
3292+ "Unsupported format specifier %{}" ,
3293+ c
3294+ ) ) ) ;
3295+ }
3296+ None => {
3297+ return Err ( DataFusionError :: Execution (
3298+ "Invalid format string - ends with %" . to_string ( ) ,
3299+ ) ) ;
3300+ }
3301+ }
3302+ }
3303+
3304+ builder. append_value ( result) ?;
3305+ }
3306+
3307+ Ok ( Arc :: new ( builder. finish ( ) ) as ArrayRef )
3308+ } ) ;
3309+
3310+ let return_type: ReturnTypeFunction = Arc :: new ( move |_| Ok ( Arc :: new ( DataType :: Utf8 ) ) ) ;
3311+
3312+ ScalarUDF :: new (
3313+ "format" ,
3314+ // Actually, format should be variadic with types (Utf8, any*)
3315+ // But ATM DataFusion does not support those signatures
3316+ // And this would work through implicit casting to Utf8
3317+ // TODO migrate to proper custom signature once it's supported by DF
3318+ & Signature :: variadic ( vec ! [ DataType :: Utf8 ] , Volatility :: Immutable ) ,
3319+ & return_type,
3320+ & fun,
3321+ )
3322+ }
3323+
31513324pub fn create_json_build_object_udf ( ) -> ScalarUDF {
31523325 let fun = make_scalar_function ( move |_args : & [ ArrayRef ] | {
31533326 // TODO: Implement
@@ -3769,13 +3942,6 @@ pub fn register_fun_stubs(mut ctx: SessionContext) -> SessionContext {
37693942 rettyp = TimestampTz ,
37703943 vol = Volatile
37713944 ) ;
3772- register_fun_stub ! (
3773- udf,
3774- "col_description" ,
3775- tsig = [ Oid , Int32 ] ,
3776- rettyp = Utf8 ,
3777- vol = Stable
3778- ) ;
37793945 register_fun_stub ! ( udf, "convert" , tsig = [ Binary , Utf8 , Utf8 ] , rettyp = Binary ) ;
37803946 register_fun_stub ! ( udf, "convert_from" , tsig = [ Binary , Utf8 ] , rettyp = Utf8 ) ;
37813947 register_fun_stub ! ( udf, "convert_to" , tsig = [ Utf8 , Utf8 ] , rettyp = Binary ) ;
0 commit comments