Skip to content

Commit e9acd36

Browse files
pauldheinrichs authored and
mcheshkov committed
feat(cubesql) Implement format / col_description
1 parent c97526f commit e9acd36

11 files changed

+1228
-11
lines changed

rust/cubesql/cubesql/src/compile/engine/udf/common.rs

Lines changed: 162 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3087,10 +3087,17 @@ pub fn create_cube_regclass_cast_udf() -> ScalarUDF {
30873087
Some(as_str) => {
30883088
match PgType::get_all().iter().find(|e| e.typname == as_str) {
30893089
None => {
3090-
return Err(DataFusionError::Execution(format!(
3091-
"Unable to cast expression to Regclass: Unknown type: {}",
3092-
as_str
3093-
)))
3090+
// If the type name contains a dot, it's a schema-qualified name
3091+
// and we should return the appropriate RegClass to be converted to OID
3092+
// For now, we'll return 0 so metabase can sync without failing
3093+
if as_str.contains('.') {
3094+
builder.append_value(0)?;
3095+
} else {
3096+
return Err(DataFusionError::Execution(format!(
3097+
"Unable to cast expression to Regclass: Unknown type: {}",
3098+
as_str
3099+
)));
3100+
}
30943101
}
30953102
Some(ty) => {
30963103
builder.append_value(ty.oid as i64)?;
@@ -3148,6 +3155,157 @@ pub fn create_pg_get_serial_sequence_udf() -> ScalarUDF {
31483155
)
31493156
}
31503157

3158+
// Return a NOOP for this so metabase can sync without failing
3159+
// TODO: Implement this
3160+
pub fn create_col_description_udf() -> ScalarUDF {
3161+
let fun = make_scalar_function(move |args: &[ArrayRef]| {
3162+
// Ensure the output array has the same length as the input
3163+
let input_length = args[0].len();
3164+
let mut builder = StringBuilder::new(input_length);
3165+
3166+
for _ in 0..input_length {
3167+
builder.append_null()?;
3168+
}
3169+
3170+
Ok(Arc::new(builder.finish()) as ArrayRef)
3171+
});
3172+
3173+
let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(Arc::new(DataType::Utf8)));
3174+
3175+
ScalarUDF::new(
3176+
"col_description",
3177+
&Signature::one_of(vec![
3178+
TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8]),
3179+
], Volatility::Immutable),
3180+
&return_type,
3181+
&fun,
3182+
)
3183+
}
3184+
3185+
pub fn create_format_udf() -> ScalarUDF {
3186+
let fun = make_scalar_function(move |args: &[ArrayRef]| {
3187+
// Ensure at least one argument is provided
3188+
if args.is_empty() {
3189+
return Err(DataFusionError::Execution(
3190+
"format() requires at least one argument".to_string(),
3191+
));
3192+
}
3193+
3194+
// Ensure the first argument is a Utf8 (string)
3195+
if args[0].data_type() != &DataType::Utf8 {
3196+
return Err(DataFusionError::Execution(
3197+
"format() first argument must be a string".to_string(),
3198+
));
3199+
}
3200+
3201+
let format_strings = downcast_string_arg!(&args[0], "format_str", i32);
3202+
let mut builder = StringBuilder::new(format_strings.len());
3203+
3204+
for i in 0..format_strings.len() {
3205+
if format_strings.is_null(i) {
3206+
builder.append_null()?;
3207+
continue;
3208+
}
3209+
3210+
let format_str = format_strings.value(i);
3211+
let mut result = String::new();
3212+
let mut format_chars = format_str.chars().peekable();
3213+
let mut arg_index = 1; // Start from first argument after format string
3214+
3215+
while let Some(c) = format_chars.next() {
3216+
if c != '%' {
3217+
result.push(c);
3218+
continue;
3219+
}
3220+
3221+
match format_chars.next() {
3222+
Some('I') => {
3223+
// Handle %I - SQL identifier
3224+
if arg_index >= args.len() {
3225+
return Err(DataFusionError::Execution(
3226+
"Not enough arguments for format string".to_string(),
3227+
));
3228+
}
3229+
3230+
let arg = &args[arg_index];
3231+
let value = match arg.data_type() {
3232+
DataType::Utf8 => {
3233+
let str_arr = downcast_string_arg!(arg, "arg", i32);
3234+
if str_arr.is_null(i) {
3235+
return Err(DataFusionError::Execution(
3236+
"NULL values cannot be formatted as identifiers".to_string(),
3237+
));
3238+
}
3239+
str_arr.value(i).to_string()
3240+
}
3241+
_ => {
3242+
// For other types, try to convert to string
3243+
let str_arr = cast(&arg, &DataType::Utf8)?;
3244+
let str_arr = str_arr
3245+
.as_any()
3246+
.downcast_ref::<StringArray>()
3247+
.unwrap();
3248+
if str_arr.is_null(i) {
3249+
return Err(DataFusionError::Execution(
3250+
"NULL values cannot be formatted as identifiers".to_string(),
3251+
));
3252+
}
3253+
str_arr.value(i).to_string()
3254+
}
3255+
};
3256+
3257+
// Quote identifier if necessary
3258+
let needs_quoting = !value.chars().all(|c| {
3259+
c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'
3260+
}) || value.is_empty();
3261+
3262+
if needs_quoting {
3263+
result.push('"');
3264+
result.push_str(&value.replace('"', "\"\""));
3265+
result.push('"');
3266+
} else {
3267+
result.push_str(&value);
3268+
}
3269+
arg_index += 1;
3270+
}
3271+
Some('%') => {
3272+
// %% is escaped to single %
3273+
result.push('%');
3274+
}
3275+
Some(c) => {
3276+
return Err(DataFusionError::Execution(format!(
3277+
"Unsupported format specifier %{}",
3278+
c
3279+
)));
3280+
}
3281+
None => {
3282+
return Err(DataFusionError::Execution(
3283+
"Invalid format string - ends with %".to_string(),
3284+
));
3285+
}
3286+
}
3287+
}
3288+
3289+
builder.append_value(result)?;
3290+
}
3291+
3292+
Ok(Arc::new(builder.finish()) as ArrayRef)
3293+
});
3294+
3295+
let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(Arc::new(DataType::Utf8)));
3296+
3297+
ScalarUDF::new(
3298+
"format",
3299+
&Signature::variadic(
3300+
vec![DataType::Utf8],
3301+
Volatility::Immutable,
3302+
),
3303+
&return_type,
3304+
&fun,
3305+
)
3306+
}
3307+
3308+
31513309
pub fn create_json_build_object_udf() -> ScalarUDF {
31523310
let fun = make_scalar_function(move |_args: &[ArrayRef]| {
31533311
// TODO: Implement
@@ -3769,13 +3927,6 @@ pub fn register_fun_stubs(mut ctx: SessionContext) -> SessionContext {
37693927
rettyp = TimestampTz,
37703928
vol = Volatile
37713929
);
3772-
register_fun_stub!(
3773-
udf,
3774-
"col_description",
3775-
tsig = [Oid, Int32],
3776-
rettyp = Utf8,
3777-
vol = Stable
3778-
);
37793930
register_fun_stub!(udf, "convert", tsig = [Binary, Utf8, Utf8], rettyp = Binary);
37803931
register_fun_stub!(udf, "convert_from", tsig = [Binary, Utf8], rettyp = Utf8);
37813932
register_fun_stub!(udf, "convert_to", tsig = [Utf8, Utf8], rettyp = Binary);

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16351,4 +16351,33 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1635116351

1635216352
Ok(())
1635316353
}
16354+
16355+
#[tokio::test]
16356+
async fn test_format_function() -> Result<(), CubeError> {
16357+
// Test: Basic usage with a single identifier
16358+
let result = execute_query("SELECT format('%I', 'column_name') AS formatted_identifier".to_string(), DatabaseProtocol::PostgreSQL).await?;
16359+
insta::assert_snapshot!("formatted_identifier", result);
16360+
16361+
// Test: Using multiple identifiers
16362+
let result = execute_query("SELECT format('%I, %I', 'table_name', 'column_name') AS formatted_identifiers".to_string(), DatabaseProtocol::PostgreSQL).await?;
16363+
insta::assert_snapshot!("formatted_identifiers", result);
16364+
16365+
// Test: Unsupported format specifier
16366+
let result = execute_query("SELECT format('%X', 'value') AS unsupported_specifier".to_string(), DatabaseProtocol::PostgreSQL).await;
16367+
assert!(result.is_err());
16368+
16369+
// Test: Format string ending with %
16370+
let result = execute_query("SELECT format('%', 'value') AS invalid_format".to_string(), DatabaseProtocol::PostgreSQL).await;
16371+
assert!(result.is_err());
16372+
16373+
// Test: Quoting necessary for special characters
16374+
let result = execute_query("SELECT format('%I', 'column-name') AS quoted_identifier".to_string(), DatabaseProtocol::PostgreSQL).await?;
16375+
insta::assert_snapshot!("quoted_identifier", result);
16376+
16377+
// Test: Quoting necessary for reserved keywords
16378+
let result = execute_query("SELECT format('%I', 'select') AS quoted_keyword".to_string(), DatabaseProtocol::PostgreSQL).await?;
16379+
insta::assert_snapshot!("quoted_keyword", result);
16380+
16381+
Ok(())
16382+
}
1635416383
}

rust/cubesql/cubesql/src/compile/query_engine.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,9 @@ impl QueryEngine for SqlQueryEngine {
465465
ctx.register_udf(create_current_timestamp_udf("localtimestamp"));
466466
ctx.register_udf(create_current_schema_udf());
467467
ctx.register_udf(create_current_schemas_udf());
468+
ctx.register_udf(create_format_udf());
468469
ctx.register_udf(create_format_type_udf());
470+
ctx.register_udf(create_col_description_udf());
469471
ctx.register_udf(create_pg_datetime_precision_udf());
470472
ctx.register_udf(create_pg_numeric_precision_udf());
471473
ctx.register_udf(create_pg_numeric_scale_udf());
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: result
4+
---
5+
+----------------------+
6+
| formatted_identifier |
7+
+----------------------+
8+
| column_name |
9+
+----------------------+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: result
4+
---
5+
+-------------------------+
6+
| formatted_identifiers |
7+
+-------------------------+
8+
| table_name, column_name |
9+
+-------------------------+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: result
4+
---
5+
+-------------------+
6+
| quoted_identifier |
7+
+-------------------+
8+
| "column-name" |
9+
+-------------------+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesql/src/compile/mod.rs
3+
expression: result
4+
---
5+
+----------------+
6+
| quoted_keyword |
7+
+----------------+
8+
| select |
9+
+----------------+

0 commit comments

Comments
 (0)