Skip to content

Commit 29698ef

Browse files
committed
clean up alias name creation
This takes us from `attributes -> Utf8(\"foo\")` to `attributes -> foo`
1 parent 2fffb96 commit 29698ef

File tree

2 files changed

+236
-162
lines changed

2 files changed

+236
-162
lines changed

src/rewrite.rs

Lines changed: 84 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
1+
use std::sync::Arc;
2+
13
use datafusion::arrow::datatypes::DataType;
24
use datafusion::common::config::ConfigOptions;
35
use datafusion::common::tree_node::Transformed;
6+
use datafusion::common::Column;
47
use datafusion::common::DFSchema;
58
use datafusion::common::Result;
9+
use datafusion::error::DataFusionError;
610
use datafusion::logical_expr::expr::{Alias, Cast, Expr, ScalarFunction};
711
use datafusion::logical_expr::expr_rewriter::FunctionRewrite;
812
use datafusion::logical_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr};
913
use datafusion::logical_expr::sqlparser::ast::BinaryOperator;
14+
use datafusion::logical_expr::ScalarUDF;
15+
use datafusion::scalar::ScalarValue;
1016

1117
#[derive(Debug)]
1218
pub(crate) struct JsonFunctionRewriter;
@@ -93,27 +99,95 @@ fn extract_scalar_function(expr: &Expr) -> Option<&ScalarFunction> {
9399
}
94100
}
95101

102+
/// The PostgreSQL-style JSON binary operators that this module rewrites into UDF calls.
///
/// Each variant has a SQL spelling (see the `Display` impl) and a backing UDF
/// (see `From<JsonOperator> for Arc<ScalarUDF>`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonOperator {
    /// The `->` operator.
    Arrow,
    /// The `->>` operator.
    LongArrow,
    /// The `?` operator.
    Question,
}
108+
109+
impl TryFrom<&BinaryOperator> for JsonOperator {
110+
type Error = DataFusionError;
111+
112+
fn try_from(op: &BinaryOperator) -> Result<Self> {
113+
match op {
114+
BinaryOperator::Arrow => Ok(JsonOperator::Arrow),
115+
BinaryOperator::LongArrow => Ok(JsonOperator::LongArrow),
116+
BinaryOperator::Question => Ok(JsonOperator::Question),
117+
_ => Err(DataFusionError::Internal(format!(
118+
"Unexpected operator {:?} in JSON function rewriter",
119+
op
120+
))),
121+
}
122+
}
123+
}
124+
125+
impl From<JsonOperator> for Arc<ScalarUDF> {
126+
fn from(op: JsonOperator) -> Arc<ScalarUDF> {
127+
match op {
128+
JsonOperator::Arrow => crate::udfs::json_get_udf(),
129+
JsonOperator::LongArrow => crate::udfs::json_as_text_udf(),
130+
JsonOperator::Question => crate::udfs::json_contains_udf(),
131+
}
132+
}
133+
}
134+
135+
impl std::fmt::Display for JsonOperator {
136+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
137+
match self {
138+
JsonOperator::Arrow => write!(f, "->"),
139+
JsonOperator::LongArrow => write!(f, "->>"),
140+
JsonOperator::Question => write!(f, "?"),
141+
}
142+
}
143+
}
144+
145+
/// Convert an Expr to a String representatiion for use in alias names.
146+
fn expr_to_sql_repr(expr: &Expr) -> String {
147+
match expr {
148+
Expr::Column(Column { name, relation }) => relation
149+
.as_ref()
150+
.map(|r| format!("{}.{}", r, name))
151+
.unwrap_or_else(|| name.clone()),
152+
Expr::Alias(alias) => alias.name.clone(),
153+
Expr::Literal(scalar) => match scalar {
154+
ScalarValue::Utf8(Some(v)) | ScalarValue::Utf8View(Some(v)) | ScalarValue::LargeUtf8(Some(v)) => {
155+
format!("'{v}'")
156+
}
157+
ScalarValue::UInt8(Some(v)) => v.to_string(),
158+
ScalarValue::UInt16(Some(v)) => v.to_string(),
159+
ScalarValue::UInt32(Some(v)) => v.to_string(),
160+
ScalarValue::UInt64(Some(v)) => v.to_string(),
161+
ScalarValue::Int8(Some(v)) => v.to_string(),
162+
ScalarValue::Int16(Some(v)) => v.to_string(),
163+
ScalarValue::Int32(Some(v)) => v.to_string(),
164+
ScalarValue::Int64(Some(v)) => v.to_string(),
165+
_ => scalar.to_string(),
166+
},
167+
Expr::Cast(cast) => expr_to_sql_repr(&cast.expr),
168+
_ => expr.to_string(),
169+
}
170+
}
171+
96172
/// A custom SQL expression planner that replaces PostgreSQL JSON operators (`->`, `->>`, `?`) with custom UDFs
97173
#[derive(Debug, Default)]
98174
pub struct JsonExprPlanner;
99175

100176
impl ExprPlanner for JsonExprPlanner {
101177
fn plan_binary_op(&self, expr: RawBinaryExpr, _schema: &DFSchema) -> Result<PlannerResult<RawBinaryExpr>> {
102-
let (func, op_display) = match &expr.op {
103-
BinaryOperator::Arrow => (crate::json_get::json_get_udf(), "->"),
104-
BinaryOperator::LongArrow => (crate::json_as_text::json_as_text_udf(), "->>"),
105-
BinaryOperator::Question => (crate::json_contains::json_contains_udf(), "?"),
106-
_ => return Ok(PlannerResult::Original(expr)),
107-
};
108-
let alias_name = match &expr.left {
109-
Expr::Alias(alias) => format!("{} {} {}", alias.name, op_display, expr.right),
110-
left_expr => format!("{} {} {}", left_expr, op_display, expr.right),
178+
let Ok(op) = JsonOperator::try_from(&expr.op) else {
179+
return Ok(PlannerResult::Original(expr));
111180
};
112181

182+
let left_repr = expr_to_sql_repr(&expr.left);
183+
let right_repr = expr_to_sql_repr(&expr.right);
184+
185+
let alias_name = format!("{left_repr} {op} {right_repr}");
186+
113187
// we put the alias in so that default column titles are `foo -> bar` instead of `json_get(foo, bar)`
114188
Ok(PlannerResult::Planned(Expr::Alias(Alias::new(
115189
Expr::ScalarFunction(ScalarFunction {
116-
func,
190+
func: op.into(),
117191
args: vec![expr.left, expr.right],
118192
}),
119193
None::<&str>,

0 commit comments

Comments
 (0)