Skip to content

Commit da2c0b8

Browse files
committed
feat: parse JsonAccess as a custom binary operator
- Needed in datafusion-contrib/datafusion-variant#26 - `sqlparser-rs` currently exposes the colon operator (`:`) as a special `JsonAccess` expression. So it fails in datafusion's parsing before an `ExprPlanner` is even invoked. - Fix it by converting `JsonAccess` to a normal binary expr, on which the `ExprPlanner` is invoked and custom parsing can be done. I have arrived at this solution iteratively to solve datafusion-contrib/datafusion-variant#26. Some alternatives I considered (but NOT IMPLEMENTED): - Add a new `Expr::JsonAccess` instead of re-using `BinaryExpr`. Alongside this, we could also introduce datafusion's version of a `JsonPath` struct. `Expr::JsonAccess` could also contain the optional cast that usually comes after it (like in `col_a:field_b::int`). The cast might be needed for performance optimizations. This is a much bigger change, so I have not done it.
1 parent 73fbd48 commit da2c0b8

File tree

8 files changed

+85
-8
lines changed

8 files changed

+85
-8
lines changed

datafusion/expr-common/src/operator.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ pub enum Operator {
140140
///
141141
/// Not implemented in DataFusion yet.
142142
QuestionPipe,
143+
/// Colon operator, like `:`
144+
///
145+
/// Not implemented in DataFusion yet.
146+
Colon,
143147
}
144148

145149
impl Operator {
@@ -188,7 +192,8 @@ impl Operator {
188192
| Operator::AtQuestion
189193
| Operator::Question
190194
| Operator::QuestionAnd
191-
| Operator::QuestionPipe => None,
195+
| Operator::QuestionPipe
196+
| Operator::Colon => None,
192197
}
193198
}
194199

@@ -283,7 +288,8 @@ impl Operator {
283288
| Operator::AtQuestion
284289
| Operator::Question
285290
| Operator::QuestionAnd
286-
| Operator::QuestionPipe => None,
291+
| Operator::QuestionPipe
292+
| Operator::Colon => None,
287293
}
288294
}
289295

@@ -323,7 +329,8 @@ impl Operator {
323329
| Operator::AtQuestion
324330
| Operator::Question
325331
| Operator::QuestionAnd
326-
| Operator::QuestionPipe => 30,
332+
| Operator::QuestionPipe
333+
| Operator::Colon => 30,
327334
Operator::Plus | Operator::Minus => 40,
328335
Operator::Multiply | Operator::Divide | Operator::Modulo => 45,
329336
}
@@ -369,7 +376,8 @@ impl Operator {
369376
| Operator::AtQuestion
370377
| Operator::Question
371378
| Operator::QuestionAnd
372-
| Operator::QuestionPipe => true,
379+
| Operator::QuestionPipe
380+
| Operator::Colon => true,
373381

374382
// E.g. `TRUE OR NULL` is `TRUE`
375383
Operator::Or
@@ -429,6 +437,7 @@ impl fmt::Display for Operator {
429437
Operator::Question => "?",
430438
Operator::QuestionAnd => "?&",
431439
Operator::QuestionPipe => "?|",
440+
Operator::Colon => ":",
432441
};
433442
write!(f, "{display}")
434443
}

datafusion/expr-common/src/type_coercion/binary.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,9 @@ impl<'a> BinaryTypeCoercer<'a> {
324324
)
325325
}
326326
},
327+
Colon => {
328+
Ok(Signature { lhs: lhs.clone(), rhs: rhs.clone(), ret: lhs.clone() })
329+
},
327330
IntegerDivide | Arrow | LongArrow | HashArrow | HashLongArrow
328331
| HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe => {
329332
not_impl_err!("Operator {} is not yet supported", self.op)

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ impl BinaryExpr {
715715
StringConcat => concat_elements(&left, &right),
716716
AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | HashLongArrow | AtAt
717717
| HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe
718-
| IntegerDivide => {
718+
| IntegerDivide | Colon => {
719719
not_impl_err!(
720720
"Binary operator '{:?}' is not supported in the physical expr",
721721
self.op

datafusion/sql/src/expr/binary_op.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use sqlparser::ast::BinaryOperator;
2222

2323
impl<S: ContextProvider> SqlToRel<'_, S> {
2424
pub(crate) fn parse_sql_binary_op(&self, op: &BinaryOperator) -> Result<Operator> {
25-
match *op {
25+
match op {
2626
BinaryOperator::Gt => Ok(Operator::Gt),
2727
BinaryOperator::GtEq => Ok(Operator::GtEq),
2828
BinaryOperator::Lt => Ok(Operator::Lt),
@@ -68,6 +68,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
6868
BinaryOperator::Question => Ok(Operator::Question),
6969
BinaryOperator::QuestionAnd => Ok(Operator::QuestionAnd),
7070
BinaryOperator::QuestionPipe => Ok(Operator::QuestionPipe),
71+
BinaryOperator::Custom(s) if s == ":" => Ok(Operator::Colon),
7172
_ => not_impl_err!("Unsupported binary operator: {:?}", op),
7273
}
7374
}

datafusion/sql/src/expr/mod.rs

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ use datafusion_expr::planner::{
2222
use sqlparser::ast::{
2323
AccessExpr, BinaryOperator, CastFormat, CastKind, CeilFloorKind,
2424
DataType as SQLDataType, DateTimeField, DictionaryField, Expr as SQLExpr,
25-
ExprWithAlias as SQLExprWithAlias, MapEntry, StructField, Subscript, TrimWhereField,
26-
TypedString, Value, ValueWithSpan,
25+
ExprWithAlias as SQLExprWithAlias, JsonPath, MapEntry, StructField, Subscript,
26+
TrimWhereField, TypedString, Value, ValueWithSpan,
2727
};
2828

2929
use datafusion_common::{
@@ -651,10 +651,36 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
651651
options: Box::new(WildcardOptions::default()),
652652
}),
653653
SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values),
654+
SQLExpr::JsonAccess { value, path } => {
655+
self.parse_json_access(schema, planner_context, value, &path)
656+
}
654657
_ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
655658
}
656659
}
657660

661+
fn parse_json_access(
662+
&self,
663+
schema: &DFSchema,
664+
planner_context: &mut PlannerContext,
665+
value: Box<SQLExpr>,
666+
path: &JsonPath,
667+
) -> Result<Expr> {
668+
let json_path = path.to_string();
669+
let json_path = if let Some(json_path) = json_path.strip_prefix(":") {
670+
// sqlparser's JsonPath display adds an extra `:` at the beginning.
671+
json_path.to_owned()
672+
} else {
673+
json_path
674+
};
675+
self.build_logical_expr(
676+
BinaryOperator::Custom(":".to_owned()),
677+
self.sql_to_expr(*value, schema, planner_context)?,
678+
// pass json path as a string literal, let the impl parse it when needed.
679+
Expr::Literal(ScalarValue::Utf8(Some(json_path)), None),
680+
schema,
681+
)
682+
}
683+
658684
/// Parses a struct(..) expression and plans it creation
659685
fn parse_struct(
660686
&self,

datafusion/sql/src/unparser/expr.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,7 @@ impl Unparser<'_> {
10941094
Operator::Question => Ok(BinaryOperator::Question),
10951095
Operator::QuestionAnd => Ok(BinaryOperator::QuestionAnd),
10961096
Operator::QuestionPipe => Ok(BinaryOperator::QuestionPipe),
1097+
Operator::Colon => Ok(BinaryOperator::Custom(":".to_owned())),
10971098
}
10981099
}
10991100

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2821,3 +2821,39 @@ fn test_struct_expr3() {
28212821
@r#"SELECT test.c1."metadata".product."name" FROM (SELECT {"metadata": {product: {"name": 'Product Name'}}} AS c1) AS test"#
28222822
);
28232823
}
2824+
2825+
#[test]
2826+
fn test_json_access_1() {
2827+
let statement = generate_round_trip_statement(
2828+
GenericDialect {},
2829+
r#"SELECT j1_string:field FROM j1"#,
2830+
);
2831+
assert_snapshot!(
2832+
statement,
2833+
@r#"SELECT (j1.j1_string : 'field') FROM j1"#
2834+
);
2835+
}
2836+
2837+
#[test]
2838+
fn test_json_access_2() {
2839+
let statement = generate_round_trip_statement(
2840+
GenericDialect {},
2841+
r#"SELECT j1_string:field[0] FROM j1"#,
2842+
);
2843+
assert_snapshot!(
2844+
statement,
2845+
@r#"SELECT (j1.j1_string : 'field[0]') FROM j1"#
2846+
);
2847+
}
2848+
2849+
#[test]
2850+
fn test_json_access_3() {
2851+
let statement = generate_round_trip_statement(
2852+
GenericDialect {},
2853+
r#"SELECT j1_string:field.inner1['inner2'] FROM j1"#,
2854+
);
2855+
assert_snapshot!(
2856+
statement,
2857+
@r#"SELECT (j1.j1_string : 'field.inner1[''inner2'']') FROM j1"#
2858+
);
2859+
}

datafusion/substrait/src/logical_plan/producer/expr/scalar_function.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,5 +344,6 @@ pub fn operator_to_name(op: Operator) -> &'static str {
344344
Operator::BitwiseXor => "bitwise_xor",
345345
Operator::BitwiseShiftRight => "bitwise_shift_right",
346346
Operator::BitwiseShiftLeft => "bitwise_shift_left",
347+
Operator::Colon => "colon",
347348
}
348349
}

0 commit comments

Comments
 (0)