//! Apache DataFusion SQL Dialect
//!
//! DataFusion is an Arrow-based query engine with modern SQL extensions.
//! Reference: <https://datafusion.apache.org/user-guide/sql/>
//!
//! Key characteristics:
//! - Arrow-native type system (Int8, Int16, Int32, Int64, Float32, Float64, Utf8, etc.)
//! - Double-quote identifiers
//! - Lowercase function names by default
//! - QUALIFY clause support
//! - EXCEPT for column exclusion (`SELECT * EXCEPT(col)`)
//! - LEFT SEMI JOIN / LEFT ANTI JOIN syntax
//! - TRY_CAST support
//! - Pipe operator (`|>`) for query chaining
//! - No UPDATE/DELETE support
//! - `arrow_cast()` and `arrow_typeof()` functions
//! - COPY ... TO syntax (no INTO keyword)
//! - Nested comment support
| 19 | +
|
| 20 | +use super::{DialectImpl, DialectType}; |
| 21 | +use crate::error::Result; |
| 22 | +use crate::expressions::{Expression, Function}; |
| 23 | +use crate::generator::GeneratorConfig; |
| 24 | +use crate::tokens::TokenizerConfig; |
| 25 | + |
/// Apache DataFusion dialect.
///
/// A zero-sized marker type: it carries no state, and all dialect behaviour is
/// supplied by its `DialectImpl` implementation and its inherent helper methods.
///
/// The standard traits are derived so the type can be printed in diagnostics,
/// copied freely, and built via `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct DataFusionDialect;
| 28 | + |
| 29 | +impl DialectImpl for DataFusionDialect { |
| 30 | + fn dialect_type(&self) -> DialectType { |
| 31 | + DialectType::DataFusion |
| 32 | + } |
| 33 | + |
| 34 | + fn tokenizer_config(&self) -> TokenizerConfig { |
| 35 | + let mut config = TokenizerConfig::default(); |
| 36 | + // DataFusion uses double quotes for identifiers |
| 37 | + config.identifiers.insert('"', '"'); |
| 38 | + // DataFusion supports nested comments |
| 39 | + config.nested_comments = true; |
| 40 | + config |
| 41 | + } |
| 42 | + |
| 43 | + fn generator_config(&self) -> GeneratorConfig { |
| 44 | + use crate::generator::{IdentifierQuoteStyle, LimitFetchStyle, NormalizeFunctions}; |
| 45 | + GeneratorConfig { |
| 46 | + identifier_quote: '"', |
| 47 | + identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE, |
| 48 | + dialect: Some(DialectType::DataFusion), |
| 49 | + // DataFusion lowercases function names |
| 50 | + normalize_functions: NormalizeFunctions::Lower, |
| 51 | + // TRY_CAST is supported |
| 52 | + try_supported: true, |
| 53 | + // DataFusion uses EXCEPT for column exclusion: SELECT * EXCEPT(col) |
| 54 | + star_except: "EXCEPT", |
| 55 | + // No multi-arg DISTINCT: COUNT(DISTINCT a, b) not supported |
| 56 | + multi_arg_distinct: false, |
| 57 | + // Window EXCLUDE not supported |
| 58 | + supports_window_exclude: false, |
| 59 | + // Interval allows plural form (DAYS, HOURS, etc.) |
| 60 | + interval_allows_plural_form: true, |
| 61 | + // Normalize date parts in EXTRACT |
| 62 | + normalize_extract_date_parts: true, |
| 63 | + // LIMIT style (not FETCH) |
| 64 | + limit_fetch_style: LimitFetchStyle::Limit, |
| 65 | + // No hints |
| 66 | + join_hints: false, |
| 67 | + table_hints: false, |
| 68 | + query_hints: false, |
| 69 | + // LEFT SEMI JOIN / LEFT ANTI JOIN syntax |
| 70 | + semi_anti_join_with_side: true, |
| 71 | + // COPY does not use INTO keyword |
| 72 | + copy_has_into_keyword: false, |
| 73 | + // NVL2 is supported (via coalesce-like behavior) |
| 74 | + nvl2_supported: true, |
| 75 | + // MEDIAN is supported |
| 76 | + supports_median: true, |
| 77 | + // Can implement array_any |
| 78 | + can_implement_array_any: true, |
| 79 | + // LIKE quantifiers not supported |
| 80 | + supports_like_quantifiers: false, |
| 81 | + // Aggregate FILTER is supported |
| 82 | + aggregate_filter_supported: true, |
| 83 | + // BETWEEN flags not supported |
| 84 | + supports_between_flags: false, |
| 85 | + ..Default::default() |
| 86 | + } |
| 87 | + } |
| 88 | + |
| 89 | + fn transform_expr(&self, expr: Expression) -> Result<Expression> { |
| 90 | + match expr { |
| 91 | + // Function transformations |
| 92 | + Expression::Function(f) => self.transform_function(*f), |
| 93 | + |
| 94 | + // Aggregate function transformations |
| 95 | + Expression::AggregateFunction(f) => self.transform_aggregate_function(f), |
| 96 | + |
| 97 | + // Pass through everything else |
| 98 | + _ => Ok(expr), |
| 99 | + } |
| 100 | + } |
| 101 | +} |
| 102 | + |
| 103 | +impl DataFusionDialect { |
| 104 | + fn transform_function(&self, f: Function) -> Result<Expression> { |
| 105 | + let name_upper = f.name.to_uppercase(); |
| 106 | + match name_upper.as_str() { |
| 107 | + // IFNULL → COALESCE (DataFusion uses COALESCE) |
| 108 | + "IFNULL" => Ok(Expression::Function(Box::new(Function::new( |
| 109 | + "coalesce".to_string(), |
| 110 | + f.args, |
| 111 | + )))), |
| 112 | + |
| 113 | + // SQUARE(x) → POWER(x, 2) |
| 114 | + "SQUARE" => { |
| 115 | + let mut args = f.args; |
| 116 | + args.push(Expression::Literal(crate::expressions::Literal::Number( |
| 117 | + "2".to_string(), |
| 118 | + ))); |
| 119 | + Ok(Expression::Function(Box::new(Function::new( |
| 120 | + "power".to_string(), |
| 121 | + args, |
| 122 | + )))) |
| 123 | + } |
| 124 | + |
| 125 | + // REGEXP_MATCHES → REGEXP_MATCH |
| 126 | + "REGEXP_MATCHES" => Ok(Expression::Function(Box::new(Function::new( |
| 127 | + "regexp_match".to_string(), |
| 128 | + f.args, |
| 129 | + )))), |
| 130 | + |
| 131 | + // DATE_FORMAT / TIME_TO_STR / STRFTIME → TO_CHAR |
| 132 | + "DATE_FORMAT" | "TIME_TO_STR" => Ok(Expression::Function(Box::new(Function::new( |
| 133 | + "to_char".to_string(), |
| 134 | + f.args, |
| 135 | + )))), |
| 136 | + |
| 137 | + // Pass through everything else |
| 138 | + _ => Ok(Expression::Function(Box::new(f))), |
| 139 | + } |
| 140 | + } |
| 141 | + |
| 142 | + fn transform_aggregate_function( |
| 143 | + &self, |
| 144 | + f: Box<crate::expressions::AggregateFunction>, |
| 145 | + ) -> Result<Expression> { |
| 146 | + let name_upper = f.name.to_uppercase(); |
| 147 | + match name_upper.as_str() { |
| 148 | + // GROUP_CONCAT → STRING_AGG |
| 149 | + "GROUP_CONCAT" => Ok(Expression::Function(Box::new(Function::new( |
| 150 | + "string_agg".to_string(), |
| 151 | + f.args, |
| 152 | + )))), |
| 153 | + |
| 154 | + // LISTAGG → STRING_AGG |
| 155 | + "LISTAGG" => Ok(Expression::Function(Box::new(Function::new( |
| 156 | + "string_agg".to_string(), |
| 157 | + f.args, |
| 158 | + )))), |
| 159 | + |
| 160 | + // Pass through everything else |
| 161 | + _ => Ok(Expression::AggregateFunction(f)), |
| 162 | + } |
| 163 | + } |
| 164 | +} |
0 commit comments