Skip to content

Commit dd197cf

Browse files
committed
chore(query): add datatype mapping for iceberg and hive2
1 parent 5d3230e commit dd197cf

File tree

10 files changed

+114
-112
lines changed

10 files changed

+114
-112
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/ast/src/parser/expr.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1353,7 +1353,10 @@ pub fn type_name(i: Input) -> IResult<TypeName> {
13531353
rule! { ( INT64 | SIGNED | BIGINT ) ~ ( "(" ~ #literal_u64 ~ ")" )? },
13541354
);
13551355
let ty_float32 = value(TypeName::Float32, rule! { FLOAT32 | FLOAT });
1356-
let ty_float64 = value(TypeName::Float64, rule! { FLOAT64 | DOUBLE });
1356+
let ty_float64 = value(
1357+
TypeName::Float64,
1358+
rule! { (FLOAT64 | DOUBLE) ~ ( PRECISION )? },
1359+
);
13571360
let ty_decimal = map_res(
13581361
rule! { DECIMAL ~ "(" ~ #literal_u64 ~ "," ~ #literal_u64 ~ ")" },
13591362
|(_, _, precision, _, scale, _)| {

src/query/ast/src/parser/token.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,8 @@ pub enum TokenKind {
663663
ROLE,
664664
#[token("PRECEDING", ignore(ascii_case))]
665665
PRECEDING,
666+
#[token("PRECISION", ignore(ascii_case))]
667+
PRECISION,
666668
#[token("PRESIGN", ignore(ascii_case))]
667669
PRESIGN,
668670
#[token("PRIVILEGES", ignore(ascii_case))]

src/query/expression/src/schema.rs

Lines changed: 0 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ use common_arrow::arrow::datatypes::DataType as ArrowDataType;
2424
use common_arrow::arrow::datatypes::Field as ArrowField;
2525
use common_arrow::arrow::datatypes::Schema as ArrowSchema;
2626
use common_arrow::arrow::datatypes::TimeUnit;
27-
use common_ast::ast::TypeName;
2827
use common_exception::ErrorCode;
2928
use common_exception::Result;
3029
use ethnum::i256;
@@ -1196,79 +1195,6 @@ impl TableDataType {
11961195
}
11971196
}
11981197
}
1199-
1200-
pub fn from_type_name(type_name: &TypeName) -> Result<TableDataType> {
1201-
let data_type = match type_name {
1202-
TypeName::Boolean => TableDataType::Boolean,
1203-
TypeName::UInt8 => TableDataType::Number(NumberDataType::UInt8),
1204-
TypeName::UInt16 => TableDataType::Number(NumberDataType::UInt16),
1205-
TypeName::UInt32 => TableDataType::Number(NumberDataType::UInt32),
1206-
TypeName::UInt64 => TableDataType::Number(NumberDataType::UInt64),
1207-
TypeName::Int8 => TableDataType::Number(NumberDataType::Int8),
1208-
TypeName::Int16 => TableDataType::Number(NumberDataType::Int16),
1209-
TypeName::Int32 => TableDataType::Number(NumberDataType::Int32),
1210-
TypeName::Int64 => TableDataType::Number(NumberDataType::Int64),
1211-
TypeName::Float32 => TableDataType::Number(NumberDataType::Float32),
1212-
TypeName::Float64 => TableDataType::Number(NumberDataType::Float64),
1213-
TypeName::Decimal { precision, scale } => {
1214-
TableDataType::Decimal(DecimalDataType::from_size(DecimalSize {
1215-
precision: *precision,
1216-
scale: *scale,
1217-
})?)
1218-
}
1219-
TypeName::String => TableDataType::String,
1220-
TypeName::Timestamp => TableDataType::Timestamp,
1221-
TypeName::Date => TableDataType::Date,
1222-
TypeName::Array(item_type) => {
1223-
TableDataType::Array(Box::new(Self::from_type_name(item_type)?))
1224-
}
1225-
TypeName::Map { key_type, val_type } => {
1226-
let key_type = Self::from_type_name(key_type)?;
1227-
match key_type {
1228-
TableDataType::Boolean
1229-
| TableDataType::String
1230-
| TableDataType::Number(_)
1231-
| TableDataType::Decimal(_)
1232-
| TableDataType::Timestamp
1233-
| TableDataType::Date => {
1234-
let val_type = Self::from_type_name(val_type)?;
1235-
let inner_type = TableDataType::Tuple {
1236-
fields_name: vec!["key".to_string(), "value".to_string()],
1237-
fields_type: vec![key_type, val_type],
1238-
};
1239-
TableDataType::Map(Box::new(inner_type))
1240-
}
1241-
_ => {
1242-
return Err(ErrorCode::Internal(format!(
1243-
"Invalid Map key type \'{:?}\'",
1244-
key_type
1245-
)));
1246-
}
1247-
}
1248-
}
1249-
TypeName::Tuple {
1250-
fields_type,
1251-
fields_name,
1252-
} => TableDataType::Tuple {
1253-
fields_name: match fields_name {
1254-
None => (0..fields_type.len())
1255-
.map(|i| (i + 1).to_string())
1256-
.collect(),
1257-
Some(names) => names.clone(),
1258-
},
1259-
fields_type: fields_type
1260-
.iter()
1261-
.map(Self::from_type_name)
1262-
.collect::<Result<Vec<_>>>()?,
1263-
},
1264-
TypeName::Nullable(inner_type) => {
1265-
TableDataType::Nullable(Box::new(Self::from_type_name(inner_type)?))
1266-
}
1267-
TypeName::Variant => TableDataType::Variant,
1268-
};
1269-
1270-
Ok(data_type)
1271-
}
12721198
}
12731199

12741200
pub type DataSchemaRef = Arc<DataSchema>;

src/query/sql/src/planner/binder/ddl/table.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ use crate::optimizer::optimize;
7878
use crate::optimizer::OptimizerConfig;
7979
use crate::optimizer::OptimizerContext;
8080
use crate::planner::semantic::normalize_identifier;
81+
use crate::planner::semantic::resolve_type_name;
8182
use crate::planner::semantic::IdentifierNormalizer;
82-
use crate::planner::semantic::TypeChecker;
8383
use crate::plans::AddTableColumnPlan;
8484
use crate::plans::AlterTableClusterKeyPlan;
8585
use crate::plans::AnalyzeTablePlan;
@@ -869,7 +869,7 @@ impl Binder {
869869
let mut fields_comments = Vec::with_capacity(columns.len());
870870
for column in columns.iter() {
871871
let name = normalize_identifier(&column.name, &self.name_resolution_ctx).name;
872-
let schema_data_type = TypeChecker::resolve_type_name(&column.data_type)?;
872+
let schema_data_type = resolve_type_name(&column.data_type)?;
873873

874874
fields.push(TableField::new(&name, schema_data_type.clone()));
875875
fields_default_expr.push({

src/query/sql/src/planner/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ pub use metadata::*;
3636
pub use planner::Planner;
3737
pub use plans::ScalarExpr;
3838
pub use semantic::normalize_identifier;
39+
pub use semantic::resolve_type_name_by_str;
3940
pub use semantic::validate_function_arg;
4041
pub use semantic::IdentifierNormalizer;
4142
pub use semantic::NameResolutionContext;

src/query/sql/src/planner/semantic/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,7 @@ pub use name_resolution::compare_table_name;
2626
pub use name_resolution::normalize_identifier;
2727
pub use name_resolution::IdentifierNormalizer;
2828
pub use name_resolution::NameResolutionContext;
29+
pub use type_check::resolve_type_name;
30+
pub use type_check::resolve_type_name_by_str;
2931
pub use type_check::validate_function_arg;
3032
pub use type_check::TypeChecker;

src/query/sql/src/planner/semantic/type_check.rs

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ impl<'a> TypeChecker<'a> {
518518
span: None,
519519
is_try: false,
520520
expr: Box::new(scalar.as_raw_expr_with_col_name()),
521-
dest_type: DataType::from(&TableDataType::from_type_name(target_type)?),
521+
dest_type: DataType::from(&resolve_type_name(target_type)?),
522522
};
523523
let registry = &BUILTIN_FUNCTIONS;
524524
let expr = type_check::check(&raw_expr, registry)?;
@@ -541,7 +541,7 @@ impl<'a> TypeChecker<'a> {
541541
span: None,
542542
is_try: true,
543543
expr: Box::new(scalar.as_raw_expr_with_col_name()),
544-
dest_type: DataType::from(&TableDataType::from_type_name(target_type)?),
544+
dest_type: DataType::from(&resolve_type_name(target_type)?),
545545
};
546546
let registry = &BUILTIN_FUNCTIONS;
547547
let expr = type_check::check(&raw_expr, registry)?;
@@ -2459,6 +2459,95 @@ impl<'a> TypeChecker<'a> {
24592459
}
24602460
}
24612461

2462+
pub fn resolve_type_name_by_str(name: &str) -> Result<TableDataType> {
2463+
let sql_tokens = common_ast::parser::tokenize_sql(name)?;
2464+
let backtrace = common_ast::Backtrace::new();
2465+
match common_ast::parser::expr::type_name(common_ast::Input(
2466+
&sql_tokens,
2467+
common_ast::Dialect::default(),
2468+
&backtrace,
2469+
)) {
2470+
Ok((_, typename)) => resolve_type_name(&typename),
2471+
Err(err) => {
2472+
return Err(ErrorCode::SyntaxException(format!(
2473+
"Unsupported type name: {}, error: {}",
2474+
name, err
2475+
)));
2476+
}
2477+
}
2478+
}
2479+
2480+
pub fn resolve_type_name(type_name: &TypeName) -> Result<TableDataType> {
2481+
let data_type = match type_name {
2482+
TypeName::Boolean => TableDataType::Boolean,
2483+
TypeName::UInt8 => TableDataType::Number(NumberDataType::UInt8),
2484+
TypeName::UInt16 => TableDataType::Number(NumberDataType::UInt16),
2485+
TypeName::UInt32 => TableDataType::Number(NumberDataType::UInt32),
2486+
TypeName::UInt64 => TableDataType::Number(NumberDataType::UInt64),
2487+
TypeName::Int8 => TableDataType::Number(NumberDataType::Int8),
2488+
TypeName::Int16 => TableDataType::Number(NumberDataType::Int16),
2489+
TypeName::Int32 => TableDataType::Number(NumberDataType::Int32),
2490+
TypeName::Int64 => TableDataType::Number(NumberDataType::Int64),
2491+
TypeName::Float32 => TableDataType::Number(NumberDataType::Float32),
2492+
TypeName::Float64 => TableDataType::Number(NumberDataType::Float64),
2493+
TypeName::Decimal { precision, scale } => {
2494+
TableDataType::Decimal(DecimalDataType::from_size(DecimalSize {
2495+
precision: *precision,
2496+
scale: *scale,
2497+
})?)
2498+
}
2499+
TypeName::String => TableDataType::String,
2500+
TypeName::Timestamp => TableDataType::Timestamp,
2501+
TypeName::Date => TableDataType::Date,
2502+
TypeName::Array(item_type) => TableDataType::Array(Box::new(resolve_type_name(item_type)?)),
2503+
TypeName::Map { key_type, val_type } => {
2504+
let key_type = resolve_type_name(key_type)?;
2505+
match key_type {
2506+
TableDataType::Boolean
2507+
| TableDataType::String
2508+
| TableDataType::Number(_)
2509+
| TableDataType::Decimal(_)
2510+
| TableDataType::Timestamp
2511+
| TableDataType::Date => {
2512+
let val_type = resolve_type_name(val_type)?;
2513+
let inner_type = TableDataType::Tuple {
2514+
fields_name: vec!["key".to_string(), "value".to_string()],
2515+
fields_type: vec![key_type, val_type],
2516+
};
2517+
TableDataType::Map(Box::new(inner_type))
2518+
}
2519+
_ => {
2520+
return Err(ErrorCode::Internal(format!(
2521+
"Invalid Map key type \'{:?}\'",
2522+
key_type
2523+
)));
2524+
}
2525+
}
2526+
}
2527+
TypeName::Tuple {
2528+
fields_type,
2529+
fields_name,
2530+
} => TableDataType::Tuple {
2531+
fields_name: match fields_name {
2532+
None => (0..fields_type.len())
2533+
.map(|i| (i + 1).to_string())
2534+
.collect(),
2535+
Some(names) => names.clone(),
2536+
},
2537+
fields_type: fields_type
2538+
.iter()
2539+
.map(resolve_type_name)
2540+
.collect::<Result<Vec<_>>>()?,
2541+
},
2542+
TypeName::Nullable(inner_type) => {
2543+
TableDataType::Nullable(Box::new(resolve_type_name(inner_type)?))
2544+
}
2545+
TypeName::Variant => TableDataType::Variant,
2546+
};
2547+
2548+
Ok(data_type)
2549+
}
2550+
24622551
pub fn validate_function_arg(
24632552
name: &str,
24642553
args_len: usize,

src/query/storages/hive/hive/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ test = false
1313

1414
[dependencies]
1515
common-arrow = { path = "../../../../common/arrow" }
16-
common-ast = { path = "../../../ast" }
1716
common-base = { path = "../../../../common/base" }
1817
common-catalog = { path = "../../../catalog" }
1918
common-exception = { path = "../../../../common/exception" }
@@ -24,6 +23,7 @@ common-meta-app = { path = "../../../../meta/app" }
2423
common-meta-types = { path = "../../../../meta/types" }
2524
common-pipeline-core = { path = "../../../pipeline/core" }
2625
common-pipeline-sources = { path = "../../../pipeline/sources" }
26+
common-sql = { path = "../../../sql" }
2727
common-storage = { path = "../../../../common/storage" }
2828

2929
storages-common-cache = { path = "../../common/cache" }

src/query/storages/hive/hive/src/converters.rs

Lines changed: 10 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@
1515
use std::sync::Arc;
1616

1717
use chrono::Utc;
18-
use common_exception::ErrorCode;
1918
use common_exception::Result;
2019
use common_expression::types::decimal::DecimalSize;
2120
use common_expression::types::DecimalDataType;
22-
use common_expression::types::NumberDataType;
2321
use common_expression::TableDataType;
2422
use common_expression::TableField;
2523
use common_expression::TableSchema;
@@ -31,6 +29,7 @@ use common_meta_app::schema::DatabaseNameIdent;
3129
use common_meta_app::schema::TableIdent;
3230
use common_meta_app::schema::TableInfo;
3331
use common_meta_app::schema::TableMeta;
32+
use common_sql::resolve_type_name_by_str;
3433

3534
use crate::hive_catalog::HIVE_CATALOG;
3635
use crate::hive_database::HiveDatabase;
@@ -142,34 +141,14 @@ fn try_from_filed_type_name(type_name: impl AsRef<str>) -> Result<TableDataType>
142141
let sub_type = try_from_filed_type_name(sub_type)?;
143142
Ok(TableDataType::Array(Box::new(sub_type.wrap_nullable())))
144143
} else {
145-
let number = match name.as_str() {
146-
"DOUBLE PRECISION" => Ok(NumberDataType::Float64),
147-
"DECIMAL" | "NUMERIC" => {
148-
return Ok(TableDataType::Decimal(DecimalDataType::Decimal128(
149-
DecimalSize {
150-
precision: 10,
151-
scale: 0,
152-
},
153-
)));
154-
}
155-
_ => {
156-
let sql_tokens = common_ast::parser::tokenize_sql(name.as_str())?;
157-
let backtrace = common_ast::parser::Backtrace::new();
158-
match common_ast::parser::expr::type_name(common_ast::Input(
159-
&sql_tokens,
160-
common_ast::Dialect::default(),
161-
backtrace,
162-
)) {
163-
Ok((_, typename)) => TableDataType::from_type_name(&typename),
164-
Err(err) => {
165-
return Err(ErrorCode::SyntaxException(format!(
166-
"Unsupported type name: {}, error: {}",
167-
name, err
168-
)));
169-
}
170-
}
171-
}
172-
}?;
173-
Ok(TableDataType::Number(number))
144+
match name.as_str() {
145+
"DECIMAL" | "NUMERIC" => Ok(TableDataType::Decimal(DecimalDataType::Decimal128(
146+
DecimalSize {
147+
precision: 10,
148+
scale: 0,
149+
},
150+
))),
151+
_ => resolve_type_name_by_str(name.as_str()),
152+
}
174153
}
175154
}

0 commit comments

Comments
 (0)