Skip to content

Commit a7d94ce

Browse files
authored
Merge pull request #10544 from sundy-li/hive_iceberg-decimal
chore(query): add datatype mapping for iceberg and hive
2 parents c990e9b + 678213d commit a7d94ce

File tree

10 files changed

+119
-102
lines changed

10 files changed

+119
-102
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/ast/src/parser/expr.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1353,7 +1353,10 @@ pub fn type_name(i: Input) -> IResult<TypeName> {
13531353
rule! { ( INT64 | SIGNED | BIGINT ) ~ ( "(" ~ #literal_u64 ~ ")" )? },
13541354
);
13551355
let ty_float32 = value(TypeName::Float32, rule! { FLOAT32 | FLOAT });
1356-
let ty_float64 = value(TypeName::Float64, rule! { FLOAT64 | DOUBLE });
1356+
let ty_float64 = value(
1357+
TypeName::Float64,
1358+
rule! { (FLOAT64 | DOUBLE) ~ ( PRECISION )? },
1359+
);
13571360
let ty_decimal = map_res(
13581361
rule! { DECIMAL ~ "(" ~ #literal_u64 ~ "," ~ #literal_u64 ~ ")" },
13591362
|(_, _, precision, _, scale, _)| {

src/query/ast/src/parser/token.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,8 @@ pub enum TokenKind {
663663
ROLE,
664664
#[token("PRECEDING", ignore(ascii_case))]
665665
PRECEDING,
666+
#[token("PRECISION", ignore(ascii_case))]
667+
PRECISION,
666668
#[token("PRESIGN", ignore(ascii_case))]
667669
PRESIGN,
668670
#[token("PRIVILEGES", ignore(ascii_case))]

src/query/sql/src/planner/binder/ddl/table.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ use crate::optimizer::optimize;
7878
use crate::optimizer::OptimizerConfig;
7979
use crate::optimizer::OptimizerContext;
8080
use crate::planner::semantic::normalize_identifier;
81+
use crate::planner::semantic::resolve_type_name;
8182
use crate::planner::semantic::IdentifierNormalizer;
82-
use crate::planner::semantic::TypeChecker;
8383
use crate::plans::AddTableColumnPlan;
8484
use crate::plans::AlterTableClusterKeyPlan;
8585
use crate::plans::AnalyzeTablePlan;
@@ -869,7 +869,7 @@ impl Binder {
869869
let mut fields_comments = Vec::with_capacity(columns.len());
870870
for column in columns.iter() {
871871
let name = normalize_identifier(&column.name, &self.name_resolution_ctx).name;
872-
let schema_data_type = TypeChecker::resolve_type_name(&column.data_type)?;
872+
let schema_data_type = resolve_type_name(&column.data_type)?;
873873

874874
fields.push(TableField::new(&name, schema_data_type.clone()));
875875
fields_default_expr.push({

src/query/sql/src/planner/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ pub use metadata::*;
3636
pub use planner::Planner;
3737
pub use plans::ScalarExpr;
3838
pub use semantic::normalize_identifier;
39+
pub use semantic::resolve_type_name_by_str;
3940
pub use semantic::validate_function_arg;
4041
pub use semantic::IdentifierNormalizer;
4142
pub use semantic::NameResolutionContext;

src/query/sql/src/planner/semantic/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,7 @@ pub use name_resolution::compare_table_name;
2626
pub use name_resolution::normalize_identifier;
2727
pub use name_resolution::IdentifierNormalizer;
2828
pub use name_resolution::NameResolutionContext;
29+
pub use type_check::resolve_type_name;
30+
pub use type_check::resolve_type_name_by_str;
2931
pub use type_check::validate_function_arg;
3032
pub use type_check::TypeChecker;

src/query/sql/src/planner/semantic/type_check.rs

Lines changed: 84 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ impl<'a> TypeChecker<'a> {
518518
span: None,
519519
is_try: false,
520520
expr: Box::new(scalar.as_raw_expr_with_col_name()),
521-
dest_type: DataType::from(&Self::resolve_type_name(target_type)?),
521+
dest_type: DataType::from(&resolve_type_name(target_type)?),
522522
};
523523
let registry = &BUILTIN_FUNCTIONS;
524524
let expr = type_check::check(&raw_expr, registry)?;
@@ -541,7 +541,7 @@ impl<'a> TypeChecker<'a> {
541541
span: None,
542542
is_try: true,
543543
expr: Box::new(scalar.as_raw_expr_with_col_name()),
544-
dest_type: DataType::from(&Self::resolve_type_name(target_type)?),
544+
dest_type: DataType::from(&resolve_type_name(target_type)?),
545545
};
546546
let registry = &BUILTIN_FUNCTIONS;
547547
let expr = type_check::check(&raw_expr, registry)?;
@@ -2457,79 +2457,93 @@ impl<'a> TypeChecker<'a> {
24572457
&& names.contains(&name);
24582458
Ok(result)
24592459
}
2460+
}
24602461

2461-
pub fn resolve_type_name(type_name: &TypeName) -> Result<TableDataType> {
2462-
let data_type = match type_name {
2463-
TypeName::Boolean => TableDataType::Boolean,
2464-
TypeName::UInt8 => TableDataType::Number(NumberDataType::UInt8),
2465-
TypeName::UInt16 => TableDataType::Number(NumberDataType::UInt16),
2466-
TypeName::UInt32 => TableDataType::Number(NumberDataType::UInt32),
2467-
TypeName::UInt64 => TableDataType::Number(NumberDataType::UInt64),
2468-
TypeName::Int8 => TableDataType::Number(NumberDataType::Int8),
2469-
TypeName::Int16 => TableDataType::Number(NumberDataType::Int16),
2470-
TypeName::Int32 => TableDataType::Number(NumberDataType::Int32),
2471-
TypeName::Int64 => TableDataType::Number(NumberDataType::Int64),
2472-
TypeName::Float32 => TableDataType::Number(NumberDataType::Float32),
2473-
TypeName::Float64 => TableDataType::Number(NumberDataType::Float64),
2474-
TypeName::Decimal { precision, scale } => {
2475-
TableDataType::Decimal(DecimalDataType::from_size(DecimalSize {
2476-
precision: *precision,
2477-
scale: *scale,
2478-
})?)
2479-
}
2480-
TypeName::String => TableDataType::String,
2481-
TypeName::Timestamp => TableDataType::Timestamp,
2482-
TypeName::Date => TableDataType::Date,
2483-
TypeName::Array(item_type) => {
2484-
TableDataType::Array(Box::new(Self::resolve_type_name(item_type)?))
2485-
}
2486-
TypeName::Map { key_type, val_type } => {
2487-
let key_type = Self::resolve_type_name(key_type)?;
2488-
match key_type {
2489-
TableDataType::Boolean
2490-
| TableDataType::String
2491-
| TableDataType::Number(_)
2492-
| TableDataType::Decimal(_)
2493-
| TableDataType::Timestamp
2494-
| TableDataType::Date => {
2495-
let val_type = Self::resolve_type_name(val_type)?;
2496-
let inner_type = TableDataType::Tuple {
2497-
fields_name: vec!["key".to_string(), "value".to_string()],
2498-
fields_type: vec![key_type, val_type],
2499-
};
2500-
TableDataType::Map(Box::new(inner_type))
2501-
}
2502-
_ => {
2503-
return Err(ErrorCode::Internal(format!(
2504-
"Invalid Map key type \'{:?}\'",
2505-
key_type
2506-
)));
2507-
}
2462+
pub fn resolve_type_name_by_str(name: &str) -> Result<TableDataType> {
2463+
let sql_tokens = common_ast::parser::tokenize_sql(name)?;
2464+
let backtrace = common_ast::Backtrace::new();
2465+
match common_ast::parser::expr::type_name(common_ast::Input(
2466+
&sql_tokens,
2467+
common_ast::Dialect::default(),
2468+
&backtrace,
2469+
)) {
2470+
Ok((_, typename)) => resolve_type_name(&typename),
2471+
Err(err) => Err(ErrorCode::SyntaxException(format!(
2472+
"Unsupported type name: {}, error: {}",
2473+
name, err
2474+
))),
2475+
}
2476+
}
2477+
2478+
pub fn resolve_type_name(type_name: &TypeName) -> Result<TableDataType> {
2479+
let data_type = match type_name {
2480+
TypeName::Boolean => TableDataType::Boolean,
2481+
TypeName::UInt8 => TableDataType::Number(NumberDataType::UInt8),
2482+
TypeName::UInt16 => TableDataType::Number(NumberDataType::UInt16),
2483+
TypeName::UInt32 => TableDataType::Number(NumberDataType::UInt32),
2484+
TypeName::UInt64 => TableDataType::Number(NumberDataType::UInt64),
2485+
TypeName::Int8 => TableDataType::Number(NumberDataType::Int8),
2486+
TypeName::Int16 => TableDataType::Number(NumberDataType::Int16),
2487+
TypeName::Int32 => TableDataType::Number(NumberDataType::Int32),
2488+
TypeName::Int64 => TableDataType::Number(NumberDataType::Int64),
2489+
TypeName::Float32 => TableDataType::Number(NumberDataType::Float32),
2490+
TypeName::Float64 => TableDataType::Number(NumberDataType::Float64),
2491+
TypeName::Decimal { precision, scale } => {
2492+
TableDataType::Decimal(DecimalDataType::from_size(DecimalSize {
2493+
precision: *precision,
2494+
scale: *scale,
2495+
})?)
2496+
}
2497+
TypeName::String => TableDataType::String,
2498+
TypeName::Timestamp => TableDataType::Timestamp,
2499+
TypeName::Date => TableDataType::Date,
2500+
TypeName::Array(item_type) => TableDataType::Array(Box::new(resolve_type_name(item_type)?)),
2501+
TypeName::Map { key_type, val_type } => {
2502+
let key_type = resolve_type_name(key_type)?;
2503+
match key_type {
2504+
TableDataType::Boolean
2505+
| TableDataType::String
2506+
| TableDataType::Number(_)
2507+
| TableDataType::Decimal(_)
2508+
| TableDataType::Timestamp
2509+
| TableDataType::Date => {
2510+
let val_type = resolve_type_name(val_type)?;
2511+
let inner_type = TableDataType::Tuple {
2512+
fields_name: vec!["key".to_string(), "value".to_string()],
2513+
fields_type: vec![key_type, val_type],
2514+
};
2515+
TableDataType::Map(Box::new(inner_type))
2516+
}
2517+
_ => {
2518+
return Err(ErrorCode::Internal(format!(
2519+
"Invalid Map key type \'{:?}\'",
2520+
key_type
2521+
)));
25082522
}
25092523
}
2510-
TypeName::Tuple {
2511-
fields_type,
2512-
fields_name,
2513-
} => TableDataType::Tuple {
2514-
fields_name: match fields_name {
2515-
None => (0..fields_type.len())
2516-
.map(|i| (i + 1).to_string())
2517-
.collect(),
2518-
Some(names) => names.clone(),
2519-
},
2520-
fields_type: fields_type
2521-
.iter()
2522-
.map(Self::resolve_type_name)
2523-
.collect::<Result<Vec<_>>>()?,
2524+
}
2525+
TypeName::Tuple {
2526+
fields_type,
2527+
fields_name,
2528+
} => TableDataType::Tuple {
2529+
fields_name: match fields_name {
2530+
None => (0..fields_type.len())
2531+
.map(|i| (i + 1).to_string())
2532+
.collect(),
2533+
Some(names) => names.clone(),
25242534
},
2525-
TypeName::Nullable(inner_type) => {
2526-
TableDataType::Nullable(Box::new(Self::resolve_type_name(inner_type)?))
2527-
}
2528-
TypeName::Variant => TableDataType::Variant,
2529-
};
2535+
fields_type: fields_type
2536+
.iter()
2537+
.map(resolve_type_name)
2538+
.collect::<Result<Vec<_>>>()?,
2539+
},
2540+
TypeName::Nullable(inner_type) => {
2541+
TableDataType::Nullable(Box::new(resolve_type_name(inner_type)?))
2542+
}
2543+
TypeName::Variant => TableDataType::Variant,
2544+
};
25302545

2531-
Ok(data_type)
2532-
}
2546+
Ok(data_type)
25332547
}
25342548

25352549
pub fn validate_function_arg(

src/query/storages/hive/hive/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ common-meta-app = { path = "../../../../meta/app" }
2323
common-meta-types = { path = "../../../../meta/types" }
2424
common-pipeline-core = { path = "../../../pipeline/core" }
2525
common-pipeline-sources = { path = "../../../pipeline/sources" }
26+
common-sql = { path = "../../../sql" }
2627
common-storage = { path = "../../../../common/storage" }
2728

2829
storages-common-cache = { path = "../../common/cache" }

src/query/storages/hive/hive/src/converters.rs

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
use std::sync::Arc;
1616

1717
use chrono::Utc;
18-
use common_exception::ErrorCode;
1918
use common_exception::Result;
20-
use common_expression::types::NumberDataType;
19+
use common_expression::types::decimal::DecimalSize;
20+
use common_expression::types::DecimalDataType;
2121
use common_expression::TableDataType;
2222
use common_expression::TableField;
2323
use common_expression::TableSchema;
@@ -29,6 +29,7 @@ use common_meta_app::schema::DatabaseNameIdent;
2929
use common_meta_app::schema::TableIdent;
3030
use common_meta_app::schema::TableInfo;
3131
use common_meta_app::schema::TableMeta;
32+
use common_sql::resolve_type_name_by_str;
3233

3334
use crate::hive_catalog::HIVE_CATALOG;
3435
use crate::hive_database::HiveDatabase;
@@ -140,26 +141,14 @@ fn try_from_filed_type_name(type_name: impl AsRef<str>) -> Result<TableDataType>
140141
let sub_type = try_from_filed_type_name(sub_type)?;
141142
Ok(TableDataType::Array(Box::new(sub_type.wrap_nullable())))
142143
} else {
143-
let number = match name.as_str() {
144-
"TINYINT" => Ok(NumberDataType::Int8),
145-
"SMALLINT" => Ok(NumberDataType::Int16),
146-
"INT" => Ok(NumberDataType::Int32),
147-
"BIGINT" => Ok(NumberDataType::Int64),
148-
//"DECIMAL", "NUMERIC" type not supported
149-
"FLOAT" => Ok(NumberDataType::Float32),
150-
"DOUBLE" | "DOUBLE PRECISION" => Ok(NumberDataType::Float64),
151-
152-
"BINARY" | "STRING" => return Ok(TableDataType::String),
153-
// boolean
154-
"BOOLEAN" => return Ok(TableDataType::Boolean),
155-
// timestamp
156-
"TIMESTAMP" => return Ok(TableDataType::Timestamp),
157-
"DATE" => return Ok(TableDataType::Date),
158-
_ => Err(ErrorCode::IllegalDataType(format!(
159-
"Unsupported data type: {}",
160-
name
144+
match name.as_str() {
145+
"DECIMAL" | "NUMERIC" => Ok(TableDataType::Decimal(DecimalDataType::Decimal128(
146+
DecimalSize {
147+
precision: 10,
148+
scale: 0,
149+
},
161150
))),
162-
}?;
163-
Ok(TableDataType::Number(number))
151+
_ => resolve_type_name_by_str(name.as_str()),
152+
}
164153
}
165154
}

src/query/storages/iceberg/src/converters.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
//! to databend
1717
1818
use chrono::Utc;
19+
use common_expression::types::decimal::DecimalSize;
20+
use common_expression::types::DecimalDataType;
1921
use common_expression::types::NumberDataType;
2022
use common_expression::TableDataType;
2123
use common_expression::TableField;
@@ -93,9 +95,14 @@ fn primitive_iceberg_to_databend(prim: &AllType) -> TableDataType {
9395
iceberg_rs::model::schema::PrimitiveType::Double => {
9496
TableDataType::Number(NumberDataType::Float64)
9597
}
96-
iceberg_rs::model::schema::PrimitiveType::Decimal { .. } => {
97-
// not supported
98-
unimplemented!()
98+
iceberg_rs::model::schema::PrimitiveType::Decimal { precision, scale } => {
99+
TableDataType::Decimal(
100+
DecimalDataType::from_size(DecimalSize {
101+
precision: *precision as u8,
102+
scale: *scale,
103+
})
104+
.unwrap(),
105+
)
99106
}
100107
iceberg_rs::model::schema::PrimitiveType::Date => {
101108
// 4 bytes date type
@@ -105,10 +112,7 @@ fn primitive_iceberg_to_databend(prim: &AllType) -> TableDataType {
105112
// not supported, time without date
106113
unimplemented!()
107114
}
108-
iceberg_rs::model::schema::PrimitiveType::Timestamp => {
109-
// not supported, timestamp without timezone
110-
unimplemented!()
111-
}
115+
iceberg_rs::model::schema::PrimitiveType::Timestamp => TableDataType::Timestamp,
112116
iceberg_rs::model::schema::PrimitiveType::Timestampz => TableDataType::Timestamp,
113117
iceberg_rs::model::schema::PrimitiveType::String => TableDataType::String,
114118
iceberg_rs::model::schema::PrimitiveType::Uuid => TableDataType::String,

0 commit comments

Comments
 (0)