Skip to content

Commit 5d3230e

Browse files
committed
chore(query): add datatype mapping for iceberg and hive
1 parent 6df81ff commit 5d3230e

File tree

6 files changed

+117
-100
lines changed

6 files changed

+117
-100
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/expression/src/schema.rs

Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ use common_arrow::arrow::datatypes::DataType as ArrowDataType;
2424
use common_arrow::arrow::datatypes::Field as ArrowField;
2525
use common_arrow::arrow::datatypes::Schema as ArrowSchema;
2626
use common_arrow::arrow::datatypes::TimeUnit;
27+
use common_ast::ast::TypeName;
2728
use common_exception::ErrorCode;
2829
use common_exception::Result;
2930
use ethnum::i256;
@@ -1195,6 +1196,79 @@ impl TableDataType {
11951196
}
11961197
}
11971198
}
1199+
1200+
pub fn from_type_name(type_name: &TypeName) -> Result<TableDataType> {
1201+
let data_type = match type_name {
1202+
TypeName::Boolean => TableDataType::Boolean,
1203+
TypeName::UInt8 => TableDataType::Number(NumberDataType::UInt8),
1204+
TypeName::UInt16 => TableDataType::Number(NumberDataType::UInt16),
1205+
TypeName::UInt32 => TableDataType::Number(NumberDataType::UInt32),
1206+
TypeName::UInt64 => TableDataType::Number(NumberDataType::UInt64),
1207+
TypeName::Int8 => TableDataType::Number(NumberDataType::Int8),
1208+
TypeName::Int16 => TableDataType::Number(NumberDataType::Int16),
1209+
TypeName::Int32 => TableDataType::Number(NumberDataType::Int32),
1210+
TypeName::Int64 => TableDataType::Number(NumberDataType::Int64),
1211+
TypeName::Float32 => TableDataType::Number(NumberDataType::Float32),
1212+
TypeName::Float64 => TableDataType::Number(NumberDataType::Float64),
1213+
TypeName::Decimal { precision, scale } => {
1214+
TableDataType::Decimal(DecimalDataType::from_size(DecimalSize {
1215+
precision: *precision,
1216+
scale: *scale,
1217+
})?)
1218+
}
1219+
TypeName::String => TableDataType::String,
1220+
TypeName::Timestamp => TableDataType::Timestamp,
1221+
TypeName::Date => TableDataType::Date,
1222+
TypeName::Array(item_type) => {
1223+
TableDataType::Array(Box::new(Self::from_type_name(item_type)?))
1224+
}
1225+
TypeName::Map { key_type, val_type } => {
1226+
let key_type = Self::from_type_name(key_type)?;
1227+
match key_type {
1228+
TableDataType::Boolean
1229+
| TableDataType::String
1230+
| TableDataType::Number(_)
1231+
| TableDataType::Decimal(_)
1232+
| TableDataType::Timestamp
1233+
| TableDataType::Date => {
1234+
let val_type = Self::from_type_name(val_type)?;
1235+
let inner_type = TableDataType::Tuple {
1236+
fields_name: vec!["key".to_string(), "value".to_string()],
1237+
fields_type: vec![key_type, val_type],
1238+
};
1239+
TableDataType::Map(Box::new(inner_type))
1240+
}
1241+
_ => {
1242+
return Err(ErrorCode::Internal(format!(
1243+
"Invalid Map key type \'{:?}\'",
1244+
key_type
1245+
)));
1246+
}
1247+
}
1248+
}
1249+
TypeName::Tuple {
1250+
fields_type,
1251+
fields_name,
1252+
} => TableDataType::Tuple {
1253+
fields_name: match fields_name {
1254+
None => (0..fields_type.len())
1255+
.map(|i| (i + 1).to_string())
1256+
.collect(),
1257+
Some(names) => names.clone(),
1258+
},
1259+
fields_type: fields_type
1260+
.iter()
1261+
.map(Self::from_type_name)
1262+
.collect::<Result<Vec<_>>>()?,
1263+
},
1264+
TypeName::Nullable(inner_type) => {
1265+
TableDataType::Nullable(Box::new(Self::from_type_name(inner_type)?))
1266+
}
1267+
TypeName::Variant => TableDataType::Variant,
1268+
};
1269+
1270+
Ok(data_type)
1271+
}
11981272
}
11991273

12001274
pub type DataSchemaRef = Arc<DataSchema>;

src/query/sql/src/planner/semantic/type_check.rs

Lines changed: 2 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -518,7 +518,7 @@ impl<'a> TypeChecker<'a> {
518518
span: None,
519519
is_try: false,
520520
expr: Box::new(scalar.as_raw_expr_with_col_name()),
521-
dest_type: DataType::from(&Self::resolve_type_name(target_type)?),
521+
dest_type: DataType::from(&TableDataType::from_type_name(target_type)?),
522522
};
523523
let registry = &BUILTIN_FUNCTIONS;
524524
let expr = type_check::check(&raw_expr, registry)?;
@@ -541,7 +541,7 @@ impl<'a> TypeChecker<'a> {
541541
span: None,
542542
is_try: true,
543543
expr: Box::new(scalar.as_raw_expr_with_col_name()),
544-
dest_type: DataType::from(&Self::resolve_type_name(target_type)?),
544+
dest_type: DataType::from(&TableDataType::from_type_name(target_type)?),
545545
};
546546
let registry = &BUILTIN_FUNCTIONS;
547547
let expr = type_check::check(&raw_expr, registry)?;
@@ -2457,79 +2457,6 @@ impl<'a> TypeChecker<'a> {
24572457
&& names.contains(&name);
24582458
Ok(result)
24592459
}
2460-
2461-
pub fn resolve_type_name(type_name: &TypeName) -> Result<TableDataType> {
2462-
let data_type = match type_name {
2463-
TypeName::Boolean => TableDataType::Boolean,
2464-
TypeName::UInt8 => TableDataType::Number(NumberDataType::UInt8),
2465-
TypeName::UInt16 => TableDataType::Number(NumberDataType::UInt16),
2466-
TypeName::UInt32 => TableDataType::Number(NumberDataType::UInt32),
2467-
TypeName::UInt64 => TableDataType::Number(NumberDataType::UInt64),
2468-
TypeName::Int8 => TableDataType::Number(NumberDataType::Int8),
2469-
TypeName::Int16 => TableDataType::Number(NumberDataType::Int16),
2470-
TypeName::Int32 => TableDataType::Number(NumberDataType::Int32),
2471-
TypeName::Int64 => TableDataType::Number(NumberDataType::Int64),
2472-
TypeName::Float32 => TableDataType::Number(NumberDataType::Float32),
2473-
TypeName::Float64 => TableDataType::Number(NumberDataType::Float64),
2474-
TypeName::Decimal { precision, scale } => {
2475-
TableDataType::Decimal(DecimalDataType::from_size(DecimalSize {
2476-
precision: *precision,
2477-
scale: *scale,
2478-
})?)
2479-
}
2480-
TypeName::String => TableDataType::String,
2481-
TypeName::Timestamp => TableDataType::Timestamp,
2482-
TypeName::Date => TableDataType::Date,
2483-
TypeName::Array(item_type) => {
2484-
TableDataType::Array(Box::new(Self::resolve_type_name(item_type)?))
2485-
}
2486-
TypeName::Map { key_type, val_type } => {
2487-
let key_type = Self::resolve_type_name(key_type)?;
2488-
match key_type {
2489-
TableDataType::Boolean
2490-
| TableDataType::String
2491-
| TableDataType::Number(_)
2492-
| TableDataType::Decimal(_)
2493-
| TableDataType::Timestamp
2494-
| TableDataType::Date => {
2495-
let val_type = Self::resolve_type_name(val_type)?;
2496-
let inner_type = TableDataType::Tuple {
2497-
fields_name: vec!["key".to_string(), "value".to_string()],
2498-
fields_type: vec![key_type, val_type],
2499-
};
2500-
TableDataType::Map(Box::new(inner_type))
2501-
}
2502-
_ => {
2503-
return Err(ErrorCode::Internal(format!(
2504-
"Invalid Map key type \'{:?}\'",
2505-
key_type
2506-
)));
2507-
}
2508-
}
2509-
}
2510-
TypeName::Tuple {
2511-
fields_type,
2512-
fields_name,
2513-
} => TableDataType::Tuple {
2514-
fields_name: match fields_name {
2515-
None => (0..fields_type.len())
2516-
.map(|i| (i + 1).to_string())
2517-
.collect(),
2518-
Some(names) => names.clone(),
2519-
},
2520-
fields_type: fields_type
2521-
.iter()
2522-
.map(Self::resolve_type_name)
2523-
.collect::<Result<Vec<_>>>()?,
2524-
},
2525-
TypeName::Nullable(inner_type) => {
2526-
TableDataType::Nullable(Box::new(Self::resolve_type_name(inner_type)?))
2527-
}
2528-
TypeName::Variant => TableDataType::Variant,
2529-
};
2530-
2531-
Ok(data_type)
2532-
}
25332460
}
25342461

25352462
pub fn validate_function_arg(

src/query/storages/hive/hive/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ test = false
1313

1414
[dependencies]
1515
common-arrow = { path = "../../../../common/arrow" }
16+
common-ast = { path = "../../../ast" }
1617
common-base = { path = "../../../../common/base" }
1718
common-catalog = { path = "../../../catalog" }
1819
common-exception = { path = "../../../../common/exception" }

src/query/storages/hive/hive/src/converters.rs

Lines changed: 28 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@ use std::sync::Arc;
1717
use chrono::Utc;
1818
use common_exception::ErrorCode;
1919
use common_exception::Result;
20+
use common_expression::types::decimal::DecimalSize;
21+
use common_expression::types::DecimalDataType;
2022
use common_expression::types::NumberDataType;
2123
use common_expression::TableDataType;
2224
use common_expression::TableField;
@@ -141,24 +143,32 @@ fn try_from_filed_type_name(type_name: impl AsRef<str>) -> Result<TableDataType>
141143
Ok(TableDataType::Array(Box::new(sub_type.wrap_nullable())))
142144
} else {
143145
let number = match name.as_str() {
144-
"TINYINT" => Ok(NumberDataType::Int8),
145-
"SMALLINT" => Ok(NumberDataType::Int16),
146-
"INT" => Ok(NumberDataType::Int32),
147-
"BIGINT" => Ok(NumberDataType::Int64),
148-
//"DECIMAL", "NUMERIC" type not supported
149-
"FLOAT" => Ok(NumberDataType::Float32),
150-
"DOUBLE" | "DOUBLE PRECISION" => Ok(NumberDataType::Float64),
151-
152-
"BINARY" | "STRING" => return Ok(TableDataType::String),
153-
// boolean
154-
"BOOLEAN" => return Ok(TableDataType::Boolean),
155-
// timestamp
156-
"TIMESTAMP" => return Ok(TableDataType::Timestamp),
157-
"DATE" => return Ok(TableDataType::Date),
158-
_ => Err(ErrorCode::IllegalDataType(format!(
159-
"Unsupported data type: {}",
160-
name
161-
))),
146+
"DOUBLE PRECISION" => Ok(NumberDataType::Float64),
147+
"DECIMAL" | "NUMERIC" => {
148+
return Ok(TableDataType::Decimal(DecimalDataType::Decimal128(
149+
DecimalSize {
150+
precision: 10,
151+
scale: 0,
152+
},
153+
)));
154+
}
155+
_ => {
156+
let sql_tokens = common_ast::parser::tokenize_sql(name.as_str())?;
157+
let backtrace = common_ast::parser::Backtrace::new();
158+
match common_ast::parser::expr::type_name(common_ast::Input(
159+
&sql_tokens,
160+
common_ast::Dialect::default(),
161+
backtrace,
162+
)) {
163+
Ok((_, typename)) => TableDataType::from_type_name(&typename),
164+
Err(err) => {
165+
return Err(ErrorCode::SyntaxException(format!(
166+
"Unsupported type name: {}, error: {}",
167+
name, err
168+
)));
169+
}
170+
}
171+
}
162172
}?;
163173
Ok(TableDataType::Number(number))
164174
}

src/query/storages/iceberg/src/converters.rs

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@
1616
//! to databend
1717
1818
use chrono::Utc;
19+
use common_expression::types::decimal::DecimalSize;
20+
use common_expression::types::DecimalDataType;
1921
use common_expression::types::NumberDataType;
2022
use common_expression::TableDataType;
2123
use common_expression::TableField;
@@ -93,9 +95,14 @@ fn primitive_iceberg_to_databend(prim: &AllType) -> TableDataType {
9395
iceberg_rs::model::schema::PrimitiveType::Double => {
9496
TableDataType::Number(NumberDataType::Float64)
9597
}
96-
iceberg_rs::model::schema::PrimitiveType::Decimal { .. } => {
97-
// not supported
98-
unimplemented!()
98+
iceberg_rs::model::schema::PrimitiveType::Decimal { precision, scale } => {
99+
TableDataType::Decimal(
100+
DecimalDataType::from_size(DecimalSize {
101+
precision: *precision as u8,
102+
scale: *scale,
103+
})
104+
.unwrap(),
105+
)
99106
}
100107
iceberg_rs::model::schema::PrimitiveType::Date => {
101108
// 4 bytes date type
@@ -105,10 +112,7 @@ fn primitive_iceberg_to_databend(prim: &AllType) -> TableDataType {
105112
// not supported, time without date
106113
unimplemented!()
107114
}
108-
iceberg_rs::model::schema::PrimitiveType::Timestamp => {
109-
// not supported, timestamp without timezone
110-
unimplemented!()
111-
}
115+
iceberg_rs::model::schema::PrimitiveType::Timestamp => TableDataType::Timestamp,
112116
iceberg_rs::model::schema::PrimitiveType::Timestampz => TableDataType::Timestamp,
113117
iceberg_rs::model::schema::PrimitiveType::String => TableDataType::String,
114118
iceberg_rs::model::schema::PrimitiveType::Uuid => TableDataType::String,

0 commit comments

Comments
 (0)