Skip to content

Commit bd277c2

Browse files
authored
chore(query): allow arrow dictionary array into databend (#17521)
feat(query): allow arrow dictionary array into databend
1 parent 7b27c3a commit bd277c2

File tree

3 files changed

+59
-1
lines changed

3 files changed

+59
-1
lines changed

src/query/expression/src/converts/arrow/from.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@ impl TryFrom<&Field> for TableField {
150150
fields_type,
151151
}
152152
}
153+
ArrowDataType::Dictionary(_, b) => {
154+
let inner_f =
155+
Field::new(arrow_f.name(), b.as_ref().clone(), arrow_f.is_nullable());
156+
return Self::try_from(&inner_f);
157+
}
153158
arrow_type => {
154159
return Err(ErrorCode::Internal(format!(
155160
"Unsupported Arrow type: {:?}",
@@ -235,7 +240,11 @@ impl Column {
235240
Field::from(&f)
236241
}
237242

238-
pub fn from_arrow_rs(array: ArrayRef, data_type: &DataType) -> Result<Self> {
243+
pub fn from_arrow_rs(mut array: ArrayRef, data_type: &DataType) -> Result<Self> {
244+
if let ArrowDataType::Dictionary(_, v) = array.data_type() {
245+
array = arrow_cast::cast(array.as_ref(), v.as_ref())?;
246+
}
247+
239248
let column = match data_type {
240249
DataType::Null => Column::Null { len: array.len() },
241250
DataType::EmptyArray => Column::EmptyArray { len: array.len() },
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright 2022 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::sync::Arc;
16+
17+
use arrow_array::ArrayRef;
18+
use arrow_array::DictionaryArray;
19+
use arrow_array::Int32Array;
20+
use arrow_array::StringArray;
21+
use databend_common_expression::types::DataType;
22+
use databend_common_expression::Column;
23+
24+
/// Round-trips Arrow string data through `Column::from_arrow_rs` and
/// `Column::into_arrow_rs`, once from a plain `StringArray` and once from a
/// dictionary-encoded array over the same values.
///
/// Only the element count is asserted after the round trip; the decoded
/// values themselves are not compared.
#[test]
25+
fn test_from_arrow_rs() {
26+
// Plain (non-dictionary) nullable string array: foo, bar, NULL, baz.
let array = Arc::new(StringArray::from(vec![
27+
Some("foo"),
28+
Some("bar"),
29+
None,
30+
Some("baz"),
31+
])) as ArrayRef;
32+
33+
// Dictionary keys: each key is an index into `array_v` below.
// Key 2 points at the `None` entry of the values array.
let array_k = Int32Array::from(vec![0, 3, 2, 2]);
34+
// Dictionary values: same contents as `array` above.
let array_v = Arc::new(StringArray::from(vec![
35+
Some("foo"),
36+
Some("bar"),
37+
None,
38+
Some("baz"),
39+
])) as ArrayRef;
40+
41+
// Four keys over four values, so both inputs have length 4.
let array_dict = Arc::new(DictionaryArray::new(array_k, array_v)) as _;
42+
43+
// Both inputs are converted with the same target type (nullable String).
for arr in [array, array_dict] {
44+
// `arr` is cloned because it is still needed for the length check below.
let column = Column::from_arrow_rs(arr.clone(), &DataType::String.wrap_nullable()).unwrap();
45+
let c = column.into_arrow_rs();
46+
// NOTE(review): this only verifies the row count survives the round
// trip; consider also asserting the decoded values.
assert_eq!(c.len(), arr.len());
47+
}
48+
}

src/query/expression/tests/it/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use databend_common_expression::DataBlock;
2424

2525
extern crate core;
2626

27+
mod arrow;
2728
mod block;
2829
mod common;
2930
mod decimal;

0 commit comments

Comments
 (0)