Skip to content

Commit b3a441c

Browse files
committed
support dictionary types
1 parent 892c813 commit b3a441c

File tree

1 file changed

+83
-2
lines changed

1 file changed

+83
-2
lines changed

datafusion-postgres/src/datatypes.rs

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ use pgwire::error::{ErrorInfo, PgWireError, PgWireResult};
1818
use timezone::Tz;
1919

2020
pub(crate) fn into_pg_type(df_type: &DataType) -> PgWireResult<Type> {
21+
// Handle Dictionary types specially
22+
if let DataType::Dictionary(_, value_type) = df_type {
23+
// For Dictionary types, use the value type for mapping to Postgres types
24+
return into_pg_type(value_type);
25+
}
26+
2127
Ok(match df_type {
2228
DataType::Null => Type::UNKNOWN,
2329
DataType::Boolean => Type::BOOL,
@@ -41,7 +47,15 @@ pub(crate) fn into_pg_type(df_type: &DataType) -> PgWireResult<Type> {
4147
DataType::Utf8 => Type::VARCHAR,
4248
DataType::LargeUtf8 => Type::TEXT,
4349
DataType::List(field) | DataType::FixedSizeList(field, _) | DataType::LargeList(field) => {
44-
match field.data_type() {
50+
let field_type = field.data_type();
51+
52+
// Handle dictionary types in lists
53+
let actual_type = match field_type {
54+
DataType::Dictionary(_, value_type) => value_type.as_ref(),
55+
_ => field_type,
56+
};
57+
58+
match actual_type {
4559
DataType::Boolean => Type::BOOL_ARRAY,
4660
DataType::Int8 | DataType::UInt8 => Type::CHAR_ARRAY,
4761
DataType::Int16 | DataType::UInt16 => Type::INT2_ARRAY,
@@ -239,11 +253,72 @@ fn get_time64_nanosecond_value(arr: &Arc<dyn Array>, idx: usize) -> Option<Naive
239253
.value_as_datetime(idx)
240254
}
241255

256+
fn encode_dictionary_value(
257+
encoder: &mut DataRowEncoder,
258+
arr: &Arc<dyn Array>,
259+
idx: usize,
260+
) -> Option<PgWireResult<()>> {
261+
// Use pattern matching to handle dictionary arrays with different key types
262+
match arr.data_type() {
263+
DataType::Dictionary(key_type, _) => {
264+
match key_type.as_ref() {
265+
DataType::Int8 => {
266+
let dict = arr.as_any().downcast_ref::<DictionaryArray<Int8Type>>()?;
267+
let key = dict.keys().value(idx) as usize;
268+
Some(encode_value(encoder, dict.values(), key))
269+
}
270+
DataType::Int16 => {
271+
let dict = arr.as_any().downcast_ref::<DictionaryArray<Int16Type>>()?;
272+
let key = dict.keys().value(idx) as usize;
273+
Some(encode_value(encoder, dict.values(), key))
274+
}
275+
DataType::Int32 => {
276+
let dict = arr.as_any().downcast_ref::<DictionaryArray<Int32Type>>()?;
277+
let key = dict.keys().value(idx) as usize;
278+
Some(encode_value(encoder, dict.values(), key))
279+
}
280+
DataType::Int64 => {
281+
let dict = arr.as_any().downcast_ref::<DictionaryArray<Int64Type>>()?;
282+
let key = dict.keys().value(idx) as usize;
283+
Some(encode_value(encoder, dict.values(), key))
284+
}
285+
DataType::UInt8 => {
286+
let dict = arr.as_any().downcast_ref::<DictionaryArray<UInt8Type>>()?;
287+
let key = dict.keys().value(idx) as usize;
288+
Some(encode_value(encoder, dict.values(), key))
289+
}
290+
DataType::UInt16 => {
291+
let dict = arr.as_any().downcast_ref::<DictionaryArray<UInt16Type>>()?;
292+
let key = dict.keys().value(idx) as usize;
293+
Some(encode_value(encoder, dict.values(), key))
294+
}
295+
DataType::UInt32 => {
296+
let dict = arr.as_any().downcast_ref::<DictionaryArray<UInt32Type>>()?;
297+
let key = dict.keys().value(idx) as usize;
298+
Some(encode_value(encoder, dict.values(), key))
299+
}
300+
DataType::UInt64 => {
301+
let dict = arr.as_any().downcast_ref::<DictionaryArray<UInt64Type>>()?;
302+
let key = dict.keys().value(idx) as usize;
303+
Some(encode_value(encoder, dict.values(), key))
304+
}
305+
_ => None
306+
}
307+
}
308+
_ => None
309+
}
310+
}
311+
242312
fn encode_value(
243313
encoder: &mut DataRowEncoder,
244314
arr: &Arc<dyn Array>,
245315
idx: usize,
246316
) -> PgWireResult<()> {
317+
// Handle dictionary encoding by extracting the actual value from the dictionary
318+
if let Some(result) = encode_dictionary_value(encoder, arr, idx) {
319+
return result;
320+
}
321+
247322
match arr.data_type() {
248323
DataType::Null => encoder.encode_field(&None::<i8>)?,
249324
DataType::Boolean => encoder.encode_field(&get_bool_value(arr, idx))?,
@@ -347,7 +422,13 @@ fn encode_value(
347422
},
348423

349424
DataType::List(field) | DataType::FixedSizeList(field, _) | DataType::LargeList(field) => {
350-
match field.data_type() {
425+
// Extract the inner type, handling dictionaries by getting the value type
426+
let field_type = match field.data_type() {
427+
DataType::Dictionary(_, value_type) => value_type.as_ref(),
428+
data_type => data_type,
429+
};
430+
431+
match field_type {
351432
DataType::Null => encoder.encode_field(&None::<i8>)?,
352433
DataType::Boolean => encoder.encode_field(&get_bool_list_value(arr, idx))?,
353434
DataType::Int8 => encoder.encode_field(&get_i8_list_value(arr, idx))?,

0 commit comments

Comments
 (0)