|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +use std::sync::Arc; |
| 5 | + |
| 6 | +use arrow_array::ArrayRef as ArrowArrayRef; |
| 7 | +use arrow_array::DictionaryArray; |
| 8 | +use arrow_array::cast::AsArray; |
| 9 | +use arrow_array::types::*; |
| 10 | +use arrow_schema::DataType; |
| 11 | +use vortex_error::VortexError; |
| 12 | +use vortex_error::VortexResult; |
| 13 | +use vortex_error::vortex_bail; |
| 14 | +use vortex_session::VortexSession; |
| 15 | + |
| 16 | +use crate::ArrayRef; |
| 17 | +use crate::arrays::DictArray; |
| 18 | +use crate::arrays::DictVTable; |
| 19 | +use crate::arrow::ArrowArrayExecutor; |
| 20 | + |
| 21 | +pub(super) fn to_arrow_dictionary( |
| 22 | + array: &ArrayRef, |
| 23 | + codes_type: &DataType, |
| 24 | + values_type: &DataType, |
| 25 | + session: &VortexSession, |
| 26 | +) -> VortexResult<ArrowArrayRef> { |
| 27 | + // Check if we have a Vortex dictionary array |
| 28 | + if let Some(array) = array.as_opt::<DictVTable>() { |
| 29 | + return dict_to_dict(array, codes_type, values_type, session); |
| 30 | + } |
| 31 | + |
| 32 | + // Otherwise, we should try and build a dictionary. |
| 33 | + // Arrow hides this functionality inside the cast module! |
| 34 | + let array = array.execute_arrow(values_type, session)?; |
| 35 | + arrow_cast::cast( |
| 36 | + &array, |
| 37 | + &DataType::Dictionary(Box::new(codes_type.clone()), Box::new(values_type.clone())), |
| 38 | + ) |
| 39 | + .map_err(VortexError::from) |
| 40 | +} |
| 41 | + |
| 42 | +/// Convert a Vortex dictionary array to an Arrow dictionary array. |
| 43 | +fn dict_to_dict( |
| 44 | + array: &DictArray, |
| 45 | + codes_type: &DataType, |
| 46 | + values_type: &DataType, |
| 47 | + session: &VortexSession, |
| 48 | +) -> VortexResult<ArrowArrayRef> { |
| 49 | + let codes = array.codes().execute_arrow(codes_type, session)?; |
| 50 | + let values = array.values().execute_arrow(values_type, session)?; |
| 51 | + |
| 52 | + Ok(match codes_type { |
| 53 | + DataType::Int8 => Arc::new(unsafe { |
| 54 | + DictionaryArray::new_unchecked(codes.as_primitive::<Int8Type>().clone(), values) |
| 55 | + }), |
| 56 | + DataType::Int16 => Arc::new(unsafe { |
| 57 | + DictionaryArray::new_unchecked(codes.as_primitive::<Int16Type>().clone(), values) |
| 58 | + }), |
| 59 | + DataType::Int32 => Arc::new(unsafe { |
| 60 | + DictionaryArray::new_unchecked(codes.as_primitive::<Int32Type>().clone(), values) |
| 61 | + }), |
| 62 | + DataType::Int64 => Arc::new(unsafe { |
| 63 | + DictionaryArray::new_unchecked(codes.as_primitive::<Int64Type>().clone(), values) |
| 64 | + }), |
| 65 | + DataType::UInt8 => Arc::new(unsafe { |
| 66 | + DictionaryArray::new_unchecked(codes.as_primitive::<UInt8Type>().clone(), values) |
| 67 | + }), |
| 68 | + DataType::UInt16 => Arc::new(unsafe { |
| 69 | + DictionaryArray::new_unchecked(codes.as_primitive::<UInt16Type>().clone(), values) |
| 70 | + }), |
| 71 | + DataType::UInt32 => Arc::new(unsafe { |
| 72 | + DictionaryArray::new_unchecked(codes.as_primitive::<UInt32Type>().clone(), values) |
| 73 | + }), |
| 74 | + DataType::UInt64 => Arc::new(unsafe { |
| 75 | + DictionaryArray::new_unchecked(codes.as_primitive::<UInt64Type>().clone(), values) |
| 76 | + }), |
| 77 | + _ => vortex_bail!("Unsupported dictionary codes type: {:?}", codes_type), |
| 78 | + }) |
| 79 | +} |
0 commit comments