@@ -23,10 +23,11 @@ use arrow::array::{
2323 TimestampSecondArray ,
2424} ;
2525use arrow:: datatypes:: {
26- i256, BinaryType , BinaryViewType , Date32Type , Date64Type , Decimal128Type , Decimal256Type ,
27- Decimal32Type , Decimal64Type , Float16Type , Float32Type , Float64Type , Int16Type , Int32Type ,
28- Int64Type , Int8Type , LargeBinaryType , Time32MillisecondType , Time32SecondType ,
29- Time64MicrosecondType , Time64NanosecondType , UInt16Type , UInt32Type , UInt64Type , UInt8Type ,
26+ i256, ArrowNativeType , BinaryType , BinaryViewType , Date32Type , Date64Type , Decimal128Type ,
27+ Decimal256Type , Decimal32Type , Decimal64Type , Float16Type , Float32Type , Float64Type , Int16Type ,
28+ Int32Type , Int64Type , Int8Type , LargeBinaryType , RunEndIndexType , Time32MillisecondType ,
29+ Time32SecondType , Time64MicrosecondType , Time64NanosecondType , UInt16Type , UInt32Type ,
30+ UInt64Type , UInt8Type ,
3031} ;
3132use arrow:: temporal_conversions:: {
3233 timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_s_to_datetime,
@@ -502,6 +503,17 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
502503 builder
503504 ) ;
504505 }
506+ DataType :: RunEndEncoded ( run_ends, _) => match run_ends. data_type ( ) {
507+ DataType :: Int16 => process_run_end_encoded :: < Int16Type > ( input, & mut builder) ?,
508+ DataType :: Int32 => process_run_end_encoded :: < Int32Type > ( input, & mut builder) ?,
509+ DataType :: Int64 => process_run_end_encoded :: < Int64Type > ( input, & mut builder) ?,
510+ _ => {
511+ return Err ( ArrowError :: CastError ( format ! (
512+ "Unsupported run ends type: {:?}" ,
513+ run_ends. data_type( )
514+ ) ) ) ;
515+ }
516+ } ,
505517 DataType :: Dictionary ( _, _) => {
506518 let dict_array = input. as_any_dictionary ( ) ;
507519 let values_variant_array = cast_to_variant ( dict_array. values ( ) . as_ref ( ) ) ?;
@@ -532,6 +544,41 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
532544 Ok ( builder. build ( ) )
533545}
534546
547+ /// Generic function to process run-end encoded arrays
548+ fn process_run_end_encoded < R : RunEndIndexType > (
549+ input : & dyn Array ,
550+ builder : & mut VariantArrayBuilder ,
551+ ) -> Result < ( ) , ArrowError > {
552+ let run_array = input. as_run :: < R > ( ) ;
553+ let values_variant_array = cast_to_variant ( run_array. values ( ) . as_ref ( ) ) ?;
554+
555+ // Process runs in batches for better performance
556+ let run_ends = run_array. run_ends ( ) . values ( ) ;
557+ let mut logical_start = 0 ;
558+
559+ for ( physical_idx, & run_end) in run_ends. iter ( ) . enumerate ( ) {
560+ let logical_end = run_end. as_usize ( ) ;
561+ let run_length = logical_end - logical_start;
562+
563+ if values_variant_array. is_null ( physical_idx) {
564+ // Append nulls for the entire run
565+ for _ in 0 ..run_length {
566+ builder. append_null ( ) ;
567+ }
568+ } else {
569+ // Get the value once and append it for the entire run
570+ let value = values_variant_array. value ( physical_idx) ;
571+ for _ in 0 ..run_length {
572+ builder. append_variant ( value. clone ( ) ) ;
573+ }
574+ }
575+
576+ logical_start = logical_end;
577+ }
578+
579+ Ok ( ( ) )
580+ }
581+
535582// TODO do we need a cast_with_options to allow specifying conversion behavior,
536583// e.g. how to handle overflows, whether to convert to Variant::Null or return
537584// an error, etc. ?
@@ -544,9 +591,9 @@ mod tests {
544591 Decimal256Array , Decimal32Array , Decimal64Array , DictionaryArray , FixedSizeBinaryBuilder ,
545592 Float16Array , Float32Array , Float64Array , GenericByteBuilder , GenericByteViewBuilder ,
546593 Int16Array , Int32Array , Int64Array , Int8Array , IntervalYearMonthArray , LargeStringArray ,
547- NullArray , StringArray , StringViewArray , StructArray , Time32MillisecondArray ,
548- Time32SecondArray , Time64MicrosecondArray , Time64NanosecondArray , UInt16Array , UInt32Array ,
549- UInt64Array , UInt8Array ,
594+ NullArray , StringArray , StringRunBuilder , StringViewArray , StructArray ,
595+ Time32MillisecondArray , Time32SecondArray , Time64MicrosecondArray , Time64NanosecondArray ,
596+ UInt16Array , UInt32Array , UInt64Array , UInt8Array ,
550597 } ;
551598 use arrow:: buffer:: NullBuffer ;
552599 use arrow_schema:: { Field , Fields } ;
@@ -1847,6 +1894,58 @@ mod tests {
18471894 ) ;
18481895 }
18491896
/// Casting a run-end encoded string array without nulls expands every run
/// into one variant per logical row.
#[test]
fn test_cast_to_variant_run_end_encoded() {
    // Three runs: "apple" x2, "banana" x3, "cherry" x1.
    let rows = ["apple", "apple", "banana", "banana", "banana", "cherry"];

    let mut run_builder = StringRunBuilder::<Int32Type>::new();
    for &text in &rows {
        run_builder.append_value(text);
    }
    let encoded = run_builder.finish();

    // One expected variant per logical row, in the same order as the input.
    let expected: Vec<_> = rows
        .iter()
        .map(|&text| Some(Variant::from(text)))
        .collect();

    run_test(Arc::new(encoded), expected);
}
1920+
/// Casting a run-end encoded string array that contains null runs keeps the
/// nulls at their logical positions.
#[test]
fn test_cast_to_variant_run_end_encoded_with_nulls() {
    use arrow::array::StringRunBuilder;
    use arrow::datatypes::Int32Type;

    // Logical rows, with `None` marking a null entry.
    let rows = [
        Some("apple"),
        None,
        Some("banana"),
        Some("banana"),
        None,
        None,
    ];

    let mut run_builder = StringRunBuilder::<Int32Type>::new();
    for &row in &rows {
        match row {
            Some(text) => run_builder.append_value(text),
            None => run_builder.append_null(),
        }
    }
    let encoded = run_builder.finish();

    // Nulls stay `None`; every other row becomes a string variant.
    let expected: Vec<_> = rows
        .iter()
        .map(|&row| row.map(|text| Variant::from(text)))
        .collect();

    run_test(Arc::new(encoded), expected);
}
1948+
18501949 #[ test]
18511950 fn test_cast_to_variant_dictionary ( ) {
18521951 let values = StringArray :: from ( vec ! [ "apple" , "banana" , "cherry" , "date" ] ) ;
0 commit comments