@@ -106,16 +106,30 @@ def convert_type(red_arrow_type)
106106 convert_field ( field )
107107 end
108108 ArrowFormat ::SparseUnionType . new ( fields , red_arrow_type . type_codes )
109+ when Arrow ::DictionaryDataType
110+ index_type = convert_type ( red_arrow_type . index_data_type )
111+ type = convert_type ( red_arrow_type . value_data_type )
112+ ArrowFormat ::DictionaryType . new ( index_type ,
113+ type ,
114+ red_arrow_type . ordered? )
109115 else
110116 raise "Unsupported type: #{ red_arrow_type . inspect } "
111117 end
112118 end
113119
# Converts a Red Arrow field into an ArrowFormat::Field.
#
# The field's data type is converted with convert_type. When the converted
# type is an ArrowFormat::DictionaryType, the field is given the next
# sequential dictionary ID (counted from 0 in @dictionary_id); every other
# field gets nil as its dictionary ID.
def convert_field(red_arrow_field)
  format_type = convert_type(red_arrow_field.data_type)

  # Only dictionary-typed fields consume an ID from the counter.
  assigned_id = nil
  if format_type.is_a?(ArrowFormat::DictionaryType)
    assigned_id = (@dictionary_id ||= 0)
    @dictionary_id += 1
  end

  ArrowFormat::Field.new(red_arrow_field.name,
                         format_type,
                         red_arrow_field.nullable?,
                         assigned_id)
end
120134
121135 def convert_buffer ( buffer )
@@ -171,11 +185,33 @@ def convert_array(red_arrow_array)
171185 type . build_array ( red_arrow_array . size ,
172186 types_buffer ,
173187 children )
188+ when ArrowFormat ::DictionaryType
189+ validity_buffer = convert_buffer ( red_arrow_array . null_bitmap )
190+ indices_buffer = convert_buffer ( red_arrow_array . indices . data_buffer )
191+ dictionary = convert_array ( red_arrow_array . dictionary )
192+ type . build_array ( red_arrow_array . size ,
193+ validity_buffer ,
194+ indices_buffer ,
195+ dictionary )
174196 else
175197 raise "Unsupported array #{ red_arrow_array . inspect } "
176198 end
177199 end
178200
# Writes the array returned by build_array through the given writer.
#
# The array is converted with convert_array, described by a single
# nullable field named "value", and emitted as one record batch between
# writer.start and writer.finish.
def write(writer)
  source_array = build_array
  # Convert the array before the field, keeping the original call order.
  converted = convert_array(source_array)
  value_field = Arrow::Field.new("value", source_array.value_data_type, true)
  schema = ArrowFormat::Schema.new([convert_field(value_field)])
  batch = ArrowFormat::RecordBatch.new(schema, converted.size, [converted])

  writer.start(schema)
  writer.write_record_batch(batch)
  writer.finish
end
214+
179215 class << self
180216 def included ( base )
181217 base . class_eval do
@@ -939,6 +975,19 @@ def test_write
939975 @values )
940976 end
941977 end
978+
sub_test_case("Dictionary") do
  # Builds a dictionary-encoded array from a string array that contains
  # a repeated value and a nil.
  def build_array
    Arrow::StringArray.new(["a", "b", "c", nil, "a"]).dictionary_encode
  end

  # Compares @values against the values passed to build_array.
  def test_write
    expected = ["a", "b", "c", nil, "a"]
    assert_equal(expected, @values)
  end
end
942991 end
943992 end
944993 end
@@ -952,19 +1001,7 @@ def setup
9521001 path = File . join ( tmp_dir , "data.arrow" )
9531002 File . open ( path , "wb" ) do |output |
9541003 writer = ArrowFormat ::FileWriter . new ( output )
955- red_arrow_array = build_array
956- array = convert_array ( red_arrow_array )
957- fields = [
958- ArrowFormat ::Field . new ( "value" ,
959- array . type ,
960- true ,
961- nil ) ,
962- ]
963- schema = ArrowFormat ::Schema . new ( fields )
964- record_batch = ArrowFormat ::RecordBatch . new ( schema , array . size , [ array ] )
965- writer . start ( schema )
966- writer . write_record_batch ( record_batch )
967- writer . finish
1004+ write ( writer )
9681005 end
9691006 data = File . open ( path , "rb" , &:read ) . freeze
9701007 table = Arrow ::Table . load ( Arrow ::Buffer . new ( data ) , format : :arrow )
@@ -982,19 +1019,7 @@ def setup
9821019 path = File . join ( tmp_dir , "data.arrows" )
9831020 File . open ( path , "wb" ) do |output |
9841021 writer = ArrowFormat ::StreamingWriter . new ( output )
985- red_arrow_array = build_array
986- array = convert_array ( red_arrow_array )
987- fields = [
988- ArrowFormat ::Field . new ( "value" ,
989- array . type ,
990- true ,
991- nil ) ,
992- ]
993- schema = ArrowFormat ::Schema . new ( fields )
994- record_batch = ArrowFormat ::RecordBatch . new ( schema , array . size , [ array ] )
995- writer . start ( schema )
996- writer . write_record_batch ( record_batch )
997- writer . finish
1022+ write ( writer )
9981023 end
9991024 data = File . open ( path , "rb" , &:read ) . freeze
10001025 table = Arrow ::Table . load ( Arrow ::Buffer . new ( data ) , format : :arrows )
0 commit comments