2222require_relative "schema"
2323require_relative "type"
2424
25+ require_relative "org/apache/arrow/flatbuf/binary"
2526require_relative "org/apache/arrow/flatbuf/bool"
2627require_relative "org/apache/arrow/flatbuf/footer"
27- require_relative "org/apache/arrow/flatbuf/message"
28- require_relative "org/apache/arrow/flatbuf/binary"
2928require_relative "org/apache/arrow/flatbuf/int"
29+ require_relative "org/apache/arrow/flatbuf/list"
30+ require_relative "org/apache/arrow/flatbuf/message"
3031require_relative "org/apache/arrow/flatbuf/null"
31- require_relative "org/apache/arrow/flatbuf/utf8"
3232require_relative "org/apache/arrow/flatbuf/schema"
33+ require_relative "org/apache/arrow/flatbuf/utf8"
3334
3435module ArrowFormat
3536 class FileReader
@@ -90,9 +91,10 @@ def each
9091 when Org ::Apache ::Arrow ::Flatbuf ::RecordBatch
9192 n_rows = header . length
9293 columns = [ ]
94+ nodes = header . nodes
9395 buffers = header . buffers
9496 schema . fields . each do |field |
95- columns << read_column ( field , n_rows , buffers , body )
97+ columns << read_column ( field , nodes , buffers , body )
9698 end
9799 yield ( RecordBatch . new ( schema , n_rows , columns ) )
98100 end
@@ -129,35 +131,44 @@ def read_footer
129131 Org ::Apache ::Arrow ::Flatbuf ::Footer . new ( footer_data )
130132 end
131133
# Builds a Field from a single FlatBuffers field description.
#
# The Arrow type is selected from the class of +fb_field.type+:
# Null, Bool, Int (8-bit signed/unsigned only), List, Binary and Utf8.
# A List field is resolved recursively from its first child field.
#
# NOTE(review): there is no fallback branch, so any other FlatBuffers
# type, or an Int whose bit width is not 8, produces a Field whose type
# is nil -- confirm whether that is intended.
def read_field(fb_field)
  fb_type = fb_field.type
  type =
    case fb_type
    when Org::Apache::Arrow::Flatbuf::Null
      NullType.singleton
    when Org::Apache::Arrow::Flatbuf::Bool
      BooleanType.singleton
    when Org::Apache::Arrow::Flatbuf::Int
      # Only 8-bit integers are mapped; other widths fall through to nil.
      if fb_type.bit_width == 8
        fb_type.signed? ? Int8Type.singleton : UInt8Type.singleton
      end
    when Org::Apache::Arrow::Flatbuf::List
      # The list's element field is the first (index 0) child.
      element_field = read_field(fb_field.children[0])
      ListType.new(element_field)
    when Org::Apache::Arrow::Flatbuf::Binary
      BinaryType.singleton
    when Org::Apache::Arrow::Flatbuf::Utf8
      UTF8Type.singleton
    end
  Field.new(fb_field.name, type)
end

# Builds a Schema by converting every FlatBuffers field of +fb_schema+
# with read_field, preserving field order.
def read_schema(fb_schema)
  converted = fb_schema.fields.collect { |fb_child| read_field(fb_child) }
  Schema.new(converted)
end
158166
159- def read_column ( field , n_rows , buffers , body )
160- return field . type . build_array ( n_rows ) if field . type . is_a? ( NullType )
167+ def read_column ( field , nodes , buffers , body )
168+ node = nodes . shift
169+ length = node . length
170+
171+ return field . type . build_array ( length ) if field . type . is_a? ( NullType )
161172
162173 validity_buffer = buffers . shift
163174 if validity_buffer . length . zero?
@@ -172,14 +183,19 @@ def read_column(field, n_rows, buffers, body)
172183 UInt8Type
173184 values_buffer = buffers . shift
174185 values = body . slice ( values_buffer . offset , values_buffer . length )
175- field . type . build_array ( n_rows , validity , values )
186+ field . type . build_array ( length , validity , values )
187+ when ListType
188+ offsets_buffer = buffers . shift
189+ offsets = body . slice ( offsets_buffer . offset , offsets_buffer . length )
190+ child = read_column ( field . type . child , nodes , buffers , body )
191+ field . type . build_array ( length , validity , offsets , child )
176192 when BinaryType ,
177193 UTF8Type
178194 offsets_buffer = buffers . shift
179195 values_buffer = buffers . shift
180196 offsets = body . slice ( offsets_buffer . offset , offsets_buffer . length )
181197 values = body . slice ( values_buffer . offset , values_buffer . length )
182- field . type . build_array ( n_rows , validity , offsets , values )
198+ field . type . build_array ( length , validity , offsets , values )
183199 end
184200 end
185201 end
0 commit comments