Skip to content

Commit 041e634

Browse files
committed
GH-48388: [Ruby] Add support for reading map array
1 parent 6fba612 commit 041e634

File tree

6 files changed

+96
-8
lines changed

6 files changed

+96
-8
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def encoding
162162
end
163163
end
164164

165-
class ListArray < Array
165+
class VariableSizeListArray < Array
166166
def initialize(type, size, validity_buffer, offsets_buffer, child)
167167
super(type, size, validity_buffer)
168168
@offsets_buffer = offsets_buffer
@@ -181,6 +181,9 @@ def to_a
181181
end
182182
end
183183

184+
# Concrete array class for list columns. It adds nothing of its own: all
# behavior is inherited from VariableSizeListArray.
class ListArray < VariableSizeListArray
end
186+
184187
class StructArray < Array
185188
def initialize(type, size, validity_buffer, children)
186189
super(type, size, validity_buffer)
@@ -197,4 +200,20 @@ def to_a
197200
apply_validity(values)
198201
end
199202
end
203+
204+
# Array of map values, stored as a variable-size list of key/value entries.
class MapArray < VariableSizeListArray
  # Converts the array to plain Ruby objects.
  #
  # Starts from the superclass #to_a result. A nil slot is passed through
  # unchanged; every other slot has its entries destructured as
  # (key, value) pairs and collected into a Hash. If a key appears more
  # than once within a slot, the last assignment wins.
  #
  # @return [Array<Hash, nil>] one Hash (or nil) per slot
  def to_a
    super.collect do |entries|
      next entries if entries.nil?

      entries.each_with_object({}) do |(key, value), hash|
        hash[key] = value
      end
    end
  end
end
200219
end

ruby/red-arrow-format/lib/arrow-format/error.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,14 @@ module ArrowFormat
1818
# Base class for the errors defined in this file; ReadError and TypeError
# both derive from it.
class Error < StandardError
end
2020

21-
class ReadError < StandardError
21+
class ReadError < Error
2222
attr_reader :buffer
2323
def initialize(buffer, message)
2424
@buffer = buffer
2525
super("#{message}: #{@buffer}")
2626
end
2727
end
28+
29+
# Error for an invalid type definition.
#
# NOTE(review): because this class lives inside ArrowFormat, the bare name
# TypeError resolves to it (not to Ruby's built-in ::TypeError) for code
# written lexically inside that module. Use ::TypeError there when the
# core exception is meant.
class TypeError < Error
end
2831
end

ruby/red-arrow-format/lib/arrow-format/field.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,14 @@ module ArrowFormat
1818
class Field
1919
attr_reader :name
2020
attr_reader :type
21-
def initialize(name, type)
21+
def initialize(name, type, nullable)
2222
@name = name
2323
@type = type
24+
@nullable = nullable
25+
end
26+
27+
def nullable?
28+
@nullable
2429
end
2530
end
2631
end

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
require_relative "org/apache/arrow/flatbuf/int"
3030
require_relative "org/apache/arrow/flatbuf/large_binary"
3131
require_relative "org/apache/arrow/flatbuf/list"
32+
require_relative "org/apache/arrow/flatbuf/map"
3233
require_relative "org/apache/arrow/flatbuf/message"
3334
require_relative "org/apache/arrow/flatbuf/null"
3435
require_relative "org/apache/arrow/flatbuf/precision"
@@ -163,14 +164,16 @@ def read_field(fb_field)
163164
when Org::Apache::Arrow::Flatbuf::Struct
164165
children = fb_field.children.collect {|child| read_field(child)}
165166
type = StructType.new(children)
167+
when Org::Apache::Arrow::Flatbuf::Map
168+
type = MapType.new(read_field(fb_field.children[0]))
166169
when Org::Apache::Arrow::Flatbuf::Binary
167170
type = BinaryType.singleton
168171
when Org::Apache::Arrow::Flatbuf::LargeBinary
169172
type = LargeBinaryType.singleton
170173
when Org::Apache::Arrow::Flatbuf::Utf8
171174
type = UTF8Type.singleton
172175
end
173-
Field.new(fb_field.name, type)
176+
Field.new(fb_field.name, type, fb_field.nullable?)
174177
end
175178

176179
def read_schema(fb_schema)
@@ -199,7 +202,7 @@ def read_column(field, nodes, buffers, body)
199202
values_buffer = buffers.shift
200203
values = body.slice(values_buffer.offset, values_buffer.length)
201204
field.type.build_array(length, validity, values)
202-
when ListType
205+
when VariableSizeListType
203206
offsets_buffer = buffers.shift
204207
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
205208
child = read_column(field.type.child, nodes, buffers, body)

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,13 +195,20 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer)
195195
end
196196
end
197197

198-
class ListType < Type
198+
class VariableSizeListType < Type
199199
attr_reader :child
200-
def initialize(child)
201-
super("List")
200+
def initialize(name, child)
201+
super(name)
202202
@child = child
203203
end
204204

205+
end
206+
207+
class ListType < VariableSizeListType
208+
def initialize(child)
209+
super("List", child)
210+
end
211+
205212
def build_array(size, validity_buffer, offsets_buffer, child)
206213
ListArray.new(self, size, validity_buffer, offsets_buffer, child)
207214
end
@@ -218,4 +225,30 @@ def build_array(size, validity_buffer, children)
218225
StructArray.new(self, size, validity_buffer, children)
219226
end
220227
end
228+
229+
# Map type: a variable-size list type named "Map" whose child field holds
# the map entries.
class MapType < VariableSizeListType
  # Validates the entries field and registers it as the list child.
  #
  # Checks run in this order, and the first failure raises:
  # 1. the entries field must not be nullable;
  # 2. its type must be a StructType;
  # 3. that struct must have exactly 2 children;
  # 4. the first child (the key field) must not be nullable.
  #
  # @param child [Field] the entries field; only #nullable?, #type and
  #   #inspect are used here
  # @raise [TypeError] when any check fails (presumably the TypeError
  #   defined in error.rb, resolved through the enclosing namespace --
  #   confirm)
  def initialize(child)
    if child.nullable?
      raise TypeError, "Map entry field must not be nullable: #{child.inspect}"
    end

    entry_type = child.type
    unless entry_type.is_a?(StructType)
      raise TypeError, "Map entry type must be struct: #{entry_type.inspect}"
    end

    entry_fields = entry_type.children
    if entry_fields.size != 2
      raise TypeError,
            "Map entry struct type must have 2 children: #{entry_type.inspect}"
    end

    key_field = entry_fields[0]
    if key_field.nullable?
      raise TypeError, "Map key field must not be nullable: #{key_field.inspect}"
    end

    super("Map", child)
  end

  # Builds the MapArray for this type from the given size, validity
  # buffer, offsets buffer and child array, passed through unchanged.
  def build_array(size, validity_buffer, offsets_buffer, child)
    MapArray.new(self, size, validity_buffer, offsets_buffer, child)
  end
end
221254
end

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,4 +160,29 @@ def test_read
160160
read)
161161
end
162162
end
163+
164+
# Reading a map column: covers a populated map, a null slot, and a map
# whose only value is null.
sub_test_case("Map") do
  # Builds a string => int8 map array with three slots.
  def build_array
    Arrow::MapArray.new(Arrow::MapDataType.new(:string, :int8),
                        [
                          {"a" => -128, "b" => 127},
                          nil,
                          {"c" => nil},
                        ])
  end

  # Expects `read` to return a single record whose "value" column holds
  # the same three slots as Ruby Hashes / nil.
  def test_read
    expected_maps = [
      {"a" => -128, "b" => 127},
      nil,
      {"c" => nil},
    ]
    assert_equal([{"value" => expected_maps}], read)
  end
end
163188
end

0 commit comments

Comments
 (0)