Skip to content

Commit 7b54614

Browse files
authored
GH-48347: [Ruby] Add support for reading list array (#48351)
### Rationale for this change

This is the first nested type.

### What changes are included in this PR?

* Add `ArrowFormat::ListType`
* Add `ArrowFormat::ListArray`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48347

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 5499afa commit 7b54614

File tree

4 files changed

+88
-30
lines changed

4 files changed

+88
-30
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,23 @@ def encoding
123123
Encoding::UTF_8
124124
end
125125
end
126+
127+
# Array whose elements are variable-length lists. Element i is the slice of
# the child's values delimited by the i-th and (i + 1)-th offsets read from
# the offsets buffer.
class ListArray < Array
  # type:            type object for this array; forwarded to the superclass.
  # size:            number of list elements; forwarded to the superclass.
  # validity_buffer: validity data; forwarded to the superclass unchanged.
  # offsets_buffer:  object iterated with each(:s32, 0, size + 1); each
  #                  iteration is consumed as an (ignored, offset) pair.
  #                  NOTE(review): presumably an IO::Buffer - confirm.
  # child:           object whose #to_a returns the flattened element values.
  def initialize(type, size, validity_buffer, offsets_buffer, child)
    super(type, size, validity_buffer)
    @offsets_buffer = offsets_buffer
    @child = child
  end

  # Returns one Ruby array per list element, passed through apply_validity
  # (not defined in this class).
  # NOTE(review): @size is not assigned in this class; presumably the
  # superclass initializer stores it - confirm.
  def to_a
    child_values = @child.to_a
    # size + 1 offsets describe size elements: each consecutive pair of
    # offsets is the half-open range of one element in child_values.
    values = @offsets_buffer.
      each(:s32, 0, @size + 1). # TODO: big endian support
      each_cons(2).
      collect do |(_, offset), (_, next_offset)|
        child_values[offset...next_offset]
      end
    apply_validity(values)
  end
end
126145
end

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 45 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,15 @@
2222
require_relative "schema"
2323
require_relative "type"
2424

25+
require_relative "org/apache/arrow/flatbuf/binary"
2526
require_relative "org/apache/arrow/flatbuf/bool"
2627
require_relative "org/apache/arrow/flatbuf/footer"
27-
require_relative "org/apache/arrow/flatbuf/message"
28-
require_relative "org/apache/arrow/flatbuf/binary"
2928
require_relative "org/apache/arrow/flatbuf/int"
29+
require_relative "org/apache/arrow/flatbuf/list"
30+
require_relative "org/apache/arrow/flatbuf/message"
3031
require_relative "org/apache/arrow/flatbuf/null"
31-
require_relative "org/apache/arrow/flatbuf/utf8"
3232
require_relative "org/apache/arrow/flatbuf/schema"
33+
require_relative "org/apache/arrow/flatbuf/utf8"
3334

3435
module ArrowFormat
3536
class FileReader
@@ -90,9 +91,10 @@ def each
9091
when Org::Apache::Arrow::Flatbuf::RecordBatch
9192
n_rows = header.length
9293
columns = []
94+
nodes = header.nodes
9395
buffers = header.buffers
9496
schema.fields.each do |field|
95-
columns << read_column(field, n_rows, buffers, body)
97+
columns << read_column(field, nodes, buffers, body)
9698
end
9799
yield(RecordBatch.new(schema, n_rows, columns))
98100
end
@@ -129,35 +131,44 @@ def read_footer
129131
Org::Apache::Arrow::Flatbuf::Footer.new(footer_data)
130132
end
131133

132-
def read_schema(fb_schema)
133-
fields = fb_schema.fields.collect do |fb_field|
134-
fb_type = fb_field.type
135-
case fb_type
136-
when Org::Apache::Arrow::Flatbuf::Null
137-
type = NullType.singleton
138-
when Org::Apache::Arrow::Flatbuf::Bool
139-
type = BooleanType.singleton
140-
when Org::Apache::Arrow::Flatbuf::Int
141-
case fb_type.bit_width
142-
when 8
143-
if fb_type.signed?
144-
type = Int8Type.singleton
145-
else
146-
type = UInt8Type.singleton
147-
end
134+
# Converts one FlatBuffers field description into a Field.
#
# fb_field: object responding to #name, #type and (for list types)
#           #children.
#
# Returns Field.new(fb_field.name, type). The type is chosen by the class of
# fb_field.type; for a List the first child field is converted recursively
# and wrapped in a ListType. When no branch matches (including an Int whose
# bit width is not 8), type stays nil and the Field is built with a nil type.
def read_field(fb_field)
  fb_type = fb_field.type
  case fb_type
  when Org::Apache::Arrow::Flatbuf::Null
    type = NullType.singleton
  when Org::Apache::Arrow::Flatbuf::Bool
    type = BooleanType.singleton
  when Org::Apache::Arrow::Flatbuf::Int
    case fb_type.bit_width
    when 8
      if fb_type.signed?
        type = Int8Type.singleton
      else
        type = UInt8Type.singleton
      end
    end
  when Org::Apache::Arrow::Flatbuf::List
    # The list's element description is its first child field.
    type = ListType.new(read_field(fb_field.children[0]))
  when Org::Apache::Arrow::Flatbuf::Binary
    type = BinaryType.singleton
  when Org::Apache::Arrow::Flatbuf::Utf8
    type = UTF8Type.singleton
  end
  Field.new(fb_field.name, type)
end
159+
160+
# Builds a Schema from a FlatBuffers schema by converting each entry of
# fb_schema.fields with read_field, preserving order.
def read_schema(fb_schema)
  fields = fb_schema.fields.collect do |fb_field|
    read_field(fb_field)
  end
  Schema.new(fields)
end
158166

159-
def read_column(field, n_rows, buffers, body)
160-
return field.type.build_array(n_rows) if field.type.is_a?(NullType)
167+
def read_column(field, nodes, buffers, body)
168+
node = nodes.shift
169+
length = node.length
170+
171+
return field.type.build_array(length) if field.type.is_a?(NullType)
161172

162173
validity_buffer = buffers.shift
163174
if validity_buffer.length.zero?
@@ -172,14 +183,19 @@ def read_column(field, n_rows, buffers, body)
172183
UInt8Type
173184
values_buffer = buffers.shift
174185
values = body.slice(values_buffer.offset, values_buffer.length)
175-
field.type.build_array(n_rows, validity, values)
186+
field.type.build_array(length, validity, values)
187+
when ListType
188+
offsets_buffer = buffers.shift
189+
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
190+
child = read_column(field.type.child, nodes, buffers, body)
191+
field.type.build_array(length, validity, offsets, child)
176192
when BinaryType,
177193
UTF8Type
178194
offsets_buffer = buffers.shift
179195
values_buffer = buffers.shift
180196
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
181197
values = body.slice(values_buffer.offset, values_buffer.length)
182-
field.type.build_array(n_rows, validity, offsets, values)
198+
field.type.build_array(length, validity, offsets, values)
183199
end
184200
end
185201
end

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ def singleton
103103
end
104104
end
105105

106-
attr_reader :name
107106
def initialize
108107
super("Binary")
109108
end
@@ -129,4 +128,16 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer)
129128
UTF8Array.new(self, size, validity_buffer, offsets_buffer, values_buffer)
130129
end
131130
end
131+
132+
# Type of a list array. Unlike the singleton-style types, each instance
# carries its own child description.
class ListType < Type
  # The child description given to #initialize, returned as-is.
  attr_reader :child

  # child: description of the list's elements; stored without conversion.
  def initialize(child)
    super("List")
    @child = child
  end

  # Builds a ListArray of this type.
  #
  # size, validity_buffer, offsets_buffer and child are forwarded to
  # ListArray.new unchanged, after self as the array's type.
  def build_array(size, validity_buffer, offsets_buffer, child)
    ListArray.new(self, size, validity_buffer, offsets_buffer, child)
  end
end
132143
end

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,16 @@ def test_read
105105
read)
106106
end
107107
end
108+
109+
sub_test_case("List") do
  # Builds a three-element list<int8> array whose middle element is null.
  def build_array
    data_type = Arrow::ListDataType.new(name: "count", type: :int8)
    Arrow::ListArray.new(data_type, [[-128, 127], nil, [-1, 0, 1]])
  end

  # The null list must be read back as nil, the others as Ruby arrays.
  def test_read
    assert_equal([{"value" => [[-128, 127], nil, [-1, 0, 1]]}],
                 read)
  end
end
108120
end

0 commit comments

Comments
 (0)