Skip to content

Commit 8974ddc

Browse files
authored
GH-48425: [Ruby] Add support for reading dense union array (#48426)
### Rationale for this change

It's a dense variant of union array.

### What changes are included in this PR?

* Add `ArrowFormat::DenseUnionType`
* Add `ArrowFormat::DenseUnionArray`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48425

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 6c9f542 commit 8974ddc

File tree

4 files changed

+94
-0
lines changed

4 files changed

+94
-0
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,31 @@ def to_a
231231
end
232232
end
233233

234+
# Common base for union arrays: it only stores the buffers and the
# child arrays. Subclasses decide how to turn them into values.
class UnionArray < Array
  # type:           the type object describing this array.
  # size:           the number of elements.
  # types_buffer:   kept as-is in @types_buffer.
  # offsets_buffer: kept as-is in @offsets_buffer.
  # children:       kept as-is in @children.
  def initialize(type, size, types_buffer, offsets_buffer, children)
    # The parent always receives nil as its third argument
    # (presumably the validity buffer slot -- confirm against the
    # parent's initialize).
    super(type, size, nil)
    @children = children
    @offsets_buffer = offsets_buffer
    @types_buffer = types_buffer
  end
end
246+
247+
# Union array whose elements each carry a type ID and an offset
# into the child selected by that type ID.
class DenseUnionArray < UnionArray
  # Returns the values of this array as a Ruby array with @size
  # elements.
  def to_a
    # Materialize every child once so that each element lookup below
    # is a plain index access.
    values_per_child = @children.collect(&:to_a)
    # Without a block, each is used here as an enumerator of
    # [byte offset, value] pairs (presumably IO::Buffer#each --
    # confirm the buffer class). Type IDs are read as signed 8-bit
    # integers and offsets as signed 32-bit integers.
    type_ids = @types_buffer.each(:S8, 0, @size)
    value_offsets = @offsets_buffer.each(:s32, 0, @size)
    pairs = type_ids.zip(value_offsets)
    pairs.collect do |(_, type_id), (_, value_offset)|
      child_index = @type.resolve_type_index(type_id)
      values_per_child[child_index][value_offset]
    end
  end
end
258+
234259
class MapArray < VariableSizeListArray
235260
def to_a
236261
super.collect do |entries|

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
require_relative "org/apache/arrow/flatbuf/precision"
3939
require_relative "org/apache/arrow/flatbuf/schema"
4040
require_relative "org/apache/arrow/flatbuf/struct_"
41+
require_relative "org/apache/arrow/flatbuf/union"
42+
require_relative "org/apache/arrow/flatbuf/union_mode"
4143
require_relative "org/apache/arrow/flatbuf/utf8"
4244

4345
module ArrowFormat
@@ -176,6 +178,13 @@ def read_field(fb_field)
176178
when Org::Apache::Arrow::Flatbuf::Struct
177179
children = fb_field.children.collect {|child| read_field(child)}
178180
type = StructType.new(children)
181+
when Org::Apache::Arrow::Flatbuf::Union
182+
children = fb_field.children.collect {|child| read_field(child)}
183+
type_ids = fb_type.type_ids
184+
case fb_type.mode
185+
when Org::Apache::Arrow::Flatbuf::UnionMode::DENSE
186+
type = DenseUnionType.new(children, type_ids)
187+
end
179188
when Org::Apache::Arrow::Flatbuf::Map
180189
type = MapType.new(read_field(fb_field.children[0]))
181190
when Org::Apache::Arrow::Flatbuf::Binary
@@ -225,6 +234,15 @@ def read_column(field, nodes, buffers, body)
225234
read_column(child, nodes, buffers, body)
226235
end
227236
field.type.build_array(length, validity, children)
237+
when UnionType
238+
# union type doesn't have validity.
239+
types = validity
240+
offsets_buffer = buffers.shift
241+
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
242+
children = field.type.children.collect do |child|
243+
read_column(child, nodes, buffers, body)
244+
end
245+
field.type.build_array(length, types, offsets, children)
228246
when VariableSizeBinaryType
229247
offsets_buffer = buffers.shift
230248
values_buffer = buffers.shift

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,4 +296,29 @@ def build_array(size, validity_buffer, offsets_buffer, child)
296296
MapArray.new(self, size, validity_buffer, offsets_buffer, child)
297297
end
298298
end
299+
300+
class UnionType < Type
301+
attr_reader :children
302+
attr_reader :type_ids
303+
def initialize(name, children, type_ids)
304+
super(name)
305+
@children = children
306+
@type_ids = type_ids
307+
@type_indexes = {}
308+
end
309+
310+
def resolve_type_index(type)
311+
@type_indexes[type] ||= @type_ids.index(type)
312+
end
313+
end
314+
315+
class DenseUnionType < UnionType
316+
def initialize(children, type_ids)
317+
super("DenseUnion", children, type_ids)
318+
end
319+
320+
def build_array(size, types_buffer, offsets_buffer, children)
321+
DenseUnionArray.new(self, size, types_buffer, offsets_buffer, children)
322+
end
323+
end
299324
end

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,32 @@ def test_read
230230
end
231231
end
232232

233+
sub_test_case("DenseUnion") do
  # Builds a five-element dense union array with an int8 child
  # ("number", type ID 11) and a string child ("text", type ID 13).
  def build_array
    data_type = Arrow::DenseUnionDataType.new(
      [
        Arrow::Field.new("number", :int8),
        Arrow::Field.new("text", :string),
      ],
      [11, 13]
    )
    # Each element selects a child by type ID and a slot in that
    # child by offset.
    type_ids = Arrow::Int8Array.new([11, 13, 11, 13, 13])
    offsets = Arrow::Int32Array.new([0, 0, 1, 1, 2])
    number_values = Arrow::Int8Array.new([1, nil])
    text_values = Arrow::StringArray.new(["a", "b", "c"])
    Arrow::DenseUnionArray.new(data_type,
                               type_ids,
                               offsets,
                               [number_values, text_values])
  end

  def test_read
    expected = [{"value" => [1, "a", nil, "b", "c"]}]
    assert_equal(expected, read)
  end
end
258+
233259
sub_test_case("Map") do
234260
def build_array
235261
data_type = Arrow::MapDataType.new(:string, :int8)

0 commit comments

Comments
 (0)