Skip to content

Commit 0bfbd19

Browse files
authored
GH-48435: [Ruby] Add support for reading sparse union array (#48439)
### Rationale for this change

It's a sparse variant of union array.

### What changes are included in this PR?

* Add `ArrowFormat::SparseUnionType`
* Add `ArrowFormat::SparseUnionArray`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48435

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 50e3f75 commit 0bfbd19

File tree

4 files changed

+62
-7
lines changed

4 files changed

+62
-7
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,19 +232,23 @@ def to_a
232232
end
233233

234234
class UnionArray < Array
235+
def initialize(type, size, types_buffer, children)
236+
super(type, size, nil)
237+
@types_buffer = types_buffer
238+
@children = children
239+
end
240+
end
241+
242+
class DenseUnionArray < UnionArray
235243
def initialize(type,
236244
size,
237245
types_buffer,
238246
offsets_buffer,
239247
children)
240-
super(type, size, nil)
241-
@types_buffer = types_buffer
248+
super(type, size, types_buffer, children)
242249
@offsets_buffer = offsets_buffer
243-
@children = children
244250
end
245-
end
246251

247-
class DenseUnionArray < UnionArray
248252
def to_a
249253
children_values = @children.collect(&:to_a)
250254
types = @types_buffer.each(:S8, 0, @size)
@@ -256,6 +260,16 @@ def to_a
256260
end
257261
end
258262

263+
class SparseUnionArray < UnionArray
264+
def to_a
265+
children_values = @children.collect(&:to_a)
266+
@types_buffer.each(:S8, 0, @size).with_index.collect do |(_, type), i|
267+
index = @type.resolve_type_index(type)
268+
children_values[index][i]
269+
end
270+
end
271+
end
272+
259273
class MapArray < VariableSizeListArray
260274
def to_a
261275
super.collect do |entries|

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ def read_field(fb_field)
184184
case fb_type.mode
185185
when Org::Apache::Arrow::Flatbuf::UnionMode::DENSE
186186
type = DenseUnionType.new(children, type_ids)
187+
when Org::Apache::Arrow::Flatbuf::UnionMode::SPARSE
188+
type = SparseUnionType.new(children, type_ids)
187189
end
188190
when Org::Apache::Arrow::Flatbuf::Map
189191
type = MapType.new(read_field(fb_field.children[0]))
@@ -234,15 +236,22 @@ def read_column(field, nodes, buffers, body)
234236
read_column(child, nodes, buffers, body)
235237
end
236238
field.type.build_array(length, validity, children)
237-
when UnionType
238-
# union type doesn't have validity.
239+
when DenseUnionType
240+
# dense union type doesn't have validity.
239241
types = validity
240242
offsets_buffer = buffers.shift
241243
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
242244
children = field.type.children.collect do |child|
243245
read_column(child, nodes, buffers, body)
244246
end
245247
field.type.build_array(length, types, offsets, children)
248+
when SparseUnionType
249+
# sparse union type doesn't have validity.
250+
types = validity
251+
children = field.type.children.collect do |child|
252+
read_column(child, nodes, buffers, body)
253+
end
254+
field.type.build_array(length, types, children)
246255
when VariableSizeBinaryType
247256
offsets_buffer = buffers.shift
248257
values_buffer = buffers.shift

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,4 +321,14 @@ def build_array(size, types_buffer, offsets_buffer, children)
321321
DenseUnionArray.new(self, size, types_buffer, offsets_buffer, children)
322322
end
323323
end
324+
325+
class SparseUnionType < UnionType
326+
def initialize(children, type_ids)
327+
super("SparseUnion", children, type_ids)
328+
end
329+
330+
def build_array(size, types_buffer, children)
331+
SparseUnionArray.new(self, size, types_buffer, children)
332+
end
333+
end
324334
end

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,28 @@ def test_read
256256
end
257257
end
258258

259+
sub_test_case("SparseUnion") do
260+
def build_array
261+
fields = [
262+
Arrow::Field.new("number", :int8),
263+
Arrow::Field.new("text", :string),
264+
]
265+
type_ids = [11, 13]
266+
data_type = Arrow::SparseUnionDataType.new(fields, type_ids)
267+
types = Arrow::Int8Array.new([11, 13, 11, 13, 11])
268+
children = [
269+
Arrow::Int8Array.new([1, nil, nil, nil, 5]),
270+
Arrow::StringArray.new([nil, "b", nil, "d", nil])
271+
]
272+
Arrow::SparseUnionArray.new(data_type, types, children)
273+
end
274+
275+
def test_read
276+
assert_equal([{"value" => [1, "b", nil, "d", 5]}],
277+
read)
278+
end
279+
end
280+
259281
sub_test_case("Map") do
260282
def build_array
261283
data_type = Arrow::MapDataType.new(:string, :int8)

0 commit comments

Comments
 (0)