Skip to content

Commit fdcc647

Browse files
authored
GH-48537: [Ruby] Add support for reading fixed size binary array (#48538)
### Rationale for this change

It's a fixed-size variant of the binary array.

### What changes are included in this PR?

* Add `ArrowFormat::FixedSizeBinaryType`
* Add `ArrowFormat::FixedSizeBinaryArray`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48537

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 323c251 commit fdcc647

File tree

4 files changed

+51
-7
lines changed

4 files changed

+51
-7
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,21 @@ def encoding
235235
end
236236
end
237237

238+
# Array whose values are binary strings that all have the same byte length.
#
# Values are read back to back from a single values buffer: slot +i+ is
# read from offset <tt>i * byte_width</tt> and is +byte_width+ bytes long.
class FixedSizeBinaryArray < Array
  # @param type type object that responds to +byte_width+
  #   (FixedSizeBinaryType#build_array passes itself here)
  # @param size number of slots in the array
  # @param validity_buffer forwarded unchanged to the superclass
  # @param values_buffer object that responds to
  #   +get_string(offset, length)+ and holds the packed values
  def initialize(type, size, validity_buffer, values_buffer)
    super(type, size, validity_buffer)
    @values_buffer = values_buffer
  end

  # Returns one String per slot, passed through +apply_validity+.
  #
  # NOTE(review): +@type+ and +@size+ are expected to be set by the
  # superclass initializer, and +apply_validity+ presumably replaces
  # null slots with +nil+ -- both are defined outside this block.
  def to_a
    byte_width = @type.byte_width
    # Walk slot indexes instead of stepping over byte offsets: a step of
    # 0 is not usable with Integer#step, so a zero-width type would fail
    # instead of yielding one empty string per slot.
    values = @size.times.collect do |index|
      @values_buffer.get_string(index * byte_width, byte_width)
    end
    apply_validity(values)
  end
end
252+
238253
class VariableSizeListArray < Array
239254
def initialize(type, size, validity_buffer, offsets_buffer, child)
240255
super(type, size, validity_buffer)

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ def read_field(fb_field)
232232
type = LargeBinaryType.singleton
233233
when Org::Apache::Arrow::Flatbuf::Utf8
234234
type = UTF8Type.singleton
235+
when Org::Apache::Arrow::Flatbuf::FixedSizeBinary
236+
type = FixedSizeBinaryType.new(fb_type.byte_width)
235237
end
236238
Field.new(fb_field.name, type, fb_field.nullable?)
237239
end
@@ -263,6 +265,16 @@ def read_column(field, nodes, buffers, body)
263265
values_buffer = buffers.shift
264266
values = body.slice(values_buffer.offset, values_buffer.length)
265267
field.type.build_array(length, validity, values)
268+
when VariableSizeBinaryType
269+
offsets_buffer = buffers.shift
270+
values_buffer = buffers.shift
271+
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
272+
values = body.slice(values_buffer.offset, values_buffer.length)
273+
field.type.build_array(length, validity, offsets, values)
274+
when FixedSizeBinaryType
275+
values_buffer = buffers.shift
276+
values = body.slice(values_buffer.offset, values_buffer.length)
277+
field.type.build_array(length, validity, values)
266278
when VariableSizeListType
267279
offsets_buffer = buffers.shift
268280
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
@@ -289,12 +301,6 @@ def read_column(field, nodes, buffers, body)
289301
read_column(child, nodes, buffers, body)
290302
end
291303
field.type.build_array(length, types, children)
292-
when VariableSizeBinaryType
293-
offsets_buffer = buffers.shift
294-
values_buffer = buffers.shift
295-
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
296-
values = body.slice(values_buffer.offset, values_buffer.length)
297-
field.type.build_array(length, validity, offsets, values)
298304
end
299305
end
300306
end

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,6 @@ def singleton
344344
end
345345
end
346346

347-
attr_reader :name
348347
def initialize
349348
super("UTF8")
350349
end
@@ -354,6 +353,18 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer)
354353
end
355354
end
356355

356+
# Type for binary values that all occupy the same number of bytes.
class FixedSizeBinaryType < Type
  # Byte length of every value of this type.
  attr_reader :byte_width

  # @param byte_width byte length of each value
  def initialize(byte_width)
    super("FixedSizeBinary")
    @byte_width = byte_width
  end

  # Builds a FixedSizeBinaryArray of this type from the given buffers.
  #
  # @param size number of slots
  # @param validity_buffer forwarded to the array as is
  # @param values_buffer forwarded to the array as is
  def build_array(size, validity_buffer, values_buffer)
    FixedSizeBinaryArray.new(self,
                             size,
                             validity_buffer,
                             values_buffer)
  end
end
367+
357368
class VariableSizeListType < Type
358369
attr_reader :child
359370
def initialize(name, child)

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,18 @@ def test_read
358358
end
359359
end
360360

361+
sub_test_case("FixedSizeBinary") do
  # Three 4-byte slots with a null in the middle.
  def build_array
    values = ["0124".b, nil, "abcd".b]
    Arrow::FixedSizeBinaryArray.new(Arrow::FixedSizeBinaryDataType.new(4),
                                    values)
  end

  def test_read
    expected = [{"value" => ["0124".b, nil, "abcd".b]}]
    assert_equal(expected, read)
  end
end
372+
361373
sub_test_case("List") do
362374
def build_array
363375
data_type = Arrow::ListDataType.new(name: "count", type: :int8)

0 commit comments

Comments
 (0)