Skip to content

Commit 6d65569

Browse files
authored
GH-48360: [Ruby] Add support for reading large binary array (#48361)
### Rationale for this change

It's the 64-bit offset version of the binary array.

### What changes are included in this PR?

* Add `ArrowFormat::LargeBinaryType`
* Add `ArrowFormat::LargeBinaryArray`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48360

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 5fbd2dc commit 6d65569

File tree

4 files changed

+60
-5
lines changed

4 files changed

+60
-5
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def initialize(type, size, validity_buffer, offsets_buffer, values_buffer)
113113

114114
def to_a
115115
values = @offsets_buffer.
116-
each(:s32, 0, @size + 1). # TODO: big endian support
116+
each(buffer_type, 0, @size + 1).
117117
each_cons(2).
118118
collect do |(_, offset), (_, next_offset)|
119119
length = next_offset - offset
@@ -125,13 +125,32 @@ def to_a
125125

126126
class BinaryArray < VariableSizeBinaryLayoutArray
127127
private
128+
def buffer_type
129+
:s32 # TODO: big endian support
130+
end
131+
132+
def encoding
133+
Encoding::ASCII_8BIT
134+
end
135+
end
136+
137+
class LargeBinaryArray < VariableSizeBinaryLayoutArray
138+
private
139+
def buffer_type
140+
:s64 # TODO: big endian support
141+
end
142+
128143
def encoding
129144
Encoding::ASCII_8BIT
130145
end
131146
end
132147

133148
class UTF8Array < VariableSizeBinaryLayoutArray
134149
private
150+
def buffer_type
151+
:s32 # TODO: big endian support
152+
end
153+
135154
def encoding
136155
Encoding::UTF_8
137156
end

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
require_relative "org/apache/arrow/flatbuf/floating_point"
2828
require_relative "org/apache/arrow/flatbuf/footer"
2929
require_relative "org/apache/arrow/flatbuf/int"
30+
require_relative "org/apache/arrow/flatbuf/large_binary"
3031
require_relative "org/apache/arrow/flatbuf/list"
3132
require_relative "org/apache/arrow/flatbuf/message"
3233
require_relative "org/apache/arrow/flatbuf/null"
@@ -158,6 +159,8 @@ def read_field(fb_field)
158159
type = ListType.new(read_field(fb_field.children[0]))
159160
when Org::Apache::Arrow::Flatbuf::Binary
160161
type = BinaryType.singleton
162+
when Org::Apache::Arrow::Flatbuf::LargeBinary
163+
type = LargeBinaryType.singleton
161164
when Org::Apache::Arrow::Flatbuf::Utf8
162165
type = UTF8Type.singleton
163166
end
@@ -196,8 +199,7 @@ def read_column(field, nodes, buffers, body)
196199
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
197200
child = read_column(field.type.child, nodes, buffers, body)
198201
field.type.build_array(length, validity, offsets, child)
199-
when BinaryType,
200-
UTF8Type
202+
when VariableSizeBinaryType
201203
offsets_buffer = buffers.shift
202204
values_buffer = buffers.shift
203205
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,10 @@ def build_array(size, validity_buffer, values_buffer)
120120
end
121121
end
122122

123-
class BinaryType < Type
123+
class VariableSizeBinaryType < Type
124+
end
125+
126+
class BinaryType < VariableSizeBinaryType
124127
class << self
125128
def singleton
126129
@singleton ||= new
@@ -136,7 +139,27 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer)
136139
end
137140
end
138141

139-
class UTF8Type < Type
142+
class LargeBinaryType < VariableSizeBinaryType
143+
class << self
144+
def singleton
145+
@singleton ||= new
146+
end
147+
end
148+
149+
def initialize
150+
super("LargeBinary")
151+
end
152+
153+
def build_array(size, validity_buffer, offsets_buffer, values_buffer)
154+
LargeBinaryArray.new(self,
155+
size,
156+
validity_buffer,
157+
offsets_buffer,
158+
values_buffer)
159+
end
160+
end
161+
162+
class UTF8Type < VariableSizeBinaryType
140163
class << self
141164
def singleton
142165
@singleton ||= new

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,17 @@ def test_read
106106
end
107107
end
108108

109+
sub_test_case("LargeBinary") do
110+
def build_array
111+
Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
112+
end
113+
114+
def test_read
115+
assert_equal([{"value" => ["Hello".b, nil, "World".b]}],
116+
read)
117+
end
118+
end
119+
109120
sub_test_case("UTF8") do
110121
def build_array
111122
Arrow::StringArray.new(["Hello", nil, "World"])

0 commit comments

Comments
 (0)