Skip to content

Commit 8077fe2

Browse files
committed
GH-48360: [Ruby] Add support for reading large binary array
1 parent 7b54614 commit 8077fe2

File tree

4 files changed: +60 -5 lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def initialize(type, size, validity_buffer, offsets_buffer, values_buffer)
100100

101101
def to_a
102102
values = @offsets_buffer.
103-
each(:s32, 0, @size + 1). # TODO: big endian support
103+
each(buffer_type, 0, @size + 1).
104104
each_cons(2).
105105
collect do |(_, offset), (_, next_offset)|
106106
length = next_offset - offset
@@ -112,13 +112,32 @@ def to_a
112112

113113
class BinaryArray < VariableSizeBinaryLayoutArray
114114
private
115+
def buffer_type
116+
:s32 # TODO: big endian support
117+
end
118+
119+
def encoding
120+
Encoding::ASCII_8BIT
121+
end
122+
end
123+
124+
class LargeBinaryArray < VariableSizeBinaryLayoutArray
125+
private
126+
def buffer_type
127+
:s64 # TODO: big endian support
128+
end
129+
115130
def encoding
116131
Encoding::ASCII_8BIT
117132
end
118133
end
119134

120135
class UTF8Array < VariableSizeBinaryLayoutArray
121136
private
137+
def buffer_type
138+
:s32 # TODO: big endian support
139+
end
140+
122141
def encoding
123142
Encoding::UTF_8
124143
end

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
require_relative "org/apache/arrow/flatbuf/bool"
2727
require_relative "org/apache/arrow/flatbuf/footer"
2828
require_relative "org/apache/arrow/flatbuf/int"
29+
require_relative "org/apache/arrow/flatbuf/large_binary"
2930
require_relative "org/apache/arrow/flatbuf/list"
3031
require_relative "org/apache/arrow/flatbuf/message"
3132
require_relative "org/apache/arrow/flatbuf/null"
@@ -151,6 +152,8 @@ def read_field(fb_field)
151152
type = ListType.new(read_field(fb_field.children[0]))
152153
when Org::Apache::Arrow::Flatbuf::Binary
153154
type = BinaryType.singleton
155+
when Org::Apache::Arrow::Flatbuf::LargeBinary
156+
type = LargeBinaryType.singleton
154157
when Org::Apache::Arrow::Flatbuf::Utf8
155158
type = UTF8Type.singleton
156159
end
@@ -189,8 +192,7 @@ def read_column(field, nodes, buffers, body)
189192
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)
190193
child = read_column(field.type.child, nodes, buffers, body)
191194
field.type.build_array(length, validity, offsets, child)
192-
when BinaryType,
193-
UTF8Type
195+
when VariableSizeBinaryType
194196
offsets_buffer = buffers.shift
195197
values_buffer = buffers.shift
196198
offsets = body.slice(offsets_buffer.offset, offsets_buffer.length)

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,10 @@ def build_array(size, validity_buffer, values_buffer)
9696
end
9797
end
9898

99-
class BinaryType < Type
99+
class VariableSizeBinaryType < Type
100+
end
101+
102+
class BinaryType < VariableSizeBinaryType
100103
class << self
101104
def singleton
102105
@singleton ||= new
@@ -112,7 +115,27 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer)
112115
end
113116
end
114117

115-
class UTF8Type < Type
118+
class LargeBinaryType < VariableSizeBinaryType
119+
class << self
120+
def singleton
121+
@singleton ||= new
122+
end
123+
end
124+
125+
def initialize
126+
super("LargeBinary")
127+
end
128+
129+
def build_array(size, validity_buffer, offsets_buffer, values_buffer)
130+
LargeBinaryArray.new(self,
131+
size,
132+
validity_buffer,
133+
offsets_buffer,
134+
values_buffer)
135+
end
136+
end
137+
138+
class UTF8Type < VariableSizeBinaryType
116139
class << self
117140
def singleton
118141
@singleton ||= new

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,17 @@ def test_read
9595
end
9696
end
9797

98+
sub_test_case("LargeBinary") do
99+
def build_array
100+
Arrow::LargeBinaryArray.new(["Hello".b, nil, "World".b])
101+
end
102+
103+
def test_read
104+
assert_equal([{"value" => ["Hello".b, nil, "World".b]}],
105+
read)
106+
end
107+
end
108+
98109
sub_test_case("UTF8") do
99110
def build_array
100111
Arrow::StringArray.new(["Hello", nil, "World"])

0 commit comments

Comments
 (0)