Skip to content

Commit 4cbe26d

Browse files
authored
GH-48551: [Ruby] Add support for reading large UTF-8 array (#48552)
### Rationale for this change

It's the large variant of the UTF-8 array.

### What changes are included in this PR?

* Add `ArrowFormat::LargeUTF8Type`
* Add `ArrowFormat::LargeUTF8Array`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48551

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent db9f556 commit 4cbe26d

File tree

4 files changed

+46
-0
lines changed

4 files changed

+46
-0
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,17 @@ def encoding
235235
end
236236
end
237237

238+
# Array class for the large variant of the UTF-8 array. It only overrides
# two private hooks of VariableSizeBinaryLayoutArray; how the parent class
# consumes them is not visible in this diff.
class LargeUTF8Array < VariableSizeBinaryLayoutArray
239+
private
240+
# Returns the :s64 type symbol.
# NOTE(review): presumably the element type the parent uses to read the
# offsets buffer (64-bit signed for the "large" layout) - confirm against
# VariableSizeBinaryLayoutArray.
def buffer_type
241+
:s64 # TODO: big endian support
242+
end
243+
244+
# Returns Encoding::UTF_8.
# NOTE(review): presumably the encoding the parent applies to each value it
# extracts - confirm against VariableSizeBinaryLayoutArray.
def encoding
245+
Encoding::UTF_8
246+
end
247+
end
248+
238249
class FixedSizeBinaryArray < Array
239250
def initialize(type, size, validity_buffer, values_buffer)
240251
super(type, size, validity_buffer)

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@
2626
require_relative "org/apache/arrow/flatbuf/bool"
2727
require_relative "org/apache/arrow/flatbuf/date"
2828
require_relative "org/apache/arrow/flatbuf/date_unit"
29+
require_relative "org/apache/arrow/flatbuf/fixed_size_binary"
2930
require_relative "org/apache/arrow/flatbuf/floating_point"
3031
require_relative "org/apache/arrow/flatbuf/footer"
3132
require_relative "org/apache/arrow/flatbuf/int"
3233
require_relative "org/apache/arrow/flatbuf/large_binary"
3334
require_relative "org/apache/arrow/flatbuf/large_list"
35+
require_relative "org/apache/arrow/flatbuf/large_utf8"
3436
require_relative "org/apache/arrow/flatbuf/list"
3537
require_relative "org/apache/arrow/flatbuf/map"
3638
require_relative "org/apache/arrow/flatbuf/message"
@@ -232,6 +234,8 @@ def read_field(fb_field)
232234
type = LargeBinaryType.singleton
233235
when Org::Apache::Arrow::Flatbuf::Utf8
234236
type = UTF8Type.singleton
237+
when Org::Apache::Arrow::Flatbuf::LargeUtf8
238+
type = LargeUTF8Type.singleton
235239
when Org::Apache::Arrow::Flatbuf::FixedSizeBinary
236240
type = FixedSizeBinaryType.new(fb_type.byte_width)
237241
end

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,26 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer)
353353
end
354354
end
355355

356+
# Type class for the large variant of the UTF-8 array. Its build_array
# produces LargeUTF8Array instances.
class LargeUTF8Type < VariableSizeBinaryType
357+
class << self
358+
# Returns a shared LargeUTF8Type instance, created on the first call and
# memoized in a class-level instance variable.
def singleton
359+
@singleton ||= new
360+
end
361+
end
362+
363+
# Passes the string "LargeUTF8" to the superclass constructor.
# NOTE(review): presumably this is the type's display name - confirm
# against VariableSizeBinaryType#initialize.
def initialize
364+
super("LargeUTF8")
365+
end
366+
367+
# Builds a LargeUTF8Array from the given size and buffers, passing self as
# the array's type. All arguments are forwarded unchanged.
def build_array(size, validity_buffer, offsets_buffer, values_buffer)
368+
LargeUTF8Array.new(self,
369+
size,
370+
validity_buffer,
371+
offsets_buffer,
372+
values_buffer)
373+
end
374+
end
375+
356376
class FixedSizeBinaryType < Type
357377
attr_reader :byte_width
358378
def initialize(byte_width)

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,17 @@ def test_read
358358
end
359359
end
360360

361+
# Test case for reading a large UTF-8 array.
sub_test_case("LargeUTF8") do
362+
# Builds the input: an Arrow::LargeStringArray holding two strings with a
# nil between them.
def build_array
363+
Arrow::LargeStringArray.new(["Hello", nil, "World"])
364+
end
365+
366+
# Asserts that `read` (a helper defined outside this view) returns a
# single-element array whose hash maps "value" to the original values,
# with the nil preserved.
def test_read
367+
assert_equal([{"value" => ["Hello", nil, "World"]}],
368+
read)
369+
end
370+
end
371+
361372
sub_test_case("FixedSizeBinary") do
362373
def build_array
363374
data_type = Arrow::FixedSizeBinaryDataType.new(4)

0 commit comments

Comments
 (0)