Skip to content

Commit 6a37b9c

Browse files
authored
GH-48346: [Ruby] Add support for reading boolean array (#48348)
### Rationale for this change This is a primitive type but we need to handle bitmap for boolean values. ### What changes are included in this PR? * Add `ArrowFormat::BooleanType` * Add `ArrowFormat::BooleanArray` ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * GitHub Issue: #48346 Authored-by: Sutou Kouhei <[email protected]> Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 773848a commit 6a37b9c

File tree

5 files changed

+96
-13
lines changed

5 files changed

+96
-13
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
# specific language governing permissions and limitations
1515
# under the License.
1616

17+
require_relative "bitmap"
18+
1719
module ArrowFormat
1820
class Array
1921
attr_reader :type
@@ -37,18 +39,9 @@ def null?(i)
3739
private
3840
def apply_validity(array)
3941
return array if @validity_buffer.nil?
40-
n_bytes = @size / 8
41-
@validity_buffer.each(:U8, 0, n_bytes) do |offset, value|
42-
7.times do |i|
43-
array[offset * 8 + i] = nil if (value & (1 << (i % 8))).zero?
44-
end
45-
end
46-
remained_bits = @size % 8
47-
unless remained_bits.zero?
48-
value = @validity_buffer.get_value(:U8, n_bytes)
49-
remained_bits.times do |i|
50-
array[n_bytes * 8 + i] = nil if (value & (1 << (i % 8))).zero?
51-
end
42+
@validity_bitmap ||= Bitmap.new(@validity_buffer, @size)
43+
@validity_bitmap.each_with_index do |bit, i|
44+
array[i] = nil if bit.zero?
5245
end
5346
array
5447
end
@@ -64,6 +57,21 @@ def to_a
6457
end
6558
end
6659

60+
# Array whose values are stored bit-packed in a values buffer, read through
# a Bitmap and exposed as true/false.
class BooleanArray < Array
  # Forwards type, size and validity_buffer to the superclass and keeps
  # values_buffer for lazy decoding in #to_a.
  def initialize(type, size, validity_buffer, values_buffer)
    super(type, size, validity_buffer)
    @values_buffer = values_buffer
  end

  # Decodes the values buffer into booleans (a set bit becomes true), then
  # passes the result through apply_validity.
  def to_a
    # The bitmap wrapper is built once and reused on later calls.
    @values_bitmap ||= Bitmap.new(@values_buffer, @size)
    booleans = @values_bitmap.map { |bit| !bit.zero? }
    apply_validity(booleans)
  end
end
74+
6775
class IntArray < Array
6876
def initialize(type, size, validity_buffer, values_buffer)
6977
super(type, size, validity_buffer)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
2+
# distributed with this work for additional information
3+
# regarding copyright ownership. The ASF licenses this file
4+
# to you under the Apache License, Version 2.0 (the
5+
# "License"); you may not use this file except in compliance
6+
# with the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing,
11+
# software distributed under the License is distributed on an
12+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13+
# KIND, either express or implied. See the License for the
14+
# specific language governing permissions and limitations
15+
# under the License.
16+
17+
module ArrowFormat
18+
# Enumerable view over a bit-packed buffer.
#
# The i-th value lives in byte (i / 8) at bit position (i % 8), counting
# from the least significant bit. Only the first +n_values+ bits are
# exposed; any padding bits in the last byte are ignored.
class Bitmap
  include Enumerable

  # @param buffer [#each, #get_value] byte source. It must support
  #   each(:U8, offset, count) yielding (offset, byte) pairs and
  #   get_value(:U8, offset) returning a single byte.
  #   NOTE(review): this looks like the IO::Buffer API -- confirm with callers.
  # @param n_values [Integer] number of bits that are part of this bitmap.
  def initialize(buffer, n_values)
    @buffer = buffer
    @n_values = n_values
  end

  # Yields every bit of the bitmap in order.
  #
  # Each yielded value is the byte masked with the bit's position: 0 when
  # the bit is clear and a non-zero Integer (not necessarily 1) when it is
  # set, so callers should test it with +zero?+.
  #
  # @return [Enumerator] when no block is given.
  def each
    return to_enum(__method__) unless block_given?

    n_bytes = @n_values / 8
    @buffer.each(:U8, 0, n_bytes) do |_offset, value|
      # A complete byte carries 8 values (bits 0..7). Visiting only 7 of
      # them would drop a value per byte and misalign everything after it.
      8.times do |i|
        yield(value & (1 << i))
      end
    end
    # The trailing byte is only partially used when n_values is not a
    # multiple of 8.
    remained_bits = @n_values % 8
    unless remained_bits.zero?
      value = @buffer.get_value(:U8, n_bytes)
      remained_bits.times do |i|
        yield(value & (1 << i))
      end
    end
  end
end
44+
end

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
require_relative "schema"
2323
require_relative "type"
2424

25+
require_relative "org/apache/arrow/flatbuf/bool"
2526
require_relative "org/apache/arrow/flatbuf/footer"
2627
require_relative "org/apache/arrow/flatbuf/message"
2728
require_relative "org/apache/arrow/flatbuf/binary"
@@ -134,6 +135,8 @@ def read_schema(fb_schema)
134135
case fb_type
135136
when Org::Apache::Arrow::Flatbuf::Null
136137
type = NullType.singleton
138+
when Org::Apache::Arrow::Flatbuf::Bool
139+
type = BooleanType.singleton
137140
when Org::Apache::Arrow::Flatbuf::Int
138141
case fb_type.bit_width
139142
when 8
@@ -164,7 +167,8 @@ def read_column(field, n_rows, buffers, body)
164167
end
165168

166169
case field.type
167-
when Int8Type,
170+
when BooleanType,
171+
Int8Type,
168172
UInt8Type
169173
values_buffer = buffers.shift
170174
values = body.slice(values_buffer.offset, values_buffer.length)

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,22 @@ def build_array(size)
3838
end
3939
end
4040

41+
# Type object for boolean columns; it builds BooleanArray instances.
class BooleanType < Type
  # Returns the memoized shared instance, creating it on first use.
  def self.singleton
    @singleton ||= new
  end

  # Passes the name "Boolean" to the superclass.
  def initialize
    super("Boolean")
  end

  # Builds a BooleanArray of this type from the given size and buffers.
  def build_array(size, validity_buffer, values_buffer)
    BooleanArray.new(self, size, validity_buffer, values_buffer)
  end
end
56+
4157
class IntType < Type
4258
attr_reader :bit_width
4359
attr_reader :signed

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@ def test_read
5151
end
5252
end
5353

54+
sub_test_case("Boolean") do
55+
def build_array
56+
Arrow::BooleanArray.new([true, nil, false])
57+
end
58+
59+
def test_read
60+
assert_equal([{"value" => [true, nil, false]}],
61+
read)
62+
end
63+
end
64+
5465
sub_test_case("Int8") do
5566
def build_array
5667
Arrow::Int8Array.new([-128, nil, 127])

0 commit comments

Comments
 (0)