Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion ruby/red-arrow-format/lib/arrow-format/array.rb
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def initialize(type, size, validity_buffer, offsets_buffer, child)
def to_a
child_values = @child.to_a
values = @offsets_buffer.
each(:s32, 0, @size + 1). # TODO: big endian support
each(offset_type, 0, @size + 1).
each_cons(2).
collect do |(_, offset), (_, next_offset)|
child_values[offset...next_offset]
Expand All @@ -182,6 +182,17 @@ def to_a
end

# Variable-size list array whose offsets are read as :s32 values.
class ListArray < VariableSizeListArray
private
# Returns the type symbol that to_a passes to @offsets_buffer.each when it
# walks the offsets (one entry per list boundary).
# NOTE(review): presumably an IO::Buffer type, where lowercase :s32 means
# little-endian signed 32-bit -- confirm; that would explain the TODO below.
def offset_type
:s32 # TODO: big endian support
end
end

# Variable-size list array whose offsets are read as :s64 values, i.e. the
# 64-bit counterpart of ListArray.
class LargeListArray < VariableSizeListArray
private
# Returns the type symbol that to_a passes to @offsets_buffer.each when it
# walks the offsets (one entry per list boundary).
# NOTE(review): presumably an IO::Buffer type, where lowercase :s64 means
# little-endian signed 64-bit -- confirm; that would explain the TODO below.
def offset_type
:s64 # TODO: big endian support
end
end

class StructArray < Array
Expand Down Expand Up @@ -215,5 +226,10 @@ def to_a
end
end
end

private
# Returns the type symbol used to read entries of the offsets buffer
# (32-bit signed offsets for this array class).
# NOTE(review): endianness is not handled yet, see the TODO below.
def offset_type
:s32 # TODO: big endian support
end
end
end
3 changes: 3 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/file-reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
require_relative "org/apache/arrow/flatbuf/footer"
require_relative "org/apache/arrow/flatbuf/int"
require_relative "org/apache/arrow/flatbuf/large_binary"
require_relative "org/apache/arrow/flatbuf/large_list"
require_relative "org/apache/arrow/flatbuf/list"
require_relative "org/apache/arrow/flatbuf/map"
require_relative "org/apache/arrow/flatbuf/message"
Expand Down Expand Up @@ -161,6 +162,8 @@ def read_field(fb_field)
end
when Org::Apache::Arrow::Flatbuf::List
type = ListType.new(read_field(fb_field.children[0]))
when Org::Apache::Arrow::Flatbuf::LargeList
type = LargeListType.new(read_field(fb_field.children[0]))
when Org::Apache::Arrow::Flatbuf::Struct
children = fb_field.children.collect {|child| read_field(child)}
type = StructType.new(children)
Expand Down
10 changes: 10 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,16 @@ def build_array(size, validity_buffer, offsets_buffer, child)
end
end

# List type whose arrays are materialized as LargeListArray.
class LargeListType < VariableSizeListType
  # child: description of the list's element field; it is forwarded to
  # VariableSizeListType together with the "LargeList" type name.
  def initialize(child)
    super("LargeList", child)
  end

  # Wraps already-read buffers and the decoded child array in a
  # LargeListArray that carries this type.
  def build_array(size, validity_buffer, offsets_buffer, child)
    LargeListArray.new(
      self,
      size,
      validity_buffer,
      offsets_buffer,
      child
    )
  end
end

class StructType < Type
attr_reader :children
def initialize(children)
Expand Down
12 changes: 12 additions & 0 deletions ruby/red-arrow-format/test/test-file-reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,18 @@ def test_read
end
end

sub_test_case("LargeList") do
  # Builds a large list of int8 values that includes a null list entry.
  def build_array
    lists = [[-128, 127], nil, [-1, 0, 1]]
    data_type = Arrow::LargeListDataType.new(name: "count", type: :int8)
    Arrow::LargeListArray.new(data_type, lists)
  end

  # The reader must hand back the same nested values under the "value" key.
  def test_read
    expected = [{"value" => [[-128, 127], nil, [-1, 0, 1]]}]
    assert_equal(expected, read)
  end
end

sub_test_case("Struct") do
def build_array
data_type = Arrow::StructDataType.new(count: :int8,
Expand Down
5 changes: 5 additions & 0 deletions ruby/red-arrow/ext/arrow/converters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ namespace red_arrow {
return list_array_value_converter_->convert(array, i);
}

// Converts element `i` of a large list array to a Ruby value by delegating
// to the attached LargeListArrayValueConverter.
// The pointer is dereferenced unconditionally, so the sub-converter must
// have been attached via set_sub_value_converters() beforehand.
VALUE ArrayValueConverter::convert(const arrow::LargeListArray& array,
const int64_t i) {
return large_list_array_value_converter_->convert(array, i);
}

VALUE ArrayValueConverter::convert(const arrow::StructArray& array,
const int64_t i) {
return struct_array_value_converter_->convert(array, i);
Expand Down
115 changes: 115 additions & 0 deletions ruby/red-arrow/ext/arrow/converters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

namespace red_arrow {
class ListArrayValueConverter;
class LargeListArrayValueConverter;
class StructArrayValueConverter;
class MapArrayValueConverter;
class UnionArrayValueConverter;
Expand All @@ -38,18 +39,21 @@ namespace red_arrow {
// Creates a converter with an empty decimal buffer and no sub-converters
// attached: every sub-converter pointer starts as nullptr and is assigned
// later by set_sub_value_converters().
ArrayValueConverter()
: decimal_buffer_(),
list_array_value_converter_(nullptr),
large_list_array_value_converter_(nullptr),
struct_array_value_converter_(nullptr),
map_array_value_converter_(nullptr),
union_array_value_converter_(nullptr),
dictionary_array_value_converter_(nullptr) {
}

inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter,
LargeListArrayValueConverter* large_list_array_value_converter,
StructArrayValueConverter* struct_array_value_converter,
MapArrayValueConverter* map_array_value_converter,
UnionArrayValueConverter* union_array_value_converter,
DictionaryArrayValueConverter* dictionary_array_value_converter) {
list_array_value_converter_ = list_array_value_converter;
large_list_array_value_converter_ = large_list_array_value_converter;
struct_array_value_converter_ = struct_array_value_converter;
map_array_value_converter_ = map_array_value_converter;
union_array_value_converter_ = union_array_value_converter;
Expand Down Expand Up @@ -263,6 +267,9 @@ namespace red_arrow {
VALUE convert(const arrow::ListArray& array,
const int64_t i);

VALUE convert(const arrow::LargeListArray& array,
const int64_t i);

VALUE convert(const arrow::StructArray& array,
const int64_t i);

Expand Down Expand Up @@ -298,6 +305,7 @@ namespace red_arrow {

std::string decimal_buffer_;
ListArrayValueConverter* list_array_value_converter_;
LargeListArrayValueConverter* large_list_array_value_converter_;
StructArrayValueConverter* struct_array_value_converter_;
MapArrayValueConverter* map_array_value_converter_;
UnionArrayValueConverter* union_array_value_converter_;
Expand Down Expand Up @@ -359,6 +367,106 @@ namespace red_arrow {
VISIT(DayTimeInterval)
VISIT(MonthDayNanoInterval)
VISIT(List)
VISIT(LargeList)
VISIT(Struct)
VISIT(Map)
VISIT(SparseUnion)
VISIT(DenseUnion)
VISIT(Dictionary)
VISIT(Decimal128)
VISIT(Decimal256)
// TODO
// VISIT(Extension)

#undef VISIT

private:
// Converts the element at index `i` of `array` to a Ruby value by forwarding
// to the shared ArrayValueConverter, which picks the overload matching
// ArrayType.
template <typename ArrayType>
inline VALUE convert_value(const ArrayType& array,
const int64_t i) {
return array_value_converter_->convert(array, i);
}

// Appends the Ruby value of every element in the current window
// (length_ elements starting at offset_) of `array` to result_.
// Null slots are pushed as Qnil; all other slots go through convert_value().
template <typename ArrayType>
arrow::Status visit_value(const ArrayType& array) {
  // Null-free arrays skip the per-element null test entirely.
  if (array.null_count() <= 0) {
    for (int64_t i = 0; i < length_; ++i) {
      rb_ary_push(result_, convert_value(array, i + offset_));
    }
    return arrow::Status::OK();
  }

  for (int64_t i = 0; i < length_; ++i) {
    const int64_t index = i + offset_;
    const VALUE value =
      array.IsNull(index) ? Qnil : convert_value(array, index);
    rb_ary_push(result_, value);
  }
  return arrow::Status::OK();
}

ArrayValueConverter* array_value_converter_;
int32_t offset_;
int32_t length_;
VALUE result_;
};

class LargeListArrayValueConverter : public arrow::ArrayVisitor {
public:
// Stores the shared element converter and starts with an empty traversal
// state: offset 0, length 0 and no result array (Qnil).
explicit LargeListArrayValueConverter(ArrayValueConverter* converter)
: array_value_converter_(converter),
offset_(0),
length_(0),
result_(Qnil) {}

// Converts the list stored at `index` of `array` into a new Ruby Array.
//
// The visitor state (offset_, length_, result_) is saved first and restored
// afterwards, because visiting the child values can re-enter this converter
// for nested lists.
VALUE convert(const arrow::LargeListArray& array, const int64_t index) {
  const auto saved_offset = offset_;
  const auto saved_length = length_;
  const auto saved_result = result_;

  // Point the visitor at the slice of child values that belongs to this list.
  offset_ = array.value_offset(index);
  length_ = array.value_length(index);
  result_ = rb_ary_new_capa(length_);
  check_status(array.values()->Accept(this),
               "[raw-records][large-list-array]");

  const auto converted = result_;
  offset_ = saved_offset;
  length_ = saved_length;
  result_ = saved_result;
  return converted;
}

#define VISIT(TYPE) \
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
return visit_value(array); \
}

VISIT(Null)
VISIT(Boolean)
VISIT(Int8)
VISIT(Int16)
VISIT(Int32)
VISIT(Int64)
VISIT(UInt8)
VISIT(UInt16)
VISIT(UInt32)
VISIT(UInt64)
VISIT(HalfFloat)
VISIT(Float)
VISIT(Double)
VISIT(Binary)
VISIT(String)
VISIT(FixedSizeBinary)
VISIT(Date32)
VISIT(Date64)
VISIT(Time32)
VISIT(Time64)
VISIT(Timestamp)
VISIT(MonthInterval)
VISIT(DayTimeInterval)
VISIT(MonthDayNanoInterval)
VISIT(List)
VISIT(LargeList)
VISIT(Struct)
VISIT(Map)
VISIT(SparseUnion)
Expand Down Expand Up @@ -465,6 +573,7 @@ namespace red_arrow {
VISIT(DayTimeInterval)
VISIT(MonthDayNanoInterval)
VISIT(List)
VISIT(LargeList)
VISIT(Struct)
VISIT(Map)
VISIT(SparseUnion)
Expand Down Expand Up @@ -567,6 +676,7 @@ namespace red_arrow {
VISIT(DayTimeInterval)
VISIT(MonthDayNanoInterval)
VISIT(List)
VISIT(LargeList)
VISIT(Struct)
VISIT(Map)
VISIT(SparseUnion)
Expand Down Expand Up @@ -670,6 +780,7 @@ namespace red_arrow {
VISIT(DayTimeInterval)
VISIT(MonthDayNanoInterval)
VISIT(List)
VISIT(LargeList)
VISIT(Struct)
VISIT(Map)
VISIT(SparseUnion)
Expand Down Expand Up @@ -781,6 +892,7 @@ namespace red_arrow {
VISIT(DayTimeInterval)
VISIT(MonthDayNanoInterval)
VISIT(List)
VISIT(LargeList)
VISIT(Struct)
VISIT(Map)
VISIT(SparseUnion)
Expand Down Expand Up @@ -810,12 +922,14 @@ namespace red_arrow {
explicit Converter()
: array_value_converter_(),
list_array_value_converter_(&array_value_converter_),
large_list_array_value_converter_(&array_value_converter_),
struct_array_value_converter_(&array_value_converter_),
map_array_value_converter_(&array_value_converter_),
union_array_value_converter_(&array_value_converter_),
dictionary_array_value_converter_(&array_value_converter_) {
array_value_converter_.
set_sub_value_converters(&list_array_value_converter_,
&large_list_array_value_converter_,
&struct_array_value_converter_,
&map_array_value_converter_,
&union_array_value_converter_,
Expand All @@ -830,6 +944,7 @@ namespace red_arrow {

ArrayValueConverter array_value_converter_;
ListArrayValueConverter list_array_value_converter_;
LargeListArrayValueConverter large_list_array_value_converter_;
StructArrayValueConverter struct_array_value_converter_;
MapArrayValueConverter map_array_value_converter_;
UnionArrayValueConverter union_array_value_converter_;
Expand Down
1 change: 1 addition & 0 deletions ruby/red-arrow/ext/arrow/values.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ namespace red_arrow {
VISIT(DayTimeInterval)
VISIT(MonthDayNanoInterval)
VISIT(List)
VISIT(LargeList)
VISIT(Struct)
VISIT(Map)
VISIT(SparseUnion)
Expand Down
29 changes: 29 additions & 0 deletions ruby/red-arrow/lib/arrow/large-list-array-builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  # Builder for large list arrays; value appending comes from
  # ListValuesAppendable, which is prepended below.
  class LargeListArrayBuilder
    # Builds an array of +data_type+ from +values+ in a single call by
    # creating a builder and delegating to its instance-level #build.
    def self.build(data_type, values)
      new(data_type).build(values)
    end

    prepend ListValuesAppendable
  end
end
Loading
Loading