Skip to content

Commit 610c201

Browse files
paleolimbotbkietz
andauthored
GH-46659: [C++] Fix export of extension arrays with binary view/string view storage (#46660)
### Rationale for this change Extension arrays exported with binary view/string view storage did not export the variadic sizes buffer which resulted in crashes when reimporting. ### What changes are included in this PR? The expression that controlled whether the variadic sizes buffer was written was updated. ### Are these changes tested? Yes, a test was added ### Are there any user-facing changes? No * GitHub Issue: #46659 Lead-authored-by: Dewey Dunnington <[email protected]> Co-authored-by: Dewey Dunnington <[email protected]> Co-authored-by: Benjamin Kietzman <[email protected]> Signed-off-by: Dewey Dunnington <[email protected]>
1 parent 03a1867 commit 610c201

File tree

4 files changed

+61
-4
lines changed

4 files changed

+61
-4
lines changed

cpp/src/arrow/c/bridge.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -586,8 +586,8 @@ struct ArrayExporter {
586586
++buffers_begin;
587587
}
588588

589-
bool need_variadic_buffer_sizes =
590-
data->type->id() == Type::BINARY_VIEW || data->type->id() == Type::STRING_VIEW;
589+
bool need_variadic_buffer_sizes = data->type->storage_id() == Type::BINARY_VIEW ||
590+
data->type->storage_id() == Type::STRING_VIEW;
591591
if (need_variadic_buffer_sizes) {
592592
++n_buffers;
593593
}

cpp/src/arrow/c/bridge_test.cc

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,8 +580,10 @@ struct ArrayExportChecker {
580580
--expected_n_buffers;
581581
++expected_buffers;
582582
}
583-
bool has_variadic_buffer_sizes = expected_data.type->id() == Type::STRING_VIEW ||
584-
expected_data.type->id() == Type::BINARY_VIEW;
583+
584+
bool has_variadic_buffer_sizes =
585+
expected_data.type->storage_id() == Type::BINARY_VIEW ||
586+
expected_data.type->storage_id() == Type::STRING_VIEW;
585587
ASSERT_EQ(c_export->n_buffers, expected_n_buffers + has_variadic_buffer_sizes);
586588
ASSERT_NE(c_export->buffers, nullptr);
587589

@@ -960,6 +962,13 @@ TEST_F(TestArrayExport, BinaryViewMultipleBuffers) {
960962
});
961963
}
962964

965+
TEST_F(TestArrayExport, BinaryViewExtensionWithMultipleBuffers) {
966+
TestPrimitive([&] {
967+
auto storage = MakeBinaryViewArrayWithMultipleDataBuffers();
968+
return BinaryViewExtensionType::WrapArray(binary_view_extension_type(), storage);
969+
});
970+
}
971+
963972
TEST_F(TestArrayExport, Null) {
964973
TestPrimitive(null(), "[null, null, null]");
965974
TestPrimitive(null(), "[]");

cpp/src/arrow/testing/extension_type.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,23 @@ class ARROW_TESTING_EXPORT DictExtensionType : public ExtensionType {
132132
std::string Serialize() const override { return "dict-extension-serialized"; }
133133
};
134134

135+
class ARROW_TESTING_EXPORT BinaryViewExtensionType : public ExtensionType {
136+
public:
137+
BinaryViewExtensionType() : ExtensionType(binary_view()) {}
138+
139+
std::string extension_name() const override { return "binary_view"; }
140+
141+
bool ExtensionEquals(const ExtensionType& other) const override;
142+
143+
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
144+
145+
Result<std::shared_ptr<DataType>> Deserialize(
146+
std::shared_ptr<DataType> storage_type,
147+
const std::string& serialized) const override;
148+
149+
std::string Serialize() const override { return "binary_view_serialized"; }
150+
};
151+
135152
// A minimal extension type that does not error when passed blank extension information
136153
class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public ExtensionType {
137154
public:
@@ -190,6 +207,9 @@ std::shared_ptr<DataType> list_extension_type();
190207
ARROW_TESTING_EXPORT
191208
std::shared_ptr<DataType> dict_extension_type();
192209

210+
ARROW_TESTING_EXPORT
211+
std::shared_ptr<DataType> binary_view_extension_type();
212+
193213
ARROW_TESTING_EXPORT
194214
std::shared_ptr<DataType> complex128();
195215

cpp/src/arrow/testing/gtest_util.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,30 @@ Result<std::shared_ptr<DataType>> DictExtensionType::Deserialize(
944944
return std::make_shared<DictExtensionType>();
945945
}
946946

947+
bool BinaryViewExtensionType::ExtensionEquals(const ExtensionType& other) const {
948+
return (other.extension_name() == this->extension_name());
949+
}
950+
951+
std::shared_ptr<Array> BinaryViewExtensionType::MakeArray(
952+
std::shared_ptr<ArrayData> data) const {
953+
DCHECK_EQ(data->type->id(), Type::EXTENSION);
954+
DCHECK_EQ("binary_view",
955+
static_cast<const ExtensionType&>(*data->type).extension_name());
956+
return std::make_shared<TinyintArray>(data);
957+
}
958+
959+
Result<std::shared_ptr<DataType>> BinaryViewExtensionType::Deserialize(
960+
std::shared_ptr<DataType> storage_type, const std::string& serialized) const {
961+
if (serialized != "binary_view_serialized") {
962+
return Status::Invalid("Type identifier did not match: '", serialized, "'");
963+
}
964+
if (!storage_type->Equals(*int16())) {
965+
return Status::Invalid("Invalid storage type for BinaryViewExtensionType: ",
966+
storage_type->ToString());
967+
}
968+
return std::make_shared<BinaryViewExtensionType>();
969+
}
970+
947971
bool Complex128Type::ExtensionEquals(const ExtensionType& other) const {
948972
return (other.extension_name() == this->extension_name());
949973
}
@@ -976,6 +1000,10 @@ std::shared_ptr<DataType> list_extension_type() {
9761000
return std::make_shared<ListExtensionType>();
9771001
}
9781002

1003+
std::shared_ptr<DataType> binary_view_extension_type() {
1004+
return std::make_shared<BinaryViewExtensionType>();
1005+
}
1006+
9791007
std::shared_ptr<DataType> dict_extension_type() {
9801008
return std::make_shared<DictExtensionType>();
9811009
}

0 commit comments

Comments
 (0)