From d2253c8f796a273e888d92f6b1d24c88599fdbf7 Mon Sep 17 00:00:00 2001 From: Arash Andishgar Date: Sun, 31 Aug 2025 14:47:21 +0330 Subject: [PATCH 1/5] Correct comments and variable naming --- cpp/src/arrow/compare.h | 33 +++++++ cpp/src/arrow/record_batch.cc | 30 ++----- cpp/src/arrow/record_batch.h | 16 +++- cpp/src/arrow/record_batch_test.cc | 135 ++++++++++++++++++++++------- 4 files changed, 155 insertions(+), 59 deletions(-) diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h index 4d2282c982a7..c4ced5ab5f35 100644 --- a/cpp/src/arrow/compare.h +++ b/cpp/src/arrow/compare.h @@ -83,6 +83,37 @@ class EqualOptions { return res; } + /// Whether the \ref arrow::Schema property is used in the comparison. + /// + /// This option only affects the Equals methods + /// and has no effect on ApproxEquals methods. + bool use_schema() const { return use_schema_; } + + /// Return a new EqualOptions object with the "use_schema_" property changed. + /// Setting this option is false making the value of \ref EqualOptions::use_metadata_ + /// is ignored. + EqualOptions use_schema(bool v) const { + auto res = EqualOptions(*this); + res.use_schema_ = v; + return res; + } + + /// Whether the "metadata" in \ref arrow::Schema is used in the comparison. + /// + /// This option only affects the Equals methods + /// and has no effect on the ApproxEquals methods. + /// + /// Note: This option is only considered when \ref arrow::EqualOptions::use_schema_ is + /// set to true. + bool use_metadata() const { return use_metadata_; } + + /// Return a new EqualOptions object with the "use_metadata" property changed. + EqualOptions use_metadata(bool v) const { + auto res = EqualOptions(*this); + res.use_metadata_ = v; + return res; + } + /// The ostream to which a diff will be formatted if arrays disagree. /// If this is null (the default) no diff will be formatted. 
std::ostream* diff_sink() const { return diff_sink_; } @@ -103,6 +134,8 @@ class EqualOptions { bool nans_equal_ = false; bool signed_zeros_equal_ = true; bool use_atol_ = false; + bool use_schema_ = true; + bool use_metadata_ = false; std::ostream* diff_sink_ = NULLPTR; }; diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index bc952d3fe2f2..2429113333ac 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -35,6 +35,7 @@ #include "arrow/array/statistics.h" #include "arrow/array/validate.h" #include "arrow/c/abi.h" +#include "arrow/compare.h" #include "arrow/pretty_print.h" #include "arrow/status.h" #include "arrow/table.h" @@ -349,30 +350,10 @@ bool CanIgnoreNaNInEquality(const RecordBatch& batch, const EqualOptions& opts) bool RecordBatch::Equals(const RecordBatch& other, bool check_metadata, const EqualOptions& opts) const { - if (this == &other) { - if (CanIgnoreNaNInEquality(*this, opts)) { - return true; - } - } else { - if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) { - return false; - } else if (!schema_->Equals(*other.schema(), check_metadata)) { - return false; - } else if (device_type() != other.device_type()) { - return false; - } - } - - for (int i = 0; i < num_columns(); ++i) { - if (!column(i)->Equals(other.column(i), opts)) { - return false; - } - } - - return true; + return Equals(other, opts.use_metadata(check_metadata)); } -bool RecordBatch::ApproxEquals(const RecordBatch& other, const EqualOptions& opts) const { +bool RecordBatch::Equals(const RecordBatch& other, const EqualOptions& opts) const { if (this == &other) { if (CanIgnoreNaNInEquality(*this, opts)) { return true; @@ -380,13 +361,16 @@ bool RecordBatch::ApproxEquals(const RecordBatch& other, const EqualOptions& opt } else { if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) { return false; + } else if (opts.use_schema() && + !schema_->Equals(*other.schema(), opts.use_metadata())) { 
+ return false; } else if (device_type() != other.device_type()) { return false; } } for (int i = 0; i < num_columns(); ++i) { - if (!column(i)->ApproxEquals(other.column(i), opts)) { + if (!column(i)->Equals(other.column(i), opts)) { return false; } } diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h index 43a8ee63b596..7a283d5368f5 100644 --- a/cpp/src/arrow/record_batch.h +++ b/cpp/src/arrow/record_batch.h @@ -118,22 +118,32 @@ class ARROW_EXPORT RecordBatch { static Result> FromStructArray( const std::shared_ptr& array, MemoryPool* pool = default_memory_pool()); - /// \brief Determine if two record batches are exactly equal + /// \brief Determine if two record batches are equal /// /// \param[in] other the RecordBatch to compare with - /// \param[in] check_metadata if true, check that Schema metadata is the same + /// \param[in] check_metadata if true, the schema metadata will be compared, + /// regardless of the value set in \ref EqualOptions::use_metadata_ /// \param[in] opts the options for equality comparisons /// \return true if batches are equal bool Equals(const RecordBatch& other, bool check_metadata = false, const EqualOptions& opts = EqualOptions::Defaults()) const; + /// \brief Determine if two record batches are equal + /// + /// \param[in] other the RecordBatch to compare with + /// \param[in] opts the options for equality comparisons + /// \return true if batches are equal + bool Equals(const RecordBatch& other, const EqualOptions& opts) const; + /// \brief Determine if two record batches are approximately equal /// /// \param[in] other the RecordBatch to compare with /// \param[in] opts the options for equality comparisons /// \return true if batches are approximately equal bool ApproxEquals(const RecordBatch& other, - const EqualOptions& opts = EqualOptions::Defaults()) const; + const EqualOptions& opts = EqualOptions::Defaults()) const { + return Equals(other, opts.use_schema(false).use_atol(true)); + } /// \return the 
record batch's schema const std::shared_ptr& schema() const { return schema_; } diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc index 18b21e272105..7b0b25baafbd 100644 --- a/cpp/src/arrow/record_batch_test.cc +++ b/cpp/src/arrow/record_batch_test.cc @@ -40,6 +40,7 @@ #include "arrow/array/util.h" #include "arrow/c/abi.h" #include "arrow/chunked_array.h" +#include "arrow/compare.h" #include "arrow/config.h" #include "arrow/status.h" #include "arrow/table.h" @@ -64,6 +65,48 @@ class TestRecordBatch : public ::testing::Test {}; TEST_F(TestRecordBatch, Equals) { const int length = 10; + auto f0 = field("f0", int32()); + auto f1 = field("f1", uint8()); + auto f2 = field("f2", int16()); + + auto schema0_f0_f1_f2 = schema({f0, f1, f2}); + auto schema1_f0_f1_f2 = schema({f0, f1, f2}); + auto schema2_f0_f1 = schema({f0, f1}); + + random::RandomArrayGenerator gen(42); + + auto a0 = gen.ArrayOf(int32(), length); + auto a1 = gen.ArrayOf(uint8(), length); + auto a2 = gen.ArrayOf(int16(), length); + auto a3 = a0->Slice(0, length / 2); + auto a4 = a1->Slice(0, length / 2); + auto a5 = gen.ArrayOf(int32(), length); + auto a6 = gen.ArrayOf(uint8(), length); + + auto b0_a0_a1_a2 = RecordBatch::Make(schema0_f0_f1_f2, length, {a0, a1, a2}); + auto b1_a0_a1_a2 = RecordBatch::Make(schema1_f0_f1_f2, length, {a0, a1, a2}); + auto b2_a0_a1 = RecordBatch::Make(schema2_f0_f1, length, {a0, a1}); + auto b3_a3_a4_half_rows = RecordBatch::Make(schema2_f0_f1, length / 2, {a3, a4}); + auto b4_a5_a6 = RecordBatch::Make(schema2_f0_f1, length, {a5, a6}); + + // Same Values + ASSERT_TRUE(b0_a0_a1_a2->Equals(*b1_a0_a1_a2)); + + // Different number of columns + ASSERT_FALSE(b0_a0_a1_a2->Equals(*b2_a0_a1)); + + // Different number of rows + ASSERT_FALSE(b2_a0_a1->Equals(*b3_a3_a4_half_rows)); + + // Different values + ASSERT_FALSE(b2_a0_a1->Equals(*b4_a5_a6)); +} + +class TestRecordBatchEqualOptions : public TestRecordBatch {}; + +TEST_F(TestRecordBatchEqualOptions, 
MetadataAndSchema) { + int length = 10; + auto f0 = field("f0", int32()); auto f1 = field("f1", uint8()); auto f2 = field("f2", int16()); @@ -71,11 +114,9 @@ TEST_F(TestRecordBatch, Equals) { auto metadata = key_value_metadata({"foo"}, {"bar"}); - std::vector> fields = {f0, f1, f2}; - auto schema = ::arrow::schema({f0, f1, f2}); - auto schema2 = ::arrow::schema({f0, f1}); - auto schema3 = ::arrow::schema({f0, f1, f2}, metadata); - auto schema4 = ::arrow::schema({f0, f1, f2b}); + auto schema0_f0_f1_f2 = schema({f0, f1, f2}); + auto schema1_f0_f1_f2_with_metadat = schema({f0, f1, f2}, metadata); + auto schema2_f0_f1_f2b = schema({f0, f1, f2b}); random::RandomArrayGenerator gen(42); @@ -83,25 +124,39 @@ TEST_F(TestRecordBatch, Equals) { auto a1 = gen.ArrayOf(uint8(), length); auto a2 = gen.ArrayOf(int16(), length); - auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2}); - auto b2 = RecordBatch::Make(schema3, length, {a0, a1, a2}); - auto b3 = RecordBatch::Make(schema2, length, {a0, a1}); - auto b4 = RecordBatch::Make(schema, length, {a0, a1, a1}); - auto b5 = RecordBatch::Make(schema4, length, {a0, a1, a2}); - - ASSERT_TRUE(b1->Equals(*b1)); - ASSERT_FALSE(b1->Equals(*b3)); - ASSERT_FALSE(b1->Equals(*b4)); + // All RecordBatches have the same values but different schemas. 
+ auto b0_f0_f1_f2 = RecordBatch::Make(schema0_f0_f1_f2, length, {a0, a1, a2}); + auto b1_f0_f1_f2_with_metadat = + RecordBatch::Make(schema1_f0_f1_f2_with_metadat, length, {a0, a1, a2}); + auto b2_f0_f1_f2b = RecordBatch::Make(schema2_f0_f1_f2b, length, {a0, a1, a2}); + auto options = EqualOptions::Defaults(); // Same values and types, but different field names - ASSERT_FALSE(b1->Equals(*b5)); + ASSERT_FALSE(b0_f0_f1_f2->Equals(*b2_f0_f1_f2b)); + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b2_f0_f1_f2b, options.use_schema(false))); + ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b2_f0_f1_f2b)); + ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b2_f0_f1_f2b, options.use_schema(true))); // Different metadata - ASSERT_TRUE(b1->Equals(*b2)); - ASSERT_FALSE(b1->Equals(*b2, /*check_metadata=*/true)); + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat)); + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, options)); + ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + /*check_metadata=*/true)); + ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + /*check_metadata=*/true, options.use_schema(true))); + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + /*check_metadata=*/true, options.use_schema(false))); + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + options.use_schema(true).use_metadata(false))); + ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + options.use_schema(true).use_metadata(true))); + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + options.use_schema(false).use_metadata(true))); + ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b1_f0_f1_f2_with_metadat, + options.use_schema(true).use_metadata(true))); } -TEST_F(TestRecordBatch, EqualOptions) { +TEST_F(TestRecordBatchEqualOptions, NaN) { int length = 2; auto f = field("f", float64()); @@ -114,13 +169,27 @@ TEST_F(TestRecordBatch, EqualOptions) { auto b1 = RecordBatch::Make(schema, length, {array1}); auto b2 = RecordBatch::Make(schema, length, {array2}); - 
EXPECT_FALSE(b1->Equals(*b2, /*check_metadata=*/false, - EqualOptions::Defaults().nans_equal(false))); - EXPECT_TRUE(b1->Equals(*b2, /*check_metadata=*/false, - EqualOptions::Defaults().nans_equal(true))); + EXPECT_FALSE(b1->Equals(*b2, EqualOptions::Defaults().nans_equal(false))); + EXPECT_TRUE(b1->Equals(*b2, EqualOptions::Defaults().nans_equal(true))); +} + +TEST_F(TestRecordBatchEqualOptions, SignedZero) { + int length = 2; + auto f = field("f", float64()); + + auto schema = ::arrow::schema({f}); + + std::shared_ptr array1, array2; + ArrayFromVector(float64(), {true, true}, {0.5, +0.0}, &array1); + ArrayFromVector(float64(), {true, true}, {0.5, -0.0}, &array2); + auto b1 = RecordBatch::Make(schema, length, {array1}); + auto b2 = RecordBatch::Make(schema, length, {array2}); + + ASSERT_FALSE(b1->Equals(*b2, EqualOptions::Defaults().signed_zeros_equal(false))); + ASSERT_TRUE(b1->Equals(*b2, EqualOptions::Defaults().signed_zeros_equal(true))); } -TEST_F(TestRecordBatch, ApproxEqualOptions) { +TEST_F(TestRecordBatchEqualOptions, Approx) { int length = 2; auto f = field("f", float64()); @@ -137,8 +206,8 @@ TEST_F(TestRecordBatch, ApproxEqualOptions) { EXPECT_FALSE(b1->ApproxEquals(*b2, EqualOptions::Defaults().nans_equal(true))); auto options = EqualOptions::Defaults().nans_equal(true).atol(0.1); - EXPECT_FALSE(b1->Equals(*b2, false, options)); - EXPECT_TRUE(b1->Equals(*b2, false, options.use_atol(true))); + EXPECT_FALSE(b1->Equals(*b2, options)); + EXPECT_TRUE(b1->Equals(*b2, options.use_atol(true))); EXPECT_TRUE(b1->ApproxEquals(*b2, options)); } @@ -158,8 +227,8 @@ TEST_F(TestRecordBatchEqualsSameAddress, NonFloatType) { auto options = EqualOptions::Defaults(); - ASSERT_TRUE(b0->Equals(*b1, true, options)); - ASSERT_TRUE(b0->Equals(*b1, true, options.nans_equal(true))); + ASSERT_TRUE(b0->Equals(*b1, options)); + ASSERT_TRUE(b0->Equals(*b1, options.nans_equal(true))); ASSERT_TRUE(b0->ApproxEquals(*b1, options)); ASSERT_TRUE(b0->ApproxEquals(*b1, 
options.nans_equal(true))); @@ -180,8 +249,8 @@ TEST_F(TestRecordBatchEqualsSameAddress, NestedTypesWithoutFloatType) { auto options = EqualOptions::Defaults(); - ASSERT_TRUE(b0->Equals(*b1, true, options)); - ASSERT_TRUE(b0->Equals(*b1, true, options.nans_equal(true))); + ASSERT_TRUE(b0->Equals(*b1, options)); + ASSERT_TRUE(b0->Equals(*b1, options.nans_equal(true))); ASSERT_TRUE(b0->ApproxEquals(*b1, options)); ASSERT_TRUE(b0->ApproxEquals(*b1, options.nans_equal(true))); @@ -201,8 +270,8 @@ TEST_F(TestRecordBatchEqualsSameAddress, FloatType) { auto options = EqualOptions::Defaults(); - ASSERT_FALSE(b0->Equals(*b1, true, options)); - ASSERT_TRUE(b0->Equals(*b1, true, options.nans_equal(true))); + ASSERT_FALSE(b0->Equals(*b1, options)); + ASSERT_TRUE(b0->Equals(*b1, options.nans_equal(true))); ASSERT_FALSE(b0->ApproxEquals(*b1, options)); ASSERT_TRUE(b0->ApproxEquals(*b1, options.nans_equal(true))); @@ -223,8 +292,8 @@ TEST_F(TestRecordBatchEqualsSameAddress, NestedTypesWithFloatType) { auto options = EqualOptions::Defaults(); - ASSERT_FALSE(b0->Equals(*b1, true, options)); - ASSERT_TRUE(b0->Equals(*b1, true, options.nans_equal(true))); + ASSERT_FALSE(b0->Equals(*b1, options)); + ASSERT_TRUE(b0->Equals(*b1, options.nans_equal(true))); ASSERT_FALSE(b0->ApproxEquals(*b1, options)); ASSERT_TRUE(b0->ApproxEquals(*b1, options.nans_equal(true))); From e9af6257fd76a7a4c23433b460c48e920618154a Mon Sep 17 00:00:00 2001 From: Arash Andishgar Date: Sun, 31 Aug 2025 19:51:26 +0330 Subject: [PATCH 2/5] correct typo --- cpp/src/arrow/record_batch_test.cc | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc index 7b0b25baafbd..32ea3b602d16 100644 --- a/cpp/src/arrow/record_batch_test.cc +++ b/cpp/src/arrow/record_batch_test.cc @@ -115,7 +115,7 @@ TEST_F(TestRecordBatchEqualOptions, MetadataAndSchema) { auto metadata = key_value_metadata({"foo"}, {"bar"}); auto 
schema0_f0_f1_f2 = schema({f0, f1, f2}); - auto schema1_f0_f1_f2_with_metadat = schema({f0, f1, f2}, metadata); + auto schema1_f0_f1_f2_with_metadata = schema({f0, f1, f2}, metadata); auto schema2_f0_f1_f2b = schema({f0, f1, f2b}); random::RandomArrayGenerator gen(42); @@ -126,8 +126,8 @@ TEST_F(TestRecordBatchEqualOptions, MetadataAndSchema) { // All RecordBatches have the same values but different schemas. auto b0_f0_f1_f2 = RecordBatch::Make(schema0_f0_f1_f2, length, {a0, a1, a2}); - auto b1_f0_f1_f2_with_metadat = - RecordBatch::Make(schema1_f0_f1_f2_with_metadat, length, {a0, a1, a2}); + auto b1_f0_f1_f2_with_metadata = + RecordBatch::Make(schema1_f0_f1_f2_with_metadata, length, {a0, a1, a2}); auto b2_f0_f1_f2b = RecordBatch::Make(schema2_f0_f1_f2b, length, {a0, a1, a2}); auto options = EqualOptions::Defaults(); @@ -138,21 +138,21 @@ TEST_F(TestRecordBatchEqualOptions, MetadataAndSchema) { ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b2_f0_f1_f2b, options.use_schema(true))); // Different metadata - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat)); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, options)); - ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata)); + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, options)); + ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, /*check_metadata=*/true)); - ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, /*check_metadata=*/true, options.use_schema(true))); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, /*check_metadata=*/true, options.use_schema(false))); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, options.use_schema(true).use_metadata(false))); - 
ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, options.use_schema(true).use_metadata(true))); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, + ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, options.use_schema(false).use_metadata(true))); - ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b1_f0_f1_f2_with_metadat, + ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b1_f0_f1_f2_with_metadata, options.use_schema(true).use_metadata(true))); } From 22a35448043224d2a7a26b9d9acd0eadd2a51d9a Mon Sep 17 00:00:00 2001 From: Arash Andishgar Date: Mon, 1 Sep 2025 14:05:31 +0330 Subject: [PATCH 3/5] apply kou suggestion correct naming correct documentation comment add compare.h document comment on utilities.rst --- cpp/src/arrow/compare.h | 1 + cpp/src/arrow/record_batch_test.cc | 102 +++++++++++++++-------------- docs/source/cpp/api/utilities.rst | 6 ++ 3 files changed, 59 insertions(+), 50 deletions(-) diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h index c4ced5ab5f35..7b2d1d6cf6c6 100644 --- a/cpp/src/arrow/compare.h +++ b/cpp/src/arrow/compare.h @@ -90,6 +90,7 @@ class EqualOptions { bool use_schema() const { return use_schema_; } /// Return a new EqualOptions object with the "use_schema_" property changed. + /// /// Setting this option is false making the value of \ref EqualOptions::use_metadata_ /// is ignored. 
EqualOptions use_schema(bool v) const { diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc index 32ea3b602d16..4fec2ee6864d 100644 --- a/cpp/src/arrow/record_batch_test.cc +++ b/cpp/src/arrow/record_batch_test.cc @@ -69,37 +69,39 @@ TEST_F(TestRecordBatch, Equals) { auto f1 = field("f1", uint8()); auto f2 = field("f2", int16()); - auto schema0_f0_f1_f2 = schema({f0, f1, f2}); - auto schema1_f0_f1_f2 = schema({f0, f1, f2}); - auto schema2_f0_f1 = schema({f0, f1}); + auto schema = ::arrow::schema({f0, f1, f2}); + auto schema_same = ::arrow::schema({f0, f1, f2}); + auto schema_fewer_fields = ::arrow::schema({f0, f1}); random::RandomArrayGenerator gen(42); - auto a0 = gen.ArrayOf(int32(), length); - auto a1 = gen.ArrayOf(uint8(), length); - auto a2 = gen.ArrayOf(int16(), length); - auto a3 = a0->Slice(0, length / 2); - auto a4 = a1->Slice(0, length / 2); - auto a5 = gen.ArrayOf(int32(), length); - auto a6 = gen.ArrayOf(uint8(), length); - - auto b0_a0_a1_a2 = RecordBatch::Make(schema0_f0_f1_f2, length, {a0, a1, a2}); - auto b1_a0_a1_a2 = RecordBatch::Make(schema1_f0_f1_f2, length, {a0, a1, a2}); - auto b2_a0_a1 = RecordBatch::Make(schema2_f0_f1, length, {a0, a1}); - auto b3_a3_a4_half_rows = RecordBatch::Make(schema2_f0_f1, length / 2, {a3, a4}); - auto b4_a5_a6 = RecordBatch::Make(schema2_f0_f1, length, {a5, a6}); + auto a_f0 = gen.ArrayOf(int32(), length); + auto a_f1 = gen.ArrayOf(uint8(), length); + auto a_f2 = gen.ArrayOf(int16(), length); + auto a_f0_half = a_f0->Slice(0, length / 2); + auto a_f1_half = a_f1->Slice(0, length / 2); + auto a_f0_different = gen.ArrayOf(int32(), length); + auto a_f1_different = gen.ArrayOf(uint8(), length); + + auto b = RecordBatch::Make(schema, length, {a_f0, a_f1, a_f2}); + auto b_same = RecordBatch::Make(schema_same, length, {a_f0, a_f1, a_f2}); + auto b_fewer_fields = RecordBatch::Make(schema_fewer_fields, length, {a_f0, a_f1}); + auto b_fewer_fields_half = + 
RecordBatch::Make(schema_fewer_fields, length / 2, {a_f0_half, a_f1_half}); + auto b_fewer_fields_different = + RecordBatch::Make(schema_fewer_fields, length, {a_f0_different, a_f1_different}); // Same Values - ASSERT_TRUE(b0_a0_a1_a2->Equals(*b1_a0_a1_a2)); + ASSERT_TRUE(b->Equals(*b_same)); // Different number of columns - ASSERT_FALSE(b0_a0_a1_a2->Equals(*b2_a0_a1)); + ASSERT_FALSE(b->Equals(*b_fewer_fields)); // Different number of rows - ASSERT_FALSE(b2_a0_a1->Equals(*b3_a3_a4_half_rows)); + ASSERT_FALSE(b_fewer_fields->Equals(*b_fewer_fields_half)); // Different values - ASSERT_FALSE(b2_a0_a1->Equals(*b4_a5_a6)); + ASSERT_FALSE(b_fewer_fields->Equals(*b_fewer_fields_different)); } class TestRecordBatchEqualOptions : public TestRecordBatch {}; @@ -114,46 +116,46 @@ TEST_F(TestRecordBatchEqualOptions, MetadataAndSchema) { auto metadata = key_value_metadata({"foo"}, {"bar"}); - auto schema0_f0_f1_f2 = schema({f0, f1, f2}); - auto schema1_f0_f1_f2_with_metadata = schema({f0, f1, f2}, metadata); - auto schema2_f0_f1_f2b = schema({f0, f1, f2b}); + auto schema = ::arrow::schema({f0, f1, f2}); + auto schema_with_metadata = ::arrow::schema({f0, f1, f2}, metadata); + auto schema_renamed_field = ::arrow::schema({f0, f1, f2b}); random::RandomArrayGenerator gen(42); - auto a0 = gen.ArrayOf(int32(), length); - auto a1 = gen.ArrayOf(uint8(), length); - auto a2 = gen.ArrayOf(int16(), length); + auto a_f0 = gen.ArrayOf(int32(), length); + auto a_f1 = gen.ArrayOf(uint8(), length); + auto a_f2 = gen.ArrayOf(int16(), length); + auto a_f2b = a_f2; // All RecordBatches have the same values but different schemas. 
- auto b0_f0_f1_f2 = RecordBatch::Make(schema0_f0_f1_f2, length, {a0, a1, a2}); - auto b1_f0_f1_f2_with_metadata = - RecordBatch::Make(schema1_f0_f1_f2_with_metadata, length, {a0, a1, a2}); - auto b2_f0_f1_f2b = RecordBatch::Make(schema2_f0_f1_f2b, length, {a0, a1, a2}); + auto b = RecordBatch::Make(schema, length, {a_f0, a_f1, a_f2}); + auto b_with_metadata = + RecordBatch::Make(schema_with_metadata, length, {a_f0, a_f1, a_f2}); + auto b_renamed_field = + RecordBatch::Make(schema_renamed_field, length, {a_f0, a_f1, a_f2b}); auto options = EqualOptions::Defaults(); + // Same values and types, but different field names - ASSERT_FALSE(b0_f0_f1_f2->Equals(*b2_f0_f1_f2b)); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b2_f0_f1_f2b, options.use_schema(false))); - ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b2_f0_f1_f2b)); - ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b2_f0_f1_f2b, options.use_schema(true))); + ASSERT_FALSE(b->Equals(*b_renamed_field)); + ASSERT_TRUE(b->Equals(*b_renamed_field, options.use_schema(false))); + ASSERT_TRUE(b->ApproxEquals(*b_renamed_field)); + ASSERT_TRUE(b->ApproxEquals(*b_renamed_field, options.use_schema(true))); // Different metadata - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata)); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, options)); - ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, - /*check_metadata=*/true)); - ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, - /*check_metadata=*/true, options.use_schema(true))); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, - /*check_metadata=*/true, options.use_schema(false))); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, - options.use_schema(true).use_metadata(false))); - ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, - options.use_schema(true).use_metadata(true))); - ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadata, - options.use_schema(false).use_metadata(true))); - 
ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b1_f0_f1_f2_with_metadata, - options.use_schema(true).use_metadata(true))); + ASSERT_TRUE(b->Equals(*b_with_metadata)); + ASSERT_TRUE(b->Equals(*b_with_metadata, options)); + ASSERT_FALSE(b->Equals(*b_with_metadata, + /*check_metadata=*/true)); + ASSERT_FALSE(b->Equals(*b_with_metadata, + /*check_metadata=*/true, options.use_schema(true))); + ASSERT_TRUE(b->Equals(*b_with_metadata, + /*check_metadata=*/true, options.use_schema(false))); + ASSERT_TRUE(b->Equals(*b_with_metadata, options.use_schema(true).use_metadata(false))); + ASSERT_FALSE(b->Equals(*b_with_metadata, options.use_schema(true).use_metadata(true))); + ASSERT_TRUE(b->Equals(*b_with_metadata, options.use_schema(false).use_metadata(true))); + ASSERT_TRUE( + b->ApproxEquals(*b_with_metadata, options.use_schema(true).use_metadata(true))); } TEST_F(TestRecordBatchEqualOptions, NaN) { diff --git a/docs/source/cpp/api/utilities.rst b/docs/source/cpp/api/utilities.rst index 5ce3d7d1a4e4..b8d542f48192 100644 --- a/docs/source/cpp/api/utilities.rst +++ b/docs/source/cpp/api/utilities.rst @@ -43,6 +43,12 @@ Iterators .. doxygenclass:: arrow::VectorIterator :members: +Comparison +========== + +.. doxygenclass:: arrow::EqualOptions + :members: + Compression =========== From c876f0c934564e137f6cd493583bf1d76d06a756 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 2 Sep 2025 13:31:38 +0900 Subject: [PATCH 4/5] Use public interface --- cpp/src/arrow/compare.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h index 7b2d1d6cf6c6..f232ebb8915d 100644 --- a/cpp/src/arrow/compare.h +++ b/cpp/src/arrow/compare.h @@ -104,7 +104,7 @@ class EqualOptions { /// This option only affects the Equals methods /// and has no effect on the ApproxEquals methods. 
/// - /// Note: This option is only considered when \ref arrow::EqualOptions::use_schema_ is + /// Note: This option is only considered when \ref arrow::EqualOptions::use_schema is /// set to true. bool use_metadata() const { return use_metadata_; } From 97aedd6009648eea371246e101b6da0c9f394947 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 2 Sep 2025 16:07:35 +0900 Subject: [PATCH 5/5] Use public interface --- cpp/src/arrow/compare.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h index f232ebb8915d..2198495d7d20 100644 --- a/cpp/src/arrow/compare.h +++ b/cpp/src/arrow/compare.h @@ -91,7 +91,7 @@ class EqualOptions { /// Return a new EqualOptions object with the "use_schema_" property changed. /// - /// Setting this option is false making the value of \ref EqualOptions::use_metadata_ + /// Setting this option to false means the value of \ref EqualOptions::use_metadata /// is ignored. EqualOptions use_schema(bool v) const { auto res = EqualOptions(*this);