Skip to content

Commit d2253c8

Browse files
committed
Correct comments and variable naming
1 parent ed77d25 commit d2253c8

File tree

4 files changed

+155
-59
lines changed

4 files changed

+155
-59
lines changed

cpp/src/arrow/compare.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,37 @@ class EqualOptions {
8383
return res;
8484
}
8585

86+
/// Whether the \ref arrow::Schema property is used in the comparison.
87+
///
88+
/// This option only affects the Equals methods
89+
/// and has no effect on ApproxEquals methods.
90+
bool use_schema() const { return use_schema_; }
91+
92+
/// Return a new EqualOptions object with the "use_schema_" property changed.
93+
/// Setting this option is false making the value of \ref EqualOptions::use_metadata_
94+
/// is ignored.
95+
EqualOptions use_schema(bool v) const {
96+
auto res = EqualOptions(*this);
97+
res.use_schema_ = v;
98+
return res;
99+
}
100+
101+
/// Whether the "metadata" in \ref arrow::Schema is used in the comparison.
102+
///
103+
/// This option only affects the Equals methods
104+
/// and has no effect on the ApproxEquals methods.
105+
///
106+
/// Note: This option is only considered when \ref arrow::EqualOptions::use_schema_ is
107+
/// set to true.
108+
bool use_metadata() const { return use_metadata_; }
109+
110+
/// Return a new EqualOptions object with the "use_metadata" property changed.
111+
EqualOptions use_metadata(bool v) const {
112+
auto res = EqualOptions(*this);
113+
res.use_metadata_ = v;
114+
return res;
115+
}
116+
86117
/// The ostream to which a diff will be formatted if arrays disagree.
87118
/// If this is null (the default) no diff will be formatted.
88119
std::ostream* diff_sink() const { return diff_sink_; }
@@ -103,6 +134,8 @@ class EqualOptions {
103134
bool nans_equal_ = false;
104135
bool signed_zeros_equal_ = true;
105136
bool use_atol_ = false;
137+
bool use_schema_ = true;
138+
bool use_metadata_ = false;
106139

107140
std::ostream* diff_sink_ = NULLPTR;
108141
};

cpp/src/arrow/record_batch.cc

Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "arrow/array/statistics.h"
3636
#include "arrow/array/validate.h"
3737
#include "arrow/c/abi.h"
38+
#include "arrow/compare.h"
3839
#include "arrow/pretty_print.h"
3940
#include "arrow/status.h"
4041
#include "arrow/table.h"
@@ -349,44 +350,27 @@ bool CanIgnoreNaNInEquality(const RecordBatch& batch, const EqualOptions& opts)
349350

350351
bool RecordBatch::Equals(const RecordBatch& other, bool check_metadata,
351352
const EqualOptions& opts) const {
352-
if (this == &other) {
353-
if (CanIgnoreNaNInEquality(*this, opts)) {
354-
return true;
355-
}
356-
} else {
357-
if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) {
358-
return false;
359-
} else if (!schema_->Equals(*other.schema(), check_metadata)) {
360-
return false;
361-
} else if (device_type() != other.device_type()) {
362-
return false;
363-
}
364-
}
365-
366-
for (int i = 0; i < num_columns(); ++i) {
367-
if (!column(i)->Equals(other.column(i), opts)) {
368-
return false;
369-
}
370-
}
371-
372-
return true;
353+
return Equals(other, opts.use_metadata(check_metadata));
373354
}
374355

375-
bool RecordBatch::ApproxEquals(const RecordBatch& other, const EqualOptions& opts) const {
356+
bool RecordBatch::Equals(const RecordBatch& other, const EqualOptions& opts) const {
376357
if (this == &other) {
377358
if (CanIgnoreNaNInEquality(*this, opts)) {
378359
return true;
379360
}
380361
} else {
381362
if (num_columns() != other.num_columns() || num_rows_ != other.num_rows()) {
382363
return false;
364+
} else if (opts.use_schema() &&
365+
!schema_->Equals(*other.schema(), opts.use_metadata())) {
366+
return false;
383367
} else if (device_type() != other.device_type()) {
384368
return false;
385369
}
386370
}
387371

388372
for (int i = 0; i < num_columns(); ++i) {
389-
if (!column(i)->ApproxEquals(other.column(i), opts)) {
373+
if (!column(i)->Equals(other.column(i), opts)) {
390374
return false;
391375
}
392376
}

cpp/src/arrow/record_batch.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,22 +118,32 @@ class ARROW_EXPORT RecordBatch {
118118
static Result<std::shared_ptr<RecordBatch>> FromStructArray(
119119
const std::shared_ptr<Array>& array, MemoryPool* pool = default_memory_pool());
120120

121-
/// \brief Determine if two record batches are exactly equal
121+
/// \brief Determine if two record batches are equal
122122
///
123123
/// \param[in] other the RecordBatch to compare with
124-
/// \param[in] check_metadata if true, check that Schema metadata is the same
124+
/// \param[in] check_metadata if true, the schema metadata will be compared,
125+
/// regardless of the value set in \ref EqualOptions::use_metadata_
125126
/// \param[in] opts the options for equality comparisons
126127
/// \return true if batches are equal
127128
bool Equals(const RecordBatch& other, bool check_metadata = false,
128129
const EqualOptions& opts = EqualOptions::Defaults()) const;
129130

131+
/// \brief Determine if two record batches are equal
132+
///
133+
/// \param[in] other the RecordBatch to compare with
134+
/// \param[in] opts the options for equality comparisons
135+
/// \return true if batches are equal
136+
bool Equals(const RecordBatch& other, const EqualOptions& opts) const;
137+
130138
/// \brief Determine if two record batches are approximately equal
131139
///
132140
/// \param[in] other the RecordBatch to compare with
133141
/// \param[in] opts the options for equality comparisons
134142
/// \return true if batches are approximately equal
135143
bool ApproxEquals(const RecordBatch& other,
136-
const EqualOptions& opts = EqualOptions::Defaults()) const;
144+
const EqualOptions& opts = EqualOptions::Defaults()) const {
145+
return Equals(other, opts.use_schema(false).use_atol(true));
146+
}
137147

138148
/// \return the record batch's schema
139149
const std::shared_ptr<Schema>& schema() const { return schema_; }

cpp/src/arrow/record_batch_test.cc

Lines changed: 102 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "arrow/array/util.h"
4141
#include "arrow/c/abi.h"
4242
#include "arrow/chunked_array.h"
43+
#include "arrow/compare.h"
4344
#include "arrow/config.h"
4445
#include "arrow/status.h"
4546
#include "arrow/table.h"
@@ -64,44 +65,98 @@ class TestRecordBatch : public ::testing::Test {};
6465
TEST_F(TestRecordBatch, Equals) {
6566
const int length = 10;
6667

68+
auto f0 = field("f0", int32());
69+
auto f1 = field("f1", uint8());
70+
auto f2 = field("f2", int16());
71+
72+
auto schema0_f0_f1_f2 = schema({f0, f1, f2});
73+
auto schema1_f0_f1_f2 = schema({f0, f1, f2});
74+
auto schema2_f0_f1 = schema({f0, f1});
75+
76+
random::RandomArrayGenerator gen(42);
77+
78+
auto a0 = gen.ArrayOf(int32(), length);
79+
auto a1 = gen.ArrayOf(uint8(), length);
80+
auto a2 = gen.ArrayOf(int16(), length);
81+
auto a3 = a0->Slice(0, length / 2);
82+
auto a4 = a1->Slice(0, length / 2);
83+
auto a5 = gen.ArrayOf(int32(), length);
84+
auto a6 = gen.ArrayOf(uint8(), length);
85+
86+
auto b0_a0_a1_a2 = RecordBatch::Make(schema0_f0_f1_f2, length, {a0, a1, a2});
87+
auto b1_a0_a1_a2 = RecordBatch::Make(schema1_f0_f1_f2, length, {a0, a1, a2});
88+
auto b2_a0_a1 = RecordBatch::Make(schema2_f0_f1, length, {a0, a1});
89+
auto b3_a3_a4_half_rows = RecordBatch::Make(schema2_f0_f1, length / 2, {a3, a4});
90+
auto b4_a5_a6 = RecordBatch::Make(schema2_f0_f1, length, {a5, a6});
91+
92+
// Same Values
93+
ASSERT_TRUE(b0_a0_a1_a2->Equals(*b1_a0_a1_a2));
94+
95+
// Different number of columns
96+
ASSERT_FALSE(b0_a0_a1_a2->Equals(*b2_a0_a1));
97+
98+
// Different number of rows
99+
ASSERT_FALSE(b2_a0_a1->Equals(*b3_a3_a4_half_rows));
100+
101+
// Different values
102+
ASSERT_FALSE(b2_a0_a1->Equals(*b4_a5_a6));
103+
}
104+
105+
class TestRecordBatchEqualOptions : public TestRecordBatch {};
106+
107+
TEST_F(TestRecordBatchEqualOptions, MetadataAndSchema) {
108+
int length = 10;
109+
67110
auto f0 = field("f0", int32());
68111
auto f1 = field("f1", uint8());
69112
auto f2 = field("f2", int16());
70113
auto f2b = field("f2b", int16());
71114

72115
auto metadata = key_value_metadata({"foo"}, {"bar"});
73116

74-
std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2};
75-
auto schema = ::arrow::schema({f0, f1, f2});
76-
auto schema2 = ::arrow::schema({f0, f1});
77-
auto schema3 = ::arrow::schema({f0, f1, f2}, metadata);
78-
auto schema4 = ::arrow::schema({f0, f1, f2b});
117+
auto schema0_f0_f1_f2 = schema({f0, f1, f2});
118+
auto schema1_f0_f1_f2_with_metadat = schema({f0, f1, f2}, metadata);
119+
auto schema2_f0_f1_f2b = schema({f0, f1, f2b});
79120

80121
random::RandomArrayGenerator gen(42);
81122

82123
auto a0 = gen.ArrayOf(int32(), length);
83124
auto a1 = gen.ArrayOf(uint8(), length);
84125
auto a2 = gen.ArrayOf(int16(), length);
85126

86-
auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2});
87-
auto b2 = RecordBatch::Make(schema3, length, {a0, a1, a2});
88-
auto b3 = RecordBatch::Make(schema2, length, {a0, a1});
89-
auto b4 = RecordBatch::Make(schema, length, {a0, a1, a1});
90-
auto b5 = RecordBatch::Make(schema4, length, {a0, a1, a2});
91-
92-
ASSERT_TRUE(b1->Equals(*b1));
93-
ASSERT_FALSE(b1->Equals(*b3));
94-
ASSERT_FALSE(b1->Equals(*b4));
127+
// All RecordBatches have the same values but different schemas.
128+
auto b0_f0_f1_f2 = RecordBatch::Make(schema0_f0_f1_f2, length, {a0, a1, a2});
129+
auto b1_f0_f1_f2_with_metadat =
130+
RecordBatch::Make(schema1_f0_f1_f2_with_metadat, length, {a0, a1, a2});
131+
auto b2_f0_f1_f2b = RecordBatch::Make(schema2_f0_f1_f2b, length, {a0, a1, a2});
95132

133+
auto options = EqualOptions::Defaults();
96134
// Same values and types, but different field names
97-
ASSERT_FALSE(b1->Equals(*b5));
135+
ASSERT_FALSE(b0_f0_f1_f2->Equals(*b2_f0_f1_f2b));
136+
ASSERT_TRUE(b0_f0_f1_f2->Equals(*b2_f0_f1_f2b, options.use_schema(false)));
137+
ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b2_f0_f1_f2b));
138+
ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b2_f0_f1_f2b, options.use_schema(true)));
98139

99140
// Different metadata
100-
ASSERT_TRUE(b1->Equals(*b2));
101-
ASSERT_FALSE(b1->Equals(*b2, /*check_metadata=*/true));
141+
ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat));
142+
ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat, options));
143+
ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat,
144+
/*check_metadata=*/true));
145+
ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat,
146+
/*check_metadata=*/true, options.use_schema(true)));
147+
ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat,
148+
/*check_metadata=*/true, options.use_schema(false)));
149+
ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat,
150+
options.use_schema(true).use_metadata(false)));
151+
ASSERT_FALSE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat,
152+
options.use_schema(true).use_metadata(true)));
153+
ASSERT_TRUE(b0_f0_f1_f2->Equals(*b1_f0_f1_f2_with_metadat,
154+
options.use_schema(false).use_metadata(true)));
155+
ASSERT_TRUE(b0_f0_f1_f2->ApproxEquals(*b1_f0_f1_f2_with_metadat,
156+
options.use_schema(true).use_metadata(true)));
102157
}
103158

104-
TEST_F(TestRecordBatch, EqualOptions) {
159+
TEST_F(TestRecordBatchEqualOptions, NaN) {
105160
int length = 2;
106161
auto f = field("f", float64());
107162

@@ -114,13 +169,27 @@ TEST_F(TestRecordBatch, EqualOptions) {
114169
auto b1 = RecordBatch::Make(schema, length, {array1});
115170
auto b2 = RecordBatch::Make(schema, length, {array2});
116171

117-
EXPECT_FALSE(b1->Equals(*b2, /*check_metadata=*/false,
118-
EqualOptions::Defaults().nans_equal(false)));
119-
EXPECT_TRUE(b1->Equals(*b2, /*check_metadata=*/false,
120-
EqualOptions::Defaults().nans_equal(true)));
172+
EXPECT_FALSE(b1->Equals(*b2, EqualOptions::Defaults().nans_equal(false)));
173+
EXPECT_TRUE(b1->Equals(*b2, EqualOptions::Defaults().nans_equal(true)));
174+
}
175+
176+
TEST_F(TestRecordBatchEqualOptions, SignedZero) {
177+
int length = 2;
178+
auto f = field("f", float64());
179+
180+
auto schema = ::arrow::schema({f});
181+
182+
std::shared_ptr<Array> array1, array2;
183+
ArrayFromVector<DoubleType>(float64(), {true, true}, {0.5, +0.0}, &array1);
184+
ArrayFromVector<DoubleType>(float64(), {true, true}, {0.5, -0.0}, &array2);
185+
auto b1 = RecordBatch::Make(schema, length, {array1});
186+
auto b2 = RecordBatch::Make(schema, length, {array2});
187+
188+
ASSERT_FALSE(b1->Equals(*b2, EqualOptions::Defaults().signed_zeros_equal(false)));
189+
ASSERT_TRUE(b1->Equals(*b2, EqualOptions::Defaults().signed_zeros_equal(true)));
121190
}
122191

123-
TEST_F(TestRecordBatch, ApproxEqualOptions) {
192+
TEST_F(TestRecordBatchEqualOptions, Approx) {
124193
int length = 2;
125194
auto f = field("f", float64());
126195

@@ -137,8 +206,8 @@ TEST_F(TestRecordBatch, ApproxEqualOptions) {
137206
EXPECT_FALSE(b1->ApproxEquals(*b2, EqualOptions::Defaults().nans_equal(true)));
138207

139208
auto options = EqualOptions::Defaults().nans_equal(true).atol(0.1);
140-
EXPECT_FALSE(b1->Equals(*b2, false, options));
141-
EXPECT_TRUE(b1->Equals(*b2, false, options.use_atol(true)));
209+
EXPECT_FALSE(b1->Equals(*b2, options));
210+
EXPECT_TRUE(b1->Equals(*b2, options.use_atol(true)));
142211
EXPECT_TRUE(b1->ApproxEquals(*b2, options));
143212
}
144213

@@ -158,8 +227,8 @@ TEST_F(TestRecordBatchEqualsSameAddress, NonFloatType) {
158227

159228
auto options = EqualOptions::Defaults();
160229

161-
ASSERT_TRUE(b0->Equals(*b1, true, options));
162-
ASSERT_TRUE(b0->Equals(*b1, true, options.nans_equal(true)));
230+
ASSERT_TRUE(b0->Equals(*b1, options));
231+
ASSERT_TRUE(b0->Equals(*b1, options.nans_equal(true)));
163232

164233
ASSERT_TRUE(b0->ApproxEquals(*b1, options));
165234
ASSERT_TRUE(b0->ApproxEquals(*b1, options.nans_equal(true)));
@@ -180,8 +249,8 @@ TEST_F(TestRecordBatchEqualsSameAddress, NestedTypesWithoutFloatType) {
180249

181250
auto options = EqualOptions::Defaults();
182251

183-
ASSERT_TRUE(b0->Equals(*b1, true, options));
184-
ASSERT_TRUE(b0->Equals(*b1, true, options.nans_equal(true)));
252+
ASSERT_TRUE(b0->Equals(*b1, options));
253+
ASSERT_TRUE(b0->Equals(*b1, options.nans_equal(true)));
185254

186255
ASSERT_TRUE(b0->ApproxEquals(*b1, options));
187256
ASSERT_TRUE(b0->ApproxEquals(*b1, options.nans_equal(true)));
@@ -201,8 +270,8 @@ TEST_F(TestRecordBatchEqualsSameAddress, FloatType) {
201270

202271
auto options = EqualOptions::Defaults();
203272

204-
ASSERT_FALSE(b0->Equals(*b1, true, options));
205-
ASSERT_TRUE(b0->Equals(*b1, true, options.nans_equal(true)));
273+
ASSERT_FALSE(b0->Equals(*b1, options));
274+
ASSERT_TRUE(b0->Equals(*b1, options.nans_equal(true)));
206275

207276
ASSERT_FALSE(b0->ApproxEquals(*b1, options));
208277
ASSERT_TRUE(b0->ApproxEquals(*b1, options.nans_equal(true)));
@@ -223,8 +292,8 @@ TEST_F(TestRecordBatchEqualsSameAddress, NestedTypesWithFloatType) {
223292

224293
auto options = EqualOptions::Defaults();
225294

226-
ASSERT_FALSE(b0->Equals(*b1, true, options));
227-
ASSERT_TRUE(b0->Equals(*b1, true, options.nans_equal(true)));
295+
ASSERT_FALSE(b0->Equals(*b1, options));
296+
ASSERT_TRUE(b0->Equals(*b1, options.nans_equal(true)));
228297

229298
ASSERT_FALSE(b0->ApproxEquals(*b1, options));
230299
ASSERT_TRUE(b0->ApproxEquals(*b1, options.nans_equal(true)));

0 commit comments

Comments
 (0)