|
29 | 29 | #include "arrow/array/data.h" |
30 | 30 | #include "arrow/array/util.h" |
31 | 31 | #include "arrow/chunked_array.h" |
| 32 | +#include "arrow/compare.h" |
32 | 33 | #include "arrow/compute/cast.h" |
33 | 34 | #include "arrow/record_batch.h" |
34 | 35 | #include "arrow/status.h" |
@@ -152,38 +153,210 @@ TEST_F(TestTable, AllColumnsAndFields) { |
152 | 153 | ASSERT_EQ(0, fields.size()); |
153 | 154 | } |
154 | 155 |
|
155 | | -TEST_F(TestTable, Equals) { |
156 | | - const int length = 100; |
157 | | - MakeExample1(length); |
| 156 | +TEST(TestTableEquality, Equals) { |
| 157 | + const int32_t length = 10; |
158 | 158 |
|
159 | | - table_ = Table::Make(schema_, columns_); |
| 159 | + auto f0 = field("f0", int32()); |
| 160 | + auto f1 = field("f1", uint8()); |
| 161 | + auto f2 = field("f2", int16()); |
160 | 162 |
|
161 | | - ASSERT_TRUE(table_->Equals(*table_)); |
162 | | - // Differing schema |
163 | | - auto f0 = field("f3", int32()); |
164 | | - auto f1 = field("f4", uint8()); |
165 | | - auto f2 = field("f5", int16()); |
166 | | - std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2}; |
167 | | - auto other_schema = std::make_shared<Schema>(fields); |
168 | | - auto other = Table::Make(other_schema, columns_); |
169 | | - ASSERT_FALSE(table_->Equals(*other)); |
170 | | - // Differing columns |
171 | | - std::vector<std::shared_ptr<ChunkedArray>> other_columns = { |
172 | | - std::make_shared<ChunkedArray>( |
173 | | - gen_.ArrayOf(int32(), length, /*null_probability=*/0.3)), |
174 | | - std::make_shared<ChunkedArray>( |
175 | | - gen_.ArrayOf(uint8(), length, /*null_probability=*/0.3)), |
176 | | - std::make_shared<ChunkedArray>( |
177 | | - gen_.ArrayOf(int16(), length, /*null_probability=*/0.3))}; |
178 | | - |
179 | | - other = Table::Make(schema_, other_columns); |
180 | | - ASSERT_FALSE(table_->Equals(*other)); |
181 | | - |
182 | | - // Differing schema metadata |
183 | | - other_schema = schema_->WithMetadata(::arrow::key_value_metadata({"key"}, {"value"})); |
184 | | - other = Table::Make(other_schema, columns_); |
185 | | - ASSERT_TRUE(table_->Equals(*other)); |
186 | | - ASSERT_FALSE(table_->Equals(*other, /*check_metadata=*/true)); |
| 163 | + auto schema = ::arrow::schema({f0, f1, f2}); |
| 164 | + auto schema_same = ::arrow::schema({f0, f1, f2}); |
| 165 | + auto schema_fewer_fields = ::arrow::schema({f0, f1}); |
| 166 | + |
| 167 | + random::RandomArrayGenerator gen(42); |
| 168 | + |
| 169 | + auto a_f0 = gen.ArrayOf(int32(), length); |
| 170 | + auto a_f1 = gen.ArrayOf(uint8(), length); |
| 171 | + auto a_f2 = gen.ArrayOf(int16(), length); |
| 172 | + auto a_f0_half = a_f0->Slice(0, length / 2); |
| 173 | + auto a_f1_half = a_f1->Slice(0, length / 2); |
| 174 | + auto a_f2_half = a_f2->Slice(0, length / 2); |
| 175 | + auto a_f0_different = gen.ArrayOf(int32(), length); |
| 176 | + auto a_f1_different = gen.ArrayOf(uint8(), length); |
| 177 | + auto a_f2_different = gen.ArrayOf(uint16(), length); |
| 178 | + |
| 179 | + auto table = Table::Make(schema, {a_f0, a_f1, a_f2}, length); |
| 180 | + auto table_same = Table::Make(schema_same, {a_f0, a_f1, a_f2}, length); |
| 181 | + auto table_fewer_fields = Table::Make(schema_fewer_fields, {a_f0, a_f1}, length); |
| 182 | + auto table_half = |
| 183 | + Table::Make(schema_fewer_fields, {a_f0_half, a_f1_half, a_f2_half}, length / 2); |
| 184 | + auto table_different = Table::Make( |
| 185 | + schema_fewer_fields, {a_f0_different, a_f1_different, a_f2_different}, length); |
| 186 | + |
| 187 | + // Same Values |
| 188 | + ASSERT_TRUE(table->Equals(*table_same)); |
| 189 | + |
| 190 | + // Different number of columns |
| 191 | + ASSERT_FALSE(table->Equals(*table_fewer_fields)); |
| 192 | + |
| 193 | + // Different number of rows |
| 194 | + ASSERT_FALSE(table->Equals(*table_half)); |
| 195 | + |
| 196 | + // Different values |
| 197 | + ASSERT_FALSE(table->Equals(*table_different)); |
| 198 | +} |
| 199 | + |
| 200 | +TEST(TestTableEquality, MetadataAndSchema) { |
| 201 | + const int32_t length = 10; |
| 202 | + |
| 203 | + auto f0 = field("f0", int32()); |
| 204 | + auto f1 = field("f1", uint8()); |
| 205 | + auto f2 = field("f2", int16()); |
| 206 | + auto f2_renamed = field("f2b", int16()); |
| 207 | + |
| 208 | + auto metadata = key_value_metadata({"foo"}, {"bar"}); |
| 209 | + |
| 210 | + auto schema = ::arrow::schema({f0, f1, f2}); |
| 211 | + auto schema_with_metadata = schema->WithMetadata(metadata); |
| 212 | + auto schema_renamed_field = ::arrow::schema({f0, f1, f2_renamed}); |
| 213 | + |
| 214 | + random::RandomArrayGenerator gen(42); |
| 215 | + |
| 216 | + auto a_f0 = gen.ArrayOf(int32(), length); |
| 217 | + auto a_f1 = gen.ArrayOf(uint8(), length); |
| 218 | + auto a_f2 = gen.ArrayOf(int16(), length); |
| 219 | + auto a_f2_renamed = a_f2; |
| 220 | + |
| 221 | + // All Tables have the same values but different schemas. |
| 222 | + auto table = Table::Make(schema, {a_f0, a_f1, a_f2}, length); |
| 223 | + auto table_with_metadata = |
| 224 | + Table::Make(schema_with_metadata, {a_f0, a_f1, a_f2}, length); |
| 225 | + auto table_renamed_field = |
| 226 | + Table::Make(schema_renamed_field, {a_f0, a_f1, a_f2_renamed}, length); |
| 227 | + |
| 228 | + auto options = EqualOptions::Defaults(); |
| 229 | + |
| 230 | + // Same values and types, but different field names |
| 231 | + ASSERT_FALSE(table->Equals(*table_renamed_field)); |
| 232 | + ASSERT_TRUE(table->Equals(*table_renamed_field, options.use_schema(false))); |
| 233 | + |
| 234 | + // Different metadata |
| 235 | + ASSERT_TRUE(table->Equals(*table_with_metadata)); |
| 236 | + ASSERT_TRUE(table->Equals(*table_with_metadata, options)); |
| 237 | + ASSERT_FALSE(table->Equals(*table_with_metadata, |
| 238 | + /*check_metadata=*/true)); |
| 239 | + ASSERT_FALSE(table->Equals(*table_with_metadata, |
| 240 | + /*check_metadata=*/true, options.use_schema(true))); |
| 241 | + ASSERT_TRUE(table->Equals(*table_with_metadata, |
| 242 | + /*check_metadata=*/true, options.use_schema(false))); |
| 243 | + ASSERT_TRUE( |
| 244 | + table->Equals(*table_with_metadata, options.use_schema(true).use_metadata(false))); |
| 245 | + ASSERT_FALSE( |
| 246 | + table->Equals(*table_with_metadata, options.use_schema(true).use_metadata(true))); |
| 247 | + ASSERT_TRUE( |
| 248 | + table->Equals(*table_with_metadata, options.use_schema(false).use_metadata(true))); |
| 249 | +} |
| 250 | + |
| 251 | +TEST(TestTableEqualityFloatType, SameValue) { |
| 252 | + auto schema = ::arrow::schema({field("f0", int32()), field("f1", float64())}); |
| 253 | + auto table = TableFromJSON( |
| 254 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": 5.0}, {"f0": 3, "f1": 6.0}])"}); |
| 255 | + auto other_table = TableFromJSON( |
| 256 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": 5.0}, {"f0": 3, "f1": 6.0}])"}); |
| 257 | + |
| 258 | + ASSERT_TRUE(table->Equals(*other_table)); |
| 259 | +} |
| 260 | + |
| 261 | +TEST(TestTableEqualityFloatType, SingedZero) { |
| 262 | + auto schema = ::arrow::schema({field("f0", int32()), field("f1", float64())}); |
| 263 | + auto table = TableFromJSON( |
| 264 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": -0.0}, {"f0": 3, "f1": 0.0}])"}); |
| 265 | + auto other_table = TableFromJSON( |
| 266 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": 0.0}, {"f0": 3, "f1": -0.0}])"}); |
| 267 | + auto options = EqualOptions::Defaults(); |
| 268 | + |
| 269 | + ASSERT_TRUE(table->Equals(*other_table, options)); |
| 270 | + ASSERT_FALSE(table->Equals(*other_table, options.signed_zeros_equal(false))); |
| 271 | +} |
| 272 | + |
| 273 | +TEST(TestTableEqualityFloatType, Infinity) { |
| 274 | + auto schema = ::arrow::schema({field("f0", int32()), field("f1", float64())}); |
| 275 | + auto table = TableFromJSON( |
| 276 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": 5.0}, {"f0": 3, "f1": Inf}])"}); |
| 277 | + auto table_different_inf = TableFromJSON( |
| 278 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": 5.0}, {"f0": 3, "f1": -Inf}])"}); |
| 279 | + auto table_same_inf = TableFromJSON( |
| 280 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": 5.0}, {"f0": 3, "f1": Inf}])"}); |
| 281 | + |
| 282 | + ASSERT_FALSE(table->Equals(*table_different_inf)); |
| 283 | + ASSERT_TRUE(table->Equals(*table_same_inf)); |
| 284 | +} |
| 285 | + |
| 286 | +TEST(TestTableEqualityFloatType, NaN) { |
| 287 | + auto schema = ::arrow::schema({field("f0", int32()), field("f1", float64())}); |
| 288 | + auto table = TableFromJSON( |
| 289 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": NaN}, {"f0": 3, "f1": 6.0}])"}); |
| 290 | + auto other_table = TableFromJSON( |
| 291 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": NaN}, {"f0": 3, "f1": 6.0}])"}); |
| 292 | + auto options = EqualOptions::Defaults(); |
| 293 | + |
| 294 | + ASSERT_FALSE(table->Equals(*other_table, options)); |
| 295 | + ASSERT_TRUE(table->Equals(*other_table, options.nans_equal(true))); |
| 296 | +} |
| 297 | + |
| 298 | +TEST(TestTableEqualityFloatType, Approximate) { |
| 299 | + auto schema = ::arrow::schema({field("f0", int32()), field("f1", float64())}); |
| 300 | + auto table = TableFromJSON( |
| 301 | + schema, |
| 302 | + {R"([{"f0": 1, "f1": 4.0001}, {"f0": 2, "f1": 5.0001}, {"f0": 3, "f1": 6.0001}])"}); |
| 303 | + auto other_table = TableFromJSON( |
| 304 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": 5.0}, {"f0": 3, "f1": 6.0}])"}); |
| 305 | + auto options = EqualOptions::Defaults(); |
| 306 | + |
| 307 | + ASSERT_FALSE(table->Equals(*other_table, options)); |
| 308 | + |
| 309 | + ASSERT_TRUE(table->Equals(*other_table, options.use_atol(true).atol(1e-3))); |
| 310 | + |
| 311 | + ASSERT_FALSE(table->Equals(*other_table, options.use_atol(true).atol(1e-5))); |
| 312 | +} |
| 313 | + |
| 314 | +TEST(TestTableEqualitySameAddress, NonFloatType) { |
| 315 | + auto schema = ::arrow::schema({field("f0", int32()), field("f1", uint8())}); |
| 316 | + auto table = TableFromJSON( |
| 317 | + schema, {R"([{"f0": 1, "f1": 4}, {"f0": 2, "f1": 5}, {"f0": 3, "f1": 6}])"}); |
| 318 | + auto other_table = table; |
| 319 | + auto options = EqualOptions::Defaults(); |
| 320 | + |
| 321 | + ASSERT_TRUE(table->Equals(*other_table, options)); |
| 322 | + ASSERT_TRUE(table->Equals(*other_table, options.nans_equal(true))); |
| 323 | +} |
| 324 | + |
| 325 | +TEST(TestTableEqualitySameAddress, NestedTypesWithoutFloatType) { |
| 326 | + auto schema = ::arrow::schema( |
| 327 | + {field("f0", int32()), field("f1", struct_({{"f2", utf8()}, {"f3", int64()}}))}); |
| 328 | + auto table = TableFromJSON( |
| 329 | + schema, |
| 330 | + {R"([{"f0": 1, "f1": {"f2": "4", "f3": 7}}, {"f0": 2, "f1": {"f2": "5", "f3": 8}}, {"f0": 3, "f1": {"f2" : "6", "f3": 9}}])"}); |
| 331 | + auto other_table = table; |
| 332 | + auto options = EqualOptions::Defaults(); |
| 333 | + |
| 334 | + ASSERT_TRUE(table->Equals(*other_table, options)); |
| 335 | + ASSERT_TRUE(table->Equals(*other_table, options.nans_equal(true))); |
| 336 | +} |
| 337 | + |
| 338 | +TEST(TestTableEqualitySameAddress, FloatType) { |
| 339 | + auto schema = ::arrow::schema({field("f0", int32()), field("f1", float64())}); |
| 340 | + auto table = TableFromJSON( |
| 341 | + schema, {R"([{"f0": 1, "f1": 4.0}, {"f0": 2, "f1": NaN}, {"f0": 3, "f1": 6.0}])"}); |
| 342 | + auto other_table = table; |
| 343 | + auto options = EqualOptions::Defaults(); |
| 344 | + |
| 345 | + ASSERT_FALSE(table->Equals(*other_table, options)); |
| 346 | + ASSERT_TRUE(table->Equals(*other_table, options.nans_equal(true))); |
| 347 | +} |
| 348 | + |
| 349 | +TEST(TestTableEqualitySameAddress, NestedTypesWithFloatType) { |
| 350 | + auto schema = ::arrow::schema( |
| 351 | + {field("f0", int32()), field("f1", struct_({{"f2", utf8()}, {"f3", float64()}}))}); |
| 352 | + auto table = TableFromJSON( |
| 353 | + schema, |
| 354 | + {R"([{"f0": 1, "f1": {"f2": "4", "f3": 7.0}}, {"f0": 2, "f1": {"f2": "5", "f3": NaN}}, {"f0": 3,"f1": {"f2" : "6", "f3": 9.0}}])"}); |
| 355 | + auto other_table = table; |
| 356 | + auto options = EqualOptions::Defaults(); |
| 357 | + |
| 358 | + ASSERT_FALSE(table->Equals(*other_table, options)); |
| 359 | + ASSERT_TRUE(table->Equals(*other_table, options.nans_equal(true))); |
187 | 360 | } |
188 | 361 |
|
189 | 362 | TEST_F(TestTable, MakeEmpty) { |
|
0 commit comments