diff --git a/src/nanoarrow/common/array.c b/src/nanoarrow/common/array.c index be99cab88..28ab6d630 100644 --- a/src/nanoarrow/common/array.c +++ b/src/nanoarrow/common/array.c @@ -1337,40 +1337,12 @@ ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, return EINVAL; } -struct ArrowComparisonInternalState { +struct ArrowArrayViewComparisonInternalState { enum ArrowCompareLevel level; int is_equal; struct ArrowError* reason; }; -NANOARROW_CHECK_PRINTF_ATTRIBUTE static void ArrowComparePrependPath( - struct ArrowError* out, const char* fmt, ...) { - if (out == NULL) { - return; - } - - char prefix[128]; - prefix[0] = '\0'; - va_list args; - va_start(args, fmt); - int prefix_len = vsnprintf(prefix, sizeof(prefix), fmt, args); - va_end(args); - - if (prefix_len <= 0) { - return; - } - - size_t out_len = strlen(out->message); - size_t out_len_to_move = sizeof(struct ArrowError) - prefix_len - 1; - if (out_len_to_move > out_len) { - out_len_to_move = out_len; - } - - memmove(out->message + prefix_len, out->message, out_len_to_move); - memcpy(out->message, prefix, prefix_len); - out->message[out_len + prefix_len] = '\0'; -} - #define SET_NOT_EQUAL_AND_RETURN_IF_IMPL(cond_, state_, reason_) \ do { \ if (cond_) { \ @@ -1383,9 +1355,9 @@ NANOARROW_CHECK_PRINTF_ATTRIBUTE static void ArrowComparePrependPath( #define SET_NOT_EQUAL_AND_RETURN_IF(condition_, state_) \ SET_NOT_EQUAL_AND_RETURN_IF_IMPL(condition_, state_, #condition_) -static void ArrowArrayViewCompareBuffer(const struct ArrowArrayView* actual, - const struct ArrowArrayView* expected, int i, - struct ArrowComparisonInternalState* state) { +static void ArrowArrayViewCompareBuffer( + const struct ArrowArrayView* actual, const struct ArrowArrayView* expected, int i, + struct ArrowArrayViewComparisonInternalState* state) { SET_NOT_EQUAL_AND_RETURN_IF( actual->buffer_views[i].size_bytes != expected->buffer_views[i].size_bytes, state); @@ -1398,9 +1370,9 @@ static void 
ArrowArrayViewCompareBuffer(const struct ArrowArrayView* actual, } } -static void ArrowArrayViewCompareIdentical(const struct ArrowArrayView* actual, - const struct ArrowArrayView* expected, - struct ArrowComparisonInternalState* state) { +static void ArrowArrayViewCompareIdentical( + const struct ArrowArrayView* actual, const struct ArrowArrayView* expected, + struct ArrowArrayViewComparisonInternalState* state) { SET_NOT_EQUAL_AND_RETURN_IF(actual->storage_type != expected->storage_type, state); SET_NOT_EQUAL_AND_RETURN_IF(actual->n_children != expected->n_children, state); SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary == NULL && expected->dictionary != NULL, @@ -1415,7 +1387,7 @@ static void ArrowArrayViewCompareIdentical(const struct ArrowArrayView* actual, for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { ArrowArrayViewCompareBuffer(actual, expected, i, state); if (!state->is_equal) { - ArrowComparePrependPath(state->reason, ".buffers[%d]", i); + ArrowErrorPrefix(state->reason, ".buffers[%d]", i); return; } } @@ -1423,7 +1395,7 @@ static void ArrowArrayViewCompareIdentical(const struct ArrowArrayView* actual, for (int64_t i = 0; i < actual->n_children; i++) { ArrowArrayViewCompareIdentical(actual->children[i], expected->children[i], state); if (!state->is_equal) { - ArrowComparePrependPath(state->reason, ".children[%" PRId64 "]", i); + ArrowErrorPrefix(state->reason, ".children[%" PRId64 "]", i); return; } } @@ -1431,7 +1403,7 @@ static void ArrowArrayViewCompareIdentical(const struct ArrowArrayView* actual, if (actual->dictionary != NULL) { ArrowArrayViewCompareIdentical(actual->dictionary, expected->dictionary, state); if (!state->is_equal) { - ArrowComparePrependPath(state->reason, ".dictionary"); + ArrowErrorPrefix(state->reason, ".dictionary"); return; } } @@ -1443,7 +1415,7 @@ ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual, const struct ArrowArrayView* expected, enum ArrowCompareLevel level, int* out, struct ArrowError* 
reason) { - struct ArrowComparisonInternalState state; + struct ArrowArrayViewComparisonInternalState state; state.level = level; state.is_equal = 1; state.reason = reason; @@ -1458,7 +1430,7 @@ ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual, *out = state.is_equal; if (!state.is_equal) { - ArrowComparePrependPath(state.reason, "root"); + ArrowErrorPrefix(state.reason, "root"); } return NANOARROW_OK; diff --git a/src/nanoarrow/common/inline_types.h b/src/nanoarrow/common/inline_types.h index ac5132790..659e2ea07 100644 --- a/src/nanoarrow/common/inline_types.h +++ b/src/nanoarrow/common/inline_types.h @@ -585,6 +585,8 @@ enum ArrowCompareLevel { /// account potentially different content of null slots, arrays with a /// non-zero offset, and other considerations. NANOARROW_COMPARE_IDENTICAL, + NANOARROW_COMPARE_EQUAL, + NANOARROW_COMPARE_TYPE_EQUAL, }; /// \brief Get a string value of an enum ArrowTimeUnit value diff --git a/src/nanoarrow/common/schema.c b/src/nanoarrow/common/schema.c index 21cdcd95c..45fbe4aa4 100644 --- a/src/nanoarrow/common/schema.c +++ b/src/nanoarrow/common/schema.c @@ -613,6 +613,168 @@ ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, return NANOARROW_OK; } +struct ArrowSchemaComparisonInternalState { + enum ArrowCompareLevel level; + int check_metadata; + int check_exact_names; + int check_nullability; + int is_equal; + struct ArrowError* reason; +}; + +#define CHECK_METADATA_IDENTICAL 2 +#define CHECK_NAME_PARENT_WAS_MAP -1 +#define CHECK_NAME_PARENT_WAS_MAP_CHILD -2 + +#define SET_NOT_EQUAL_AND_RETURN_IF_IMPL(cond_, state_, reason_) \ + do { \ + if (cond_) { \ + ArrowErrorSet(state_->reason, ": %s", reason_); \ + state_->is_equal = 0; \ + return; \ + } \ + } while (0) + +#define SET_NOT_EQUAL_AND_RETURN_IF(condition_, state_) \ + SET_NOT_EQUAL_AND_RETURN_IF_IMPL(condition_, state_, #condition_) + +static void ArrowSchemaCompareIdentical(const struct ArrowSchema* actual, + const struct 
ArrowSchema* expected, + struct ArrowSchemaComparisonInternalState* state, + int check_name) { + SET_NOT_EQUAL_AND_RETURN_IF(actual->format == NULL && expected->format != NULL, state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->format != NULL && expected->format == NULL, state); + if (actual->format != NULL) { + SET_NOT_EQUAL_AND_RETURN_IF(strcmp(actual->format, expected->format) != 0, state); + } + + if (check_name > 0) { + SET_NOT_EQUAL_AND_RETURN_IF(actual->name == NULL && expected->name != NULL, state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->name != NULL && expected->name == NULL, state); + if (actual->name != NULL) { + SET_NOT_EQUAL_AND_RETURN_IF(strcmp(actual->name, expected->name) != 0, state); + } + } + + if (state->check_nullability) { + SET_NOT_EQUAL_AND_RETURN_IF(actual->flags != expected->flags, state); + } else { + int64_t actual_flags = actual->flags & ~ARROW_FLAG_NULLABLE; + int64_t expected_flags = expected->flags & ~ARROW_FLAG_NULLABLE; + SET_NOT_EQUAL_AND_RETURN_IF(actual_flags != expected_flags, state); + } + + if (state->check_metadata != 0) { + // Most implementations export empty metadata as NULL; however, some use + // the representation of zero key/value pairs to do so. 
+ char empty_metadata[4] = {'\0', '\0', '\0', '\0'}; + int actual_has_metadata = + actual->metadata != NULL && + memcmp(actual->metadata, empty_metadata, sizeof(empty_metadata)) != 0; + int expected_has_metadata = + expected->metadata != NULL && + memcmp(expected->metadata, empty_metadata, sizeof(empty_metadata)) != 0; + + if (state->check_metadata == CHECK_METADATA_IDENTICAL) { + SET_NOT_EQUAL_AND_RETURN_IF(actual->metadata == NULL && expected->metadata != NULL, + state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->metadata != NULL && expected->metadata == NULL, + state); + } else { + SET_NOT_EQUAL_AND_RETURN_IF(actual_has_metadata != expected_has_metadata, state); + } + + if (actual_has_metadata && expected_has_metadata) { + SET_NOT_EQUAL_AND_RETURN_IF(ArrowMetadataSizeOf(actual->metadata) != + ArrowMetadataSizeOf(expected->metadata), + state); + SET_NOT_EQUAL_AND_RETURN_IF(memcmp(actual->metadata, expected->metadata, + ArrowMetadataSizeOf(actual->metadata)) != 0, + state); + } + } + + SET_NOT_EQUAL_AND_RETURN_IF(actual->n_children != expected->n_children, state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary == NULL && expected->dictionary != NULL, + state); + SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary != NULL && expected->dictionary == NULL, + state); + + int check_child_names = 1; + if (!state->check_exact_names && actual->format != NULL && + strcmp(actual->format, "+l") == 0) { + check_child_names = 0; + } else if (check_name == CHECK_NAME_PARENT_WAS_MAP) { + check_child_names = CHECK_NAME_PARENT_WAS_MAP_CHILD; + } else if (check_name == CHECK_NAME_PARENT_WAS_MAP_CHILD) { + check_child_names = 0; + } else if (!state->check_exact_names && actual->format != NULL && + strcmp(actual->format, "+m") == 0) { + check_child_names = CHECK_NAME_PARENT_WAS_MAP; + } + + for (int64_t i = 0; i < actual->n_children; i++) { + ArrowSchemaCompareIdentical(actual->children[i], expected->children[i], state, + check_child_names); + if (!state->is_equal) { + 
ArrowErrorPrefix(state->reason, ".children[%" PRId64 "]", i); + return; + } + } + + if (actual->dictionary != NULL) { + // The name field of the dictionary does not need to be exact for the + // purposes of allowing non-canonical names + ArrowSchemaCompareIdentical(actual->dictionary, expected->dictionary, state, + state->check_exact_names); + if (!state->is_equal) { + ArrowErrorPrefix(state->reason, ".dictionary"); + return; + } + } +} + +// Top-level entry point to take care of creating, cleaning up, and +// propagating the ArrowSchemaComparisonInternalState to the caller +ArrowErrorCode ArrowSchemaCompare(const struct ArrowSchema* actual, + const struct ArrowSchema* expected, + enum ArrowCompareLevel level, int* out, + struct ArrowError* reason) { + struct ArrowSchemaComparisonInternalState state; + state.level = level; + state.reason = reason; + state.is_equal = 1; + + switch (level) { + case NANOARROW_COMPARE_IDENTICAL: + state.check_exact_names = 1; + state.check_nullability = 1; + state.check_metadata = CHECK_METADATA_IDENTICAL; + break; + case NANOARROW_COMPARE_EQUAL: + state.check_exact_names = 1; + state.check_nullability = 1; + state.check_metadata = 1; + break; + case NANOARROW_COMPARE_TYPE_EQUAL: + state.check_exact_names = 0; + state.check_nullability = 1; + state.check_metadata = 0; + break; + default: + return ENOTSUP; + } + + ArrowSchemaCompareIdentical(actual, expected, &state, state.check_exact_names); + + *out = state.is_equal; + if (!state.is_equal) { + ArrowErrorPrefix(state.reason, "root"); + } + + return NANOARROW_OK; +} + static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view, enum ArrowType type) { schema_view->type = type; diff --git a/src/nanoarrow/common/schema_test.cc b/src/nanoarrow/common/schema_test.cc index 175ea993a..123ab5b68 100644 --- a/src/nanoarrow/common/schema_test.cc +++ b/src/nanoarrow/common/schema_test.cc @@ -611,6 +611,318 @@ TEST(SchemaTest, SchemaCopyMetadata) { ArrowSchemaRelease(&schema); } 
+TEST(SchemaTest, SchemaCompareIdenticalStructure) { + struct ArrowError error; + struct ArrowSchema actual; + struct ArrowSchema expected; + int is_equal = -1; + + ASSERT_EQ(ArrowSchemaInitFromType(&actual, NANOARROW_TYPE_INT32), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &actual, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 1); + + // Check non-equal storage type + is_equal = -1; + ASSERT_EQ(ArrowSchemaInitFromType(&expected, NANOARROW_TYPE_STRING), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: strcmp(actual->format, expected->format) != 0"); + + // Check non-equal numbers of children + is_equal = -1; + ArrowSchemaRelease(&actual); + ArrowSchemaRelease(&expected); + ASSERT_EQ(ArrowSchemaInitFromType(&actual, NANOARROW_TYPE_STRUCT), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaInitFromType(&expected, NANOARROW_TYPE_STRUCT), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaAllocateChildren(&expected, 1), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: actual->n_children != expected->n_children"); + + // Check difference in children + is_equal = -1; + ASSERT_EQ(ArrowSchemaAllocateChildren(&actual, 1), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaInitFromType(actual.children[0], NANOARROW_TYPE_STRING), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaInitFromType(expected.children[0], NANOARROW_TYPE_BINARY), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, + "root.children[0]: strcmp(actual->format, expected->format) != 0"); + + // Check presence/absence of dictionary + is_equal = -1; + 
ArrowSchemaRelease(&actual); + ArrowSchemaRelease(&expected); + ASSERT_EQ(ArrowSchemaInitFromType(&actual, NANOARROW_TYPE_INT32), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaInitFromType(&expected, NANOARROW_TYPE_INT32), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaAllocateDictionary(&expected), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, + "root: actual->dictionary == NULL && expected->dictionary != NULL"); + + is_equal = -1; + ASSERT_EQ(ArrowSchemaCompare(&expected, &actual, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, + "root: actual->dictionary != NULL && expected->dictionary == NULL"); + + // Check a difference in a dictionary + is_equal = -1; + ASSERT_EQ(ArrowSchemaAllocateDictionary(&actual), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaInitFromType(actual.dictionary, NANOARROW_TYPE_STRING), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaInitFromType(expected.dictionary, NANOARROW_TYPE_BINARY), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, + "root.dictionary: strcmp(actual->format, expected->format) != 0"); + + ArrowSchemaRelease(&actual); + ArrowSchemaRelease(&expected); +} + +TEST(SchemaTest, SchemaCompareIdenticalFormat) { + struct ArrowError error; + struct ArrowSchema actual; + struct ArrowSchema expected; + int is_equal = -1; + + ArrowSchemaInit(&actual); + ArrowSchemaInit(&expected); + + ASSERT_EQ(ArrowSchemaSetFormat(&actual, "foofy"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: actual->format != NULL && expected->format == NULL"); + + is_equal = -1; + 
ASSERT_EQ(ArrowSchemaSetFormat(&actual, NULL), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetFormat(&expected, "foofy"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: actual->format == NULL && expected->format != NULL"); + + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetFormat(&actual, "foofy1"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetFormat(&expected, "foofy2"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: strcmp(actual->format, expected->format) != 0"); + + // Ensure identical formats can compare as identical + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetFormat(&actual, "foofy"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetFormat(&expected, "foofy"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 1); + + ArrowSchemaRelease(&actual); + ArrowSchemaRelease(&expected); +} + +TEST(SchemaTest, SchemaCompareIdenticalName) { + struct ArrowError error; + struct ArrowSchema actual; + struct ArrowSchema expected; + int is_equal = -1; + + ArrowSchemaInit(&actual); + ArrowSchemaInit(&expected); + + ASSERT_EQ(ArrowSchemaSetName(&actual, "foofy"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: actual->name != NULL && expected->name == NULL"); + + // The top-level name is not compared at the type equal level + is_equal = -1; + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_TYPE_EQUAL, + &is_equal, &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 1); + + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetName(&actual, NULL), 
NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetName(&expected, "foofy"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: actual->name == NULL && expected->name != NULL"); + + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetName(&actual, "foofy1"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetName(&expected, "foofy2"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: strcmp(actual->name, expected->name) != 0"); + + // Ensure identical names compare as identical + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetName(&actual, "foofy"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetName(&expected, "foofy"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 1); + + ArrowSchemaRelease(&actual); + ArrowSchemaRelease(&expected); +} + +TEST(SchemaTest, SchemaCompareIdenticalNameRecursive) { + struct ArrowError error; + struct ArrowSchema actual; + struct ArrowSchema expected; + int is_equal = -1; + + ArrowSchemaInit(&actual); + ArrowSchemaInit(&expected); + + ASSERT_EQ(ArrowSchemaSetTypeStruct(&actual, 1), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetType(actual.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK); + + ASSERT_EQ(ArrowSchemaSetTypeStruct(&expected, 1), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetType(expected.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetName(&expected, "foofy"), NANOARROW_OK); + + ArrowSchemaRelease(&actual); + ArrowSchemaRelease(&expected); +} + +TEST(SchemaTest, SchemaCompareIdenticalMetadata) { + struct ArrowError error; + struct ArrowSchema actual; + struct ArrowSchema expected; + int is_equal = -1; + + // Create metadatas key=value and key=valuf + 
std::string simple_metadata = SimpleMetadata(); + std::vector other_metadata(simple_metadata.begin(), simple_metadata.end()); + other_metadata[other_metadata.size() - 1] = 'f'; + + ArrowSchemaInit(&actual); + ArrowSchemaInit(&expected); + + // Different metadata should trigger an inequality + ASSERT_EQ(ArrowSchemaSetMetadata(&actual, simple_metadata.data()), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, + "root: actual->metadata != NULL && expected->metadata == NULL"); + + // Except at the type equal level + is_equal = -1; + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_TYPE_EQUAL, + &is_equal, &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 1); + + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetMetadata(&actual, NULL), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetMetadata(&expected, simple_metadata.data()), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, + "root: actual->metadata == NULL && expected->metadata != NULL"); + + // At the identical level, the other form of empty metadata should not be treated as + // equal + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetMetadata(&expected, "\0\0\0\0"), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, + "root: actual->metadata == NULL && expected->metadata != NULL"); + + // ...but at the equal level, the other form should be treated as equal + is_equal = -1; + ASSERT_EQ( + ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_EQUAL, &is_equal, &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 1); + + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetMetadata(&actual, simple_metadata.data()), 
NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetMetadata(&expected, other_metadata.data()), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, + "root: memcmp(actual->metadata, expected->metadata, " + "ArrowMetadataSizeOf(actual->metadata)) != 0"); + + // Ensure identical names compare as identical + is_equal = -1; + ASSERT_EQ(ArrowSchemaSetMetadata(&actual, simple_metadata.data()), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetMetadata(&expected, simple_metadata.data()), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 1); + + ArrowSchemaRelease(&actual); + ArrowSchemaRelease(&expected); +} + +TEST(SchemaTest, SchemaCompareIdenticalFlags) { + struct ArrowError error; + struct ArrowSchema actual; + struct ArrowSchema expected; + int is_equal = -1; + + ArrowSchemaInit(&actual); + ArrowSchemaInit(&expected); + + actual.flags = 0; + expected.flags = ARROW_FLAG_NULLABLE; + ASSERT_EQ(ArrowSchemaCompare(&actual, &expected, NANOARROW_COMPARE_IDENTICAL, &is_equal, + &error), + NANOARROW_OK); + EXPECT_EQ(is_equal, 0); + EXPECT_STREQ(error.message, "root: actual->flags != expected->flags"); + + ArrowSchemaRelease(&actual); + ArrowSchemaRelease(&expected); +} + TEST(SchemaViewTest, SchemaViewInitErrors) { struct ArrowSchema schema; struct ArrowSchemaView schema_view; diff --git a/src/nanoarrow/common/utils.c b/src/nanoarrow/common/utils.c index d8923b854..c951fa421 100644 --- a/src/nanoarrow/common/utils.c +++ b/src/nanoarrow/common/utils.c @@ -49,6 +49,48 @@ ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) { } } +NANOARROW_CHECK_PRINTF_ATTRIBUTE ArrowErrorCode ArrowErrorPrefix(struct ArrowError* error, + const char* fmt, ...) 
{ + if (error == NULL) { + return NANOARROW_OK; + } + + char prefix[sizeof(struct ArrowError)]; + memset(prefix, 0, sizeof(prefix)); + + va_list args; + va_start(args, fmt); + int prefix_len = vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + + if (prefix_len < 0) { + return EINVAL; + } else if (prefix_len == 0) { + return NANOARROW_OK; + } else if (prefix_len >= ((int)sizeof(struct ArrowError) - 1)) { + memcpy(error->message, prefix, sizeof(struct ArrowError) - 1); + return ERANGE; + } + + // Calculate the maximum size of message that we *could* move + size_t out_len = strlen(error->message); + size_t out_len_to_move = sizeof(struct ArrowError) - prefix_len - 1; + + // ...constrain it to the number of characters that we actually have to move + if (out_len_to_move > out_len) { + out_len_to_move = out_len; + } + + // ...move the existing message forward + memmove(error->message + prefix_len, error->message, out_len_to_move); + + // ...copy prefix into the prefix slot + memcpy(error->message, prefix, prefix_len); + error->message[out_len + prefix_len] = '\0'; + + return NANOARROW_OK; +} + void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY; layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL; diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h index f65d053ad..616e90514 100644 --- a/src/nanoarrow/nanoarrow.h +++ b/src/nanoarrow/nanoarrow.h @@ -46,6 +46,7 @@ #define ArrowBufferDeallocator \ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator) #define ArrowErrorSet NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorSet) +#define ArrowErrorPrefix NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorPrefix) #define ArrowLayoutInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowLayoutInit) #define ArrowDecimalSetDigits NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowDecimalSetDigits) #define ArrowDecimalAppendDigitsToBuffer \ @@ -67,6 +68,7 @@ #define 
ArrowSchemaSetTypeUnion \ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion) #define ArrowSchemaDeepCopy NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy) +#define ArrowSchemaCompare NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaCompare) #define ArrowSchemaSetFormat NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat) #define ArrowSchemaSetName NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName) #define ArrowSchemaSetMetadata \ @@ -266,6 +268,12 @@ static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...); +/// \brief Prefix the contents of an existing error using printf syntax. +/// +/// If error is NULL, this function does nothing and returns NANOARROW_OK. +NANOARROW_CHECK_PRINTF_ATTRIBUTE int ArrowErrorPrefix(struct ArrowError* error, + const char* fmt, ...); + /// @} /// \defgroup nanoarrow-utils Utility data structures @@ -411,6 +419,19 @@ ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowTyp ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema, struct ArrowSchema* schema_out); +/// \brief Compare two ArrowSchema objects for equality +/// +/// Given two ArrowSchema instances, place either 0 (not equal) or +/// 1 (equal) at the address pointed to by out. If the comparison determines +/// that actual and expected are not equal, a reason will be communicated via +/// reason if reason is non-NULL. +/// +/// Returns NANOARROW_OK if the comparison completed successfully. 
+ArrowErrorCode ArrowSchemaCompare(const struct ArrowSchema* actual, + const struct ArrowSchema* expected, + enum ArrowCompareLevel level, int* out, + struct ArrowError* reason); + /// \brief Copy format into schema->format /// /// schema must have been allocated using ArrowSchemaInitFromType() or @@ -1070,7 +1091,7 @@ ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view, /// Given two ArrowArrayView instances, place either 0 (not equal) and /// 1 (equal) at the address pointed to by out. If the comparison determines /// that actual and expected are not equal, a reason will be communicated via -/// error if error is non-NULL. +/// reason if reason is non-NULL. /// /// Returns NANOARROW_OK if the comparison completed successfully. ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual,