Skip to content

Commit 9c6411f

Browse files
authored
Fix serialization issue with schema evolution for query v3. (#5154)
Query v3 requires fragment metadata serialization/deserialization inside the array object. However fragment metadata capnp model wasn't including the name of the array schema that each fragment was written with , and in case of schema evolution this was leading to serializing the wrong schema (always the latest) and making the REST server read the wrong data from disk. This PR adds the missing array schema name field and adapts the ser/deser to pick the schema based on that name. [sc-48707] --- TYPE: BUG DESC: Fix serialization issue with schema evolution for query v3.
1 parent ee3a9f2 commit 9c6411f

File tree

9 files changed

+221
-167
lines changed

9 files changed

+221
-167
lines changed

test/src/unit-cppapi-schema-evolution.cc

Lines changed: 22 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
* Tests the C++ API for schema evolution.
3131
*/
3232

33+
#include <test/support/src/vfs_helpers.h>
3334
#include <test/support/tdb_catch.h>
3435
#include "test/support/src/mem_helpers.h"
3536
#include "tiledb/sm/array_schema/array_schema.h"
@@ -47,12 +48,11 @@
4748

4849
TEST_CASE(
4950
"C++ API: SchemaEvolution, add and drop attributes",
50-
"[cppapi][schema][evolution][add][drop]") {
51+
"[cppapi][schema][evolution][add][drop][rest]") {
5152
using namespace tiledb;
52-
Context ctx;
53-
VFS vfs(ctx);
54-
55-
std::string array_uri = "test_schema_evolution_array";
53+
test::VFSTestSetup vfs_test_setup;
54+
Context ctx{vfs_test_setup.ctx()};
55+
auto array_uri{vfs_test_setup.array_uri("test_schema_evolution_array")};
5656

5757
Domain domain(ctx);
5858
auto id1 = Dimension::create<int>(ctx, "d1", {{-100, 100}}, 10);
@@ -71,10 +71,6 @@ TEST_CASE(
7171
schema.set_cell_order(TILEDB_ROW_MAJOR);
7272
schema.set_tile_order(TILEDB_COL_MAJOR);
7373

74-
if (vfs.is_dir(array_uri)) {
75-
vfs.remove_dir(array_uri);
76-
}
77-
7874
Array::create(array_uri, schema);
7975

8076
auto evolution = ArraySchemaEvolution(ctx);
@@ -100,21 +96,15 @@ TEST_CASE(
10096
CHECK(attrs.count("a1") == 0);
10197
CHECK(attrs.count("a2") == 1);
10298
CHECK(attrs.count("a3") == 1);
103-
104-
// Clean up
105-
if (vfs.is_dir(array_uri)) {
106-
vfs.remove_dir(array_uri);
107-
}
10899
}
109100

110101
TEST_CASE(
111102
"C++ API: SchemaEvolution, check error when dropping dimension",
112-
"[cppapi][schema][evolution][drop]") {
103+
"[cppapi][schema][evolution][drop][rest]") {
113104
using namespace tiledb;
114-
Context ctx;
115-
VFS vfs(ctx);
116-
117-
std::string array_uri = "test_schema_evolution_array";
105+
test::VFSTestSetup vfs_test_setup;
106+
Context ctx{vfs_test_setup.ctx()};
107+
auto array_uri{vfs_test_setup.array_uri("test_schema_evolution_array")};
118108

119109
Domain domain(ctx);
120110
auto id1 = Dimension::create<int>(ctx, "d1", {{-100, 100}}, 10);
@@ -131,10 +121,6 @@ TEST_CASE(
131121
schema.set_cell_order(TILEDB_ROW_MAJOR);
132122
schema.set_tile_order(TILEDB_COL_MAJOR);
133123

134-
if (vfs.is_dir(array_uri)) {
135-
vfs.remove_dir(array_uri);
136-
}
137-
138124
Array::create(array_uri, schema);
139125

140126
auto evolution = ArraySchemaEvolution(ctx);
@@ -144,27 +130,22 @@ TEST_CASE(
144130

145131
// check that an exception is thrown
146132
CHECK_THROWS(evolution.array_evolve(array_uri));
147-
148-
// Clean up
149-
if (vfs.is_dir(array_uri)) {
150-
vfs.remove_dir(array_uri);
151-
}
152133
}
153134

154135
TEST_CASE(
155136
"C++ API: SchemaEvolution, add attributes and read",
156-
"[cppapi][schema][evolution][add]") {
137+
"[cppapi][schema][evolution][add][rest]") {
157138
using namespace tiledb;
158-
Context ctx;
159-
VFS vfs(ctx);
139+
test::VFSTestSetup vfs_test_setup;
140+
Context ctx{vfs_test_setup.ctx()};
141+
160142
auto layout = GENERATE(
161143
TILEDB_ROW_MAJOR,
162144
TILEDB_COL_MAJOR,
163145
TILEDB_UNORDERED,
164146
TILEDB_GLOBAL_ORDER);
165147
bool duplicates = GENERATE(true, false);
166-
167-
std::string array_uri = "test_schema_evolution_array_read";
148+
auto array_uri{vfs_test_setup.array_uri("test_schema_evolution_array")};
168149

169150
// Create
170151
{
@@ -183,10 +164,6 @@ TEST_CASE(
183164
schema.set_cell_order(TILEDB_ROW_MAJOR);
184165
schema.set_tile_order(TILEDB_COL_MAJOR);
185166

186-
if (vfs.is_dir(array_uri)) {
187-
vfs.remove_dir(array_uri);
188-
}
189-
190167
Array::create(array_uri, schema);
191168
}
192169

@@ -464,9 +441,11 @@ TEST_CASE(
464441
// test case.
465442
Config cfg;
466443
cfg["sm.merge_overlapping_ranges_experimental"] = "false";
444+
vfs_test_setup.update_config(cfg.ptr().get());
467445
// + Global order does not support multi-range subarrays
468446
if (layout != TILEDB_GLOBAL_ORDER) {
469-
ctx = Context(cfg);
447+
ctx = vfs_test_setup.ctx();
448+
470449
Array array(ctx, array_uri, TILEDB_READ);
471450

472451
std::vector<int> a_data(8);
@@ -683,19 +662,14 @@ TEST_CASE(
683662
1, 1, 1, 1, 3, 3, 3, 3, 4, 4, 4, 4, 1, 1, 1, 1}));
684663
}
685664
}
686-
687-
// Clean up
688-
if (vfs.is_dir(array_uri)) {
689-
vfs.remove_dir(array_uri);
690-
}
691665
}
692666

693667
TEST_CASE(
694668
"C++ API: SchemaEvolution, add and drop attributes",
695-
"[cppapi][schema][evolution][add][query-condition]") {
669+
"[cppapi][schema][evolution][add][query-condition][rest]") {
696670
using namespace tiledb;
697-
Context ctx;
698-
VFS vfs(ctx);
671+
test::VFSTestSetup vfs_test_setup;
672+
Context ctx{vfs_test_setup.ctx()};
699673
auto layout = GENERATE(
700674
TILEDB_ROW_MAJOR,
701675
TILEDB_COL_MAJOR,
@@ -705,7 +679,8 @@ TEST_CASE(
705679

706680
const char* out_str = nullptr;
707681
tiledb_layout_to_str(layout, &out_str);
708-
std::string array_uri = "test_schema_evolution_query_condition";
682+
auto array_uri{
683+
vfs_test_setup.array_uri("test_schema_evolution_query_condition")};
709684

710685
{
711686
Domain domain(ctx);
@@ -723,10 +698,6 @@ TEST_CASE(
723698
schema.set_cell_order(TILEDB_ROW_MAJOR);
724699
schema.set_tile_order(TILEDB_COL_MAJOR);
725700

726-
if (vfs.is_dir(array_uri)) {
727-
vfs.remove_dir(array_uri);
728-
}
729-
730701
Array::create(array_uri, schema);
731702
}
732703

@@ -833,11 +804,6 @@ TEST_CASE(
833804
CHECK_THAT(d1_data, Catch::Matchers::Equals(std::vector<int>{4}));
834805
CHECK_THAT(d2_data, Catch::Matchers::Equals(std::vector<int>{1}));
835806
}
836-
837-
// Cleanup.
838-
if (vfs.is_dir(array_uri)) {
839-
vfs.remove_dir(array_uri);
840-
}
841807
}
842808

843809
TEST_CASE(

tiledb/sm/fragment/fragment_metadata.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1349,7 +1349,7 @@ URI FragmentMetadata::validity_uri(const std::string& name) const {
13491349
encoded_name + "_validity" + constants::file_suffix);
13501350
}
13511351

1352-
const std::string& FragmentMetadata::array_schema_name() {
1352+
const std::string& FragmentMetadata::array_schema_name() const {
13531353
return array_schema_name_;
13541354
}
13551355

tiledb/sm/fragment/fragment_metadata.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,7 @@ class FragmentMetadata {
640640
URI validity_uri(const std::string& name) const;
641641

642642
/** Return the array schema name. */
643-
const std::string& array_schema_name();
643+
const std::string& array_schema_name() const;
644644

645645
uint64_t footer_size() const;
646646

tiledb/sm/serialization/array.cc

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,21 @@ void array_from_capnp(
310310
&resources,
311311
array->memory_tracker(),
312312
frag_meta_reader.getVersion());
313-
throw_if_not_ok(fragment_metadata_from_capnp(
314-
array->array_schema_latest_ptr(), frag_meta_reader, meta));
313+
314+
auto schema = array->array_schema_latest_ptr();
315+
if (frag_meta_reader.hasArraySchemaName()) {
316+
auto fragment_array_schema_name =
317+
frag_meta_reader.getArraySchemaName().cStr();
318+
schema = array->array_schemas_all().at(fragment_array_schema_name);
319+
} else if (array->array_schemas_all().size() > 1) {
320+
throw ArraySerializationException(
321+
"Cannot deserialize fragment metadata without an array schema name "
322+
"in the case of arrays with evolved schemas.");
323+
}
324+
325+
// pass the right schema to deserialize fragment metadata
326+
throw_if_not_ok(
327+
fragment_metadata_from_capnp(schema, frag_meta_reader, meta));
315328
if (client_side) {
316329
meta->loaded_metadata()->set_rtree_loaded();
317330
}

tiledb/sm/serialization/fragment_metadata.cc

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ void generic_tile_offsets_from_capnp(
117117
}
118118

119119
Status fragment_metadata_from_capnp(
120-
const shared_ptr<const ArraySchema>& array_schema,
120+
const shared_ptr<const ArraySchema>& fragment_array_schema,
121121
const capnp::FragmentMetadata::Reader& frag_meta_reader,
122122
shared_ptr<FragmentMetadata> frag_meta) {
123123
if (frag_meta_reader.hasFileSizes()) {
@@ -145,7 +145,8 @@ Status fragment_metadata_from_capnp(
145145
if (frag_meta_reader.hasFragmentUri()) {
146146
// Reconstruct the fragment uri out of the received fragment name
147147
frag_meta->fragment_uri() = deserialize_array_uri_to_absolute(
148-
frag_meta_reader.getFragmentUri().cStr(), array_schema->array_uri());
148+
frag_meta_reader.getFragmentUri().cStr(),
149+
fragment_array_schema->array_uri());
149150
}
150151
frag_meta->has_timestamps() = frag_meta_reader.getHasTimestamps();
151152
frag_meta->has_delete_meta() = frag_meta_reader.getHasDeleteMeta();
@@ -156,11 +157,13 @@ Status fragment_metadata_from_capnp(
156157
frag_meta->version() = frag_meta_reader.getVersion();
157158

158159
// Set the array schema and most importantly retrigger the build
159-
// of the internal idx_map. Also set array_schema_name which is used
160-
// in some places in the global writer
161-
frag_meta->set_array_schema(array_schema);
162-
frag_meta->set_schema_name(array_schema->name());
163-
frag_meta->set_dense(array_schema->dense());
160+
// of the internal idx_map.
161+
frag_meta->set_array_schema(fragment_array_schema);
162+
frag_meta->set_dense(fragment_array_schema->dense());
163+
164+
if (frag_meta_reader.hasArraySchemaName()) {
165+
frag_meta->set_schema_name(frag_meta_reader.getArraySchemaName().cStr());
166+
}
164167

165168
LoadedFragmentMetadata::LoadedMetadata loaded_metadata;
166169

@@ -367,7 +370,7 @@ Status fragment_metadata_from_capnp(
367370

368371
if (frag_meta_reader.hasRtree()) {
369372
auto data = frag_meta_reader.getRtree();
370-
auto& domain = array_schema->domain();
373+
auto& domain = fragment_array_schema->domain();
371374
// If there are no levels, we still need domain_ properly initialized
372375
frag_meta->loaded_metadata()->rtree().reset(
373376
&domain, constants::rtree_fanout);
@@ -391,7 +394,7 @@ Status fragment_metadata_from_capnp(
391394
RETURN_NOT_OK(status);
392395
// Whilst sparse gets its domain calculated, dense needs to have it
393396
// set here from the deserialized data
394-
if (array_schema->dense()) {
397+
if (fragment_array_schema->dense()) {
395398
frag_meta->init_domain(*ndrange);
396399
} else {
397400
const auto& frag0_dom = *ndrange;
@@ -699,6 +702,8 @@ Status fragment_metadata_to_capnp(
699702
generic_tile_offsets_to_capnp(
700703
frag_meta.generic_tile_offsets(), gt_offsets_builder);
701704

705+
frag_meta_builder->setArraySchemaName(frag_meta.array_schema_name());
706+
702707
return Status::Ok();
703708
}
704709

tiledb/sm/serialization/fragment_metadata.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,15 @@ namespace serialization {
5454
/**
5555
* Convert Cap'n Proto message to Fragment Metadata
5656
*
57-
* @param array_schema the schema of the array the metadata belongs
57+
* @param array_schema the schema of the fragment the metadata belongs
5858
* @param frag_meta_reader cap'n proto class
5959
* @param frag_meta fragment metadata object to deserialize into
6060
* @param resources ContextResources associated
6161
* @param memory_tracker memory tracker associated
6262
* @return Status
6363
*/
6464
Status fragment_metadata_from_capnp(
65-
const shared_ptr<const ArraySchema>& array_schema,
65+
const shared_ptr<const ArraySchema>& fragment_array_schema,
6666
const capnp::FragmentMetadata::Reader& frag_meta_reader,
6767
shared_ptr<FragmentMetadata> frag_meta);
6868

tiledb/sm/serialization/tiledb-rest.capnp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,9 @@ struct FragmentMetadata {
11631163

11641164
gtOffsets @28 :GenericTileOffsets;
11651165
# the start offsets of the generic tiles stored in the metadata file
1166+
1167+
arraySchemaName @29 :Text;
1168+
# array schema name
11661169
}
11671170

11681171
struct MultiPartUploadState {

0 commit comments

Comments
 (0)