Skip to content

Commit b168f5e

Browse files
joanbm-mongodb, MongoDB Bot
authored and committed
SERVER-98399 Time-series collections with mixed-schema data should fail validation if only the top-level mixed-schema flag is set (#34071)
GitOrigin-RevId: 1ac214a2f627476862e7803f4b6d0b89a5758665
1 parent 98604fa commit b168f5e

File tree

4 files changed

+110
-3
lines changed

4 files changed

+110
-3
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/**
2+
* Tests validating a time-series collection with mixed schema buckets when the mixed-schema flag
3+
* is set in the top-level catalog metadata, but not in the collection options (WiredTiger config string).
4+
*
5+
* This replicates the scenario where a time series collection has been created in MongoDB <5.2,
6+
* (i.e. before SERVER-60565) then later upgraded to MongoDB <6.0.17 (i.e. before SERVER-91195),
7+
* then all the way up to the current version (see SERVER-98399 for more details).
8+
*
9+
* Having the mixed-schema flag set only in the top-level catalog metadata is a precarious state,
10+
* since its value may be lost. Therefore, direct manipulation of mixed-schema buckets is prevented, and
11+
* validation will flag the collection as needing manual intervention for SERVER-91194.
12+
*/
13+
(function() {
14+
"use strict";
15+
16+
load("jstests/libs/fail_point_util.js"); // For configureFailPoint
17+
18+
const conn = MongoRunner.runMongod();
19+
const testDB = conn.getDB(jsTestName());
20+
21+
const collName = "ts";
22+
23+
// Create a time-series collection containing a mixed-schema bucket
24+
assert.commandWorked(testDB.runCommand({drop: collName}));
25+
assert.commandWorked(
26+
testDB.createCollection(collName, {timeseries: {timeField: 't', metaField: 'm'}}));
27+
const coll = testDB[collName];
28+
const bucketsColl = testDB["system.buckets." + collName];
29+
30+
const bucket = {
31+
_id: ObjectId("65a6eb806ffc9fa4280ecac4"),
32+
control: {
33+
version: NumberInt(1),
34+
min: {
35+
_id: ObjectId("65a6eba7e6d2e848e08c3750"),
36+
t: ISODate("2024-01-16T20:48:00Z"),
37+
a: 1,
38+
},
39+
max: {
40+
_id: ObjectId("65a6eba7e6d2e848e08c3751"),
41+
t: ISODate("2024-01-16T20:48:39.448Z"),
42+
a: "a",
43+
},
44+
},
45+
meta: 0,
46+
data: {
47+
_id: {
48+
0: ObjectId("65a6eba7e6d2e848e08c3750"),
49+
1: ObjectId("65a6eba7e6d2e848e08c3751"),
50+
},
51+
t: {
52+
0: ISODate("2024-01-16T20:48:39.448Z"),
53+
1: ISODate("2024-01-16T20:48:39.448Z"),
54+
},
55+
a: {
56+
0: "a",
57+
1: 1,
58+
},
59+
},
60+
};
61+
62+
assert.commandWorked(
63+
testDB.runCommand({collMod: collName, timeseriesBucketsMayHaveMixedSchemaData: true}));
64+
assert.commandWorked(bucketsColl.insert(bucket));
65+
66+
// Set the mixed-schema flag only on the top-level catalog metadata field
67+
// (md.timeseriesBucketsMayHaveMixedSchemaData), but not on the collection options
68+
// (inside md.options.storageEngine.wiredTiger.configString).
69+
const fpsimulateLegacyTimeseriesMixedSchemaFlag =
70+
configureFailPoint(conn, "simulateLegacyTimeseriesMixedSchemaFlag");
71+
assert.commandWorked(
72+
testDB.runCommand({collMod: collName, timeseriesBucketsMayHaveMixedSchemaData: true}));
73+
fpsimulateLegacyTimeseriesMixedSchemaFlag.off();
74+
75+
const bucketsCatalogEntry = bucketsColl.aggregate([{$listCatalog: {}}]).toArray()[0];
76+
const wtConfigStr = bucketsCatalogEntry.md.options.storageEngine?.wiredTiger?.configString ?? '';
77+
assert.eq(true, bucketsCatalogEntry.md.timeseriesBucketsMayHaveMixedSchemaData);
78+
assert(!wtConfigStr.includes("timeseriesBucketsMayHaveMixedSchemaData"));
79+
80+
// Validation of the collection returns the error asking for SERVER-91194 manual intervention
81+
const res = assert.commandWorked(coll.validate());
82+
assert(!res.valid);
83+
assert.eq(res.warnings.length, 0);
84+
assert.gt(res.errors.length, 0, "Validation should return at least one error.");
85+
assert.containsPrefix(
86+
"Detected a time-series bucket with mixed schema data",
87+
res.errors,
88+
"Validation of mixed schema buckets when they are not allowed should return an error stating such");
89+
90+
assert.commandWorked(bucketsColl.deleteOne({_id: bucket._id})); // Clean up for shutdown validation
91+
MongoRunner.stopMongod(conn);
92+
})();

src/mongo/db/catalog/SConscript

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ env.Library(
484484
'collection_options',
485485
'index_catalog',
486486
'index_key_validate',
487+
'storage_engine_collection_options_flags_parser',
487488
'throttle_cursor',
488489
'validate_state',
489490
],

src/mongo/db/catalog/collection_impl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ namespace {
8080
MONGO_FAIL_POINT_DEFINE(allowSettingMalformedCollectionValidators);
8181

8282
MONGO_FAIL_POINT_DEFINE(skipCappedDeletes);
83+
// Simulate the behavior of the mixed-schema flag in MongoDB versions without SERVER-91195:
84+
// Only set the legacy time-series mixed-schema flag at the top level of the catalog,
85+
// and clear the new durable flag which is stored inside the collection options.
86+
MONGO_FAIL_POINT_DEFINE(simulateLegacyTimeseriesMixedSchemaFlag);
8387

8488
Status checkValidatorCanBeUsedOnNs(const BSONObj& validator,
8589
const NamespaceString& nss,
@@ -866,7 +870,7 @@ void CollectionImpl::setTimeseriesBucketsMayHaveMixedSchemaData(OperationContext
866870
md.options.storageEngine = setFlagToStorageEngineBson(
867871
md.options.storageEngine,
868872
backwards_compatible_collection_options::kTimeseriesBucketsMayHaveMixedSchemaData,
869-
*setting);
873+
simulateLegacyTimeseriesMixedSchemaFlag.shouldFail() ? boost::none : setting);
870874
}
871875

872876
// Also update legacy parameter for compatibility when downgrading to older sub-versions

src/mongo/db/catalog/validate_adaptor.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,13 @@
3737
#include "mongo/bson/bsonobj.h"
3838
#include "mongo/bson/util/bson_extract.h"
3939
#include "mongo/bson/util/bsoncolumn.h"
40+
#include "mongo/db/catalog/backwards_compatible_collection_options_util.h"
4041
#include "mongo/db/catalog/clustered_collection_util.h"
4142
#include "mongo/db/catalog/collection.h"
4243
#include "mongo/db/catalog/column_index_consistency.h"
4344
#include "mongo/db/catalog/index_catalog.h"
4445
#include "mongo/db/catalog/index_consistency.h"
46+
#include "mongo/db/catalog/storage_engine_collection_options_flags_parser.h"
4547
#include "mongo/db/catalog/throttle_cursor.h"
4648
#include "mongo/db/concurrency/exception_util.h"
4749
#include "mongo/db/curop.h"
@@ -821,8 +823,16 @@ void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx,
821823
results->valid = false;
822824
} else if (containsMixedSchemaDataResponse.isOK() &&
823825
containsMixedSchemaDataResponse.getValue()) {
824-
bool mixedSchemaAllowed =
825-
coll->getTimeseriesBucketsMayHaveMixedSchemaData().get();
826+
// Only allow mixed-schema data if the new durable mixed-schema flag
827+
// introduced by SERVER-91195 (in `options.storageEngine`) is set.
828+
// Checking against `coll->getTimeseriesBucketsMayHaveMixedSchemaData()` is
829+
// unsafe, since only the legacy mixed-schema flag may be set, which can be
830+
// later lost on collection cloning due to SERVER-91194.
831+
bool mixedSchemaAllowed = getFlagFromStorageEngineBson(
832+
coll->getCollectionOptions().storageEngine,
833+
backwards_compatible_collection_options::
834+
kTimeseriesBucketsMayHaveMixedSchemaData)
835+
.value_or(false);
826836
if (mixedSchemaAllowed && !bucketMixedSchemaDataWarning) {
827837
bucketMixedSchemaDataWarning = true;
828838
LOGV2_WARNING(8469901,

0 commit comments

Comments
 (0)