
Commit 56d4a5d

SERVER-42500 Trigger best-effort shard refreshes after refineCollectionShardKey
1 parent ed4709c commit 56d4a5d

3 files changed: 91 additions & 17 deletions
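
After this change, refineCollectionShardKey commits the refined key and new collection epoch to the config metadata and then only fires best-effort, asynchronous refreshes at the shards that own chunks in the collection; the command does not wait for those refreshes to complete. Callers that need to observe the refreshed shard metadata therefore have to poll, as the updated tests below do. A minimal mongo-shell sketch of that polling pattern follows; it assumes a ShardingTest `st`, a sharded namespace `kNsName`, and a refined key document `newKeyDoc` like those in refine_collection_shard_key_basic.js.

// Sketch only: `st`, `kNsName`, and `newKeyDoc` are assumed to come from a ShardingTest setup.
const mongos = st.s;

// Record the shard's collection epoch before the shard key is refined.
const epochBefore = st.shard0.adminCommand({getShardVersion: kNsName, fullMetadata: true})
                        .metadata.shardVersionEpoch.toString();

assert.commandWorked(mongos.adminCommand({refineCollectionShardKey: kNsName, key: newKeyDoc}));

// The shard refresh is fire-and-forget, so poll until the epoch changes instead of
// asserting immediately after the command returns.
assert.soon(() => {
    const epochNow = st.shard0.adminCommand({getShardVersion: kNsName, fullMetadata: true})
                         .metadata.shardVersionEpoch.toString();
    return epochNow !== epochBefore;
});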

jstests/sharding/refine_collection_shard_key_basic.js

Lines changed: 27 additions & 1 deletion
@@ -86,7 +86,8 @@ function setupCRUDBeforeRefine() {
 }
 
 function validateCRUDAfterRefine() {
-    // Force a refresh on each shard to simulate the asynchronous 'setShardVersion' completing.
+    // Force a refresh on the mongos and each shard because refineCollectionShardKey only triggers
+    // best-effort shard refreshes.
     flushRoutersAndRefreshShardMetadata(st, {ns: kNsName});
 
     const session = mongos.startSession({retryWrites: true});
@@ -592,5 +593,30 @@ assert.eq(3, oldTagsArr.length);
 assert.commandWorked(mongos.adminCommand({refineCollectionShardKey: kNsName, key: newKeyDoc}));
 validateUnrelatedCollAfterRefine(oldCollArr, oldChunkArr, oldTagsArr);
 
+// Verify that all shards containing chunks in the namespace 'db.foo' eventually refresh (i.e. the
+// secondary shard will not refresh because it does not contain any chunks in 'db.foo'). NOTE: This
+// will only succeed in a linear jstest without failovers.
+dropAndReshardColl(oldKeyDoc);
+assert.commandWorked(mongos.getCollection(kNsName).createIndex(newKeyDoc));
+
+assert.commandWorked(
+    mongos.adminCommand({moveChunk: kNsName, find: {a: 0, b: 0}, to: secondaryShard}));
+assert.commandWorked(
+    mongos.adminCommand({moveChunk: kNsName, find: {a: 0, b: 0}, to: primaryShard}));
+
+const oldPrimaryEpoch = st.shard0.adminCommand({getShardVersion: kNsName, fullMetadata: true})
+                            .metadata.shardVersionEpoch.toString();
+const oldSecondaryEpoch = st.shard1.adminCommand({getShardVersion: kNsName, fullMetadata: true})
+                              .metadata.shardVersionEpoch.toString();
+
+assert.commandWorked(mongos.adminCommand({refineCollectionShardKey: kNsName, key: newKeyDoc}));
+
+assert.soon(() => oldPrimaryEpoch !==
+                st.shard0.adminCommand({getShardVersion: kNsName, fullMetadata: true})
+                    .metadata.shardVersionEpoch.toString());
+assert.soon(() => oldSecondaryEpoch ===
+                st.shard1.adminCommand({getShardVersion: kNsName, fullMetadata: true})
+                    .metadata.shardVersionEpoch.toString());
+
 st.stop();
 })();

jstests/sharding/refine_collection_shard_key_drops_chunks.js

Lines changed: 8 additions & 9 deletions
@@ -49,17 +49,14 @@ assert.eq({a: 5, b: 5}, chunkArr[1].max);
 assert.eq({a: 5, b: 5}, chunkArr[2]._id);
 assert.eq({a: MaxKey, b: MaxKey}, chunkArr[2].max);
 
-assert.commandWorked(mongos.adminCommand({refineCollectionShardKey: kNsName, key: newKeyDoc}));
-
 // Enable failpoint 'hangPersistCollectionAndChangedChunksAfterDropChunks' and flush the routing
 // table cache.
 assert.commandWorked(shard.adminCommand({
     configureFailPoint: 'hangPersistCollectionAndChangedChunksAfterDropChunks',
     mode: 'alwaysOn'
 }));
-const awaitShellToFlushRoutingTableCacheUpdates = startParallelShell(() => {
-    assert.commandWorked(db.adminCommand({_flushRoutingTableCacheUpdates: 'db.foo'}));
-}, st.rs0.getPrimary().port);
+
+assert.commandWorked(mongos.adminCommand({refineCollectionShardKey: kNsName, key: newKeyDoc}));
 
 // Verify that all chunks belonging to 'db.foo' have been deleted.
 waitForFailpoint('Hit hangPersistCollectionAndChangedChunksAfterDropChunks', 1);
@@ -70,11 +67,13 @@ assert.eq(0, chunkArr.length);
 // flushing the routing table cache.
 assert.commandWorked(shard.adminCommand(
     {configureFailPoint: 'hangPersistCollectionAndChangedChunksAfterDropChunks', mode: 'off'}));
-awaitShellToFlushRoutingTableCacheUpdates();
 
-// Verify that 'config.cache.chunks.db.foo' is as expected after refineCollectionShardKey.
-chunkArr = shard.getCollection(kConfigCacheChunks).find({}).sort({min: 1}).toArray();
-assert.eq(3, chunkArr.length);
+// Verify that 'config.cache.chunks.db.foo' is as expected after refineCollectionShardKey. NOTE: We
+// use assert.soon here because refineCollectionShardKey doesn't block for each shard to refresh.
+assert.soon(() => {
+    chunkArr = shard.getCollection(kConfigCacheChunks).find({}).sort({min: 1}).toArray();
+    return (3 === chunkArr.length);
+});
 assert.eq({a: MinKey, b: MinKey, c: MinKey, d: MinKey}, chunkArr[0]._id);
 assert.eq({a: 0, b: 0, c: MinKey, d: MinKey}, chunkArr[0].max);
 assert.eq({a: 0, b: 0, c: MinKey, d: MinKey}, chunkArr[1]._id);

src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp

Lines changed: 56 additions & 7 deletions
@@ -63,6 +63,7 @@
 #include "mongo/s/client/shard.h"
 #include "mongo/s/client/shard_registry.h"
 #include "mongo/s/grid.h"
+#include "mongo/s/request_types/flush_routing_table_cache_updates_gen.h"
 #include "mongo/s/request_types/set_shard_version_request.h"
 #include "mongo/s/shard_key_pattern.h"
 #include "mongo/s/shard_util.h"
@@ -149,6 +150,41 @@ void writeFirstChunksForCollection(OperationContext* opCtx,
     }
 }
 
+void triggerFireAndForgetShardRefreshes(OperationContext* opCtx, const NamespaceString& nss) {
+    const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
+    const auto allShards = uassertStatusOK(Grid::get(opCtx)->catalogClient()->getAllShards(
+                                               opCtx, repl::ReadConcernLevel::kLocalReadConcern))
+                               .value;
+
+    for (const auto& shardEntry : allShards) {
+        const auto chunk = uassertStatusOK(shardRegistry->getConfigShard()->exhaustiveFindOnConfig(
+                                               opCtx,
+                                               ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+                                               repl::ReadConcernLevel::kLocalReadConcern,
+                                               ChunkType::ConfigNS,
+                                               BSON(ChunkType::ns(nss.ns())
+                                                    << ChunkType::shard(shardEntry.getName())),
+                                               BSONObj(),
+                                               1LL))
+                               .docs;
+
+        invariant(chunk.size() == 0 || chunk.size() == 1);
+
+        if (chunk.size() == 1) {
+            const auto shard =
+                uassertStatusOK(shardRegistry->getShard(opCtx, shardEntry.getName()));
+
+            // This is a best-effort attempt to refresh the shard 'shardEntry'. Fire and forget an
+            // asynchronous '_flushRoutingTableCacheUpdates' request.
+            shard->runFireAndForgetCommand(
+                opCtx,
+                ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+                NamespaceString::kAdminDb.toString(),
+                BSON(_flushRoutingTableCacheUpdates::kCommandName << nss.ns()));
+        }
+    }
+}
+
 }  // namespace
 
 void checkForExistingChunks(OperationContext* opCtx, const NamespaceString& nss) {
@@ -760,7 +796,8 @@ void ShardingCatalogManager::refineCollectionShardKey(OperationContext* opCtx,
 
     // Update all config.tags entries for the given namespace by setting their bounds for each new
     // field in the refined key to MinKey (except for the global max tag where the max bounds are
-    // set to MaxKey).
+    // set to MaxKey). NOTE: The last update has majority write concern to ensure that all updates
+    // are majority committed before refreshing each shard.
     uassertStatusOK(
         catalogClient->updateConfigDocuments(opCtx,
                                              TagsType::ConfigNS,
@@ -769,12 +806,13 @@ void ShardingCatalogManager::refineCollectionShardKey(OperationContext* opCtx,
                                              false,  // upsert
                                              ShardingCatalogClient::kLocalWriteConcern));
 
-    uassertStatusOK(catalogClient->updateConfigDocument(opCtx,
-                                                        TagsType::ConfigNS,
-                                                        isGlobalMaxQuery,
-                                                        BSON("$max" << globalMaxBounds),
-                                                        false,  // upsert
-                                                        ShardingCatalogClient::kLocalWriteConcern));
+    uassertStatusOK(
+        catalogClient->updateConfigDocument(opCtx,
+                                            TagsType::ConfigNS,
+                                            isGlobalMaxQuery,
+                                            BSON("$max" << globalMaxBounds),
+                                            false,  // upsert
+                                            ShardingCatalogClient::kMajorityWriteConcern));
 
     log() << "refineCollectionShardKey: updated zone entries for '" << nss.ns() << "': took "
           << executionTimer.millis() << " ms. Total time taken: " << totalTimer.millis() << " ms.";
@@ -784,6 +822,17 @@ void ShardingCatalogManager::refineCollectionShardKey(OperationContext* opCtx,
                                                          nss.ns(),
                                                          BSONObj(),
                                                          ShardingCatalogClient::kLocalWriteConcern);
+
+    // Trigger refreshes on each shard containing chunks in the namespace 'nss'. Since this isn't
+    // necessary for correctness, all refreshes are best-effort.
+    try {
+        triggerFireAndForgetShardRefreshes(opCtx, nss);
+    } catch (const DBException& ex) {
+        log() << ex.toStatus().withContext(str::stream()
+                                           << "refineCollectionShardKey: failed to best-effort "
+                                              "refresh all shards containing chunks in '"
+                                           << nss.ns() << "'");
+    }
 }
 
 }  // namespace mongo
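
The refresh that triggerFireAndForgetShardRefreshes sends to each chunk-owning shard is the same internal '_flushRoutingTableCacheUpdates' command the old test issued from a parallel shell; the config server simply fires it without waiting for a reply. A rough shell equivalent of one such refresh, run directly against a shard primary, is sketched below; `shardPrimaryConn` and the 'db.foo' namespace are illustrative.

// Ask one shard's primary to refresh its routing table cache for 'db.foo'.
// `shardPrimaryConn` is assumed to be a direct connection to that shard's primary.
assert.commandWorked(
    shardPrimaryConn.adminCommand({_flushRoutingTableCacheUpdates: 'db.foo'}));

// Once the refresh persists, the refined chunk metadata shows up in the shard's
// 'config.cache.chunks.db.foo' collection, which is what the updated drops_chunks test waits on.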
