Skip to content

Commit 6c4a45e

Browse files
authored
fix(shell-api): Fix support for sharded time series collections with getShardDistribution MONGOSH-1447 (#2189)
Before this fix: when the getShardDistribution command is invoked on a sharded time series collection, the output would say that the collection is not actually sharded. If the user invoked the same command on the related bucket collection, it would produce an incorrect output.
1 parent b7258a8 commit 6c4a45e

File tree

3 files changed

+205
-32
lines changed

3 files changed

+205
-32
lines changed

packages/shell-api/src/collection.spec.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2258,8 +2258,16 @@ describe('Collection', function () {
22582258
it('throws when collection is not sharded', async function () {
22592259
const serviceProviderCursor = stubInterface<ServiceProviderCursor>();
22602260
serviceProviderCursor.limit.returns(serviceProviderCursor);
2261-
serviceProviderCursor.tryNext.resolves(null);
2262-
serviceProvider.find.returns(serviceProviderCursor as any);
2261+
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
2262+
serviceProviderCursor.tryNext.returns(null as any);
2263+
serviceProvider.find.returns(serviceProviderCursor);
2264+
2265+
const tryNext = sinon.stub();
2266+
tryNext.onCall(0).resolves({ storageStats: {} });
2267+
tryNext.onCall(1).resolves(null);
2268+
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
2269+
serviceProvider.aggregate.returns({ tryNext } as any);
2270+
22632271
const error = await collection.getShardDistribution().catch((e) => e);
22642272

22652273
expect(error).to.be.instanceOf(MongoshInvalidInputError);

packages/shell-api/src/collection.ts

Lines changed: 79 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2076,31 +2076,73 @@ export default class Collection extends ShellApiWithMongoClass {
20762076
});
20772077
}
20782078

2079-
@returnsPromise
2080-
@topologies([Topologies.Sharded])
2081-
@apiVersions([])
2082-
async getShardDistribution(): Promise<CommandResult> {
2083-
this._emitCollectionApiCall('getShardDistribution', {});
2084-
2085-
await getConfigDB(this._database); // Warns if not connected to mongos
2086-
2087-
const result = {} as Document;
2088-
const config = this._mongo.getDB('config');
2079+
/**
2080+
* Helper for getting collection info for sharded collections.
2081+
* @throws If the collection is not sharded.
2082+
* @returns collection info based on given collStats.
2083+
*/
2084+
async _getShardedCollectionInfo(
2085+
config: Database,
2086+
collStats: Document[]
2087+
): Promise<Document> {
20892088
const ns = `${this._database._name}.${this._name}`;
2090-
2091-
const configCollectionsInfo = await config
2089+
const existingConfigCollectionsInfo = await config
20922090
.getCollection('collections')
20932091
.findOne({
20942092
_id: ns,
20952093
...onlyShardedCollectionsInConfigFilter,
20962094
});
2097-
if (!configCollectionsInfo) {
2095+
2096+
if (existingConfigCollectionsInfo !== null) {
2097+
return existingConfigCollectionsInfo;
2098+
}
2099+
2100+
// If the collection info is not found, check if it is timeseries and use the bucket
2101+
const timeseriesShardStats = collStats.find(
2102+
(extractedShardStats) =>
2103+
typeof extractedShardStats.storageStats.timeseries !== 'undefined'
2104+
);
2105+
2106+
if (!timeseriesShardStats) {
20982107
throw new MongoshInvalidInputError(
20992108
`Collection ${this._name} is not sharded`,
21002109
ShellApiErrors.NotConnectedToShardedCluster
21012110
);
21022111
}
21032112

2113+
const { storageStats } = timeseriesShardStats;
2114+
2115+
const timeseries: Document = storageStats.timeseries;
2116+
const timeseriesBucketNs: string = timeseries.bucketsNs;
2117+
2118+
const timeseriesCollectionInfo = await config
2119+
.getCollection('collections')
2120+
.findOne({
2121+
_id: timeseriesBucketNs,
2122+
...onlyShardedCollectionsInConfigFilter,
2123+
});
2124+
2125+
if (!timeseriesCollectionInfo) {
2126+
throw new MongoshRuntimeError(
2127+
`Error finding collection information for ${timeseriesBucketNs}`,
2128+
CommonErrors.CommandFailed
2129+
);
2130+
}
2131+
2132+
return timeseriesCollectionInfo;
2133+
}
2134+
2135+
@returnsPromise
2136+
@topologies([Topologies.Sharded])
2137+
@apiVersions([])
2138+
async getShardDistribution(): Promise<CommandResult> {
2139+
this._emitCollectionApiCall('getShardDistribution', {});
2140+
2141+
await getConfigDB(this._database); // Warns if not connected to mongos
2142+
2143+
const result = {} as Document;
2144+
const config = this._mongo.getDB('config');
2145+
21042146
const collStats = await (
21052147
await this.aggregate({ $collStats: { storageStats: {} } })
21062148
).toArray();
@@ -2115,12 +2157,15 @@ export default class Collection extends ShellApiWithMongoClass {
21152157
avgObjSize: number;
21162158
}[] = [];
21172159

2160+
const configCollectionsInfo = await this._getShardedCollectionInfo(
2161+
config,
2162+
collStats
2163+
);
2164+
21182165
await Promise.all(
2119-
collStats.map((extShardStats) =>
2166+
collStats.map((extractedShardStats) =>
21202167
(async (): Promise<void> => {
2121-
// Extract and store only the relevant subset of the stats for this shard
2122-
const { shard } = extShardStats;
2123-
2168+
const { shard } = extractedShardStats;
21242169
// If we have an UUID, use that for lookups. If we have only the ns,
21252170
// use that. (On 5.0+ servers, config.chunk has uses the UUID, before
21262171
// that it had the ns).
@@ -2131,39 +2176,43 @@ export default class Collection extends ShellApiWithMongoClass {
21312176
const [host, numChunks] = await Promise.all([
21322177
config
21332178
.getCollection('shards')
2134-
.findOne({ _id: extShardStats.shard }),
2179+
.findOne({ _id: extractedShardStats.shard }),
21352180
config.getCollection('chunks').countDocuments(countChunksQuery),
21362181
]);
21372182
const shardStats = {
21382183
shardId: shard,
21392184
host: host !== null ? host.host : null,
2140-
size: extShardStats.storageStats.size,
2141-
count: extShardStats.storageStats.count,
2185+
size: extractedShardStats.storageStats.size,
2186+
count: extractedShardStats.storageStats.count,
21422187
numChunks: numChunks,
2143-
avgObjSize: extShardStats.storageStats.avgObjSize,
2188+
avgObjSize: extractedShardStats.storageStats.avgObjSize,
21442189
};
21452190

21462191
const key = `Shard ${shardStats.shardId} at ${shardStats.host}`;
21472192

2148-
const estChunkData =
2193+
// In sharded timeseries collections we do not have a count
2194+
// so we intentionally pass NaN as a result to the client.
2195+
const shardStatsCount: number = shardStats.count ?? NaN;
2196+
2197+
const estimatedChunkDataPerChunk =
21492198
shardStats.numChunks === 0
21502199
? 0
21512200
: shardStats.size / shardStats.numChunks;
2152-
const estChunkCount =
2201+
const estimatedDocsPerChunk =
21532202
shardStats.numChunks === 0
21542203
? 0
2155-
: Math.floor(shardStats.count / shardStats.numChunks);
2204+
: Math.floor(shardStatsCount / shardStats.numChunks);
21562205

21572206
result[key] = {
21582207
data: dataFormat(coerceToJSNumber(shardStats.size)),
2159-
docs: shardStats.count,
2208+
docs: shardStatsCount,
21602209
chunks: shardStats.numChunks,
2161-
'estimated data per chunk': dataFormat(estChunkData),
2162-
'estimated docs per chunk': estChunkCount,
2210+
'estimated data per chunk': dataFormat(estimatedChunkDataPerChunk),
2211+
'estimated docs per chunk': estimatedDocsPerChunk,
21632212
};
21642213

21652214
totals.size += coerceToJSNumber(shardStats.size);
2166-
totals.count += coerceToJSNumber(shardStats.count);
2215+
totals.count += coerceToJSNumber(shardStatsCount);
21672216
totals.numChunks += coerceToJSNumber(shardStats.numChunks);
21682217

21692218
conciseShardsStats.push(shardStats);
@@ -2326,7 +2375,7 @@ export default class Collection extends ShellApiWithMongoClass {
23262375
return await this._mongo._serviceProvider.getSearchIndexes(
23272376
this._database._name,
23282377
this._name,
2329-
indexName as string | undefined,
2378+
indexName,
23302379
{ ...(await this._database._baseOptions()), ...options }
23312380
);
23322381
}
@@ -2355,7 +2404,7 @@ export default class Collection extends ShellApiWithMongoClass {
23552404
this._name,
23562405
[
23572406
{
2358-
name: (indexName as string | undefined) ?? 'default',
2407+
name: indexName ?? 'default',
23592408
// Omitting type when it is 'search' for compat with older servers
23602409
...(type &&
23612410
type !== 'search' && { type: type as 'search' | 'vectorSearch' }),

packages/shell-api/src/shard.spec.ts

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2554,6 +2554,122 @@ describe('Shard', function () {
25542554
});
25552555
});
25562556
});
2557+
2558+
describe('collection.getShardDistribution()', function () {
2559+
let db: Database;
2560+
const dbName = 'get-shard-distribution-test';
2561+
const ns = `${dbName}.test`;
2562+
2563+
beforeEach(async function () {
2564+
db = sh._database.getSiblingDB(dbName);
2565+
await db.getCollection('test').insertOne({ key: 1 });
2566+
await db.getCollection('test').createIndex({ key: 1 });
2567+
});
2568+
2569+
afterEach(async function () {
2570+
await db.dropDatabase();
2571+
});
2572+
2573+
context('unsharded collections', function () {
2574+
it('throws an error', async function () {
2575+
const caughtError = await db
2576+
.getCollection('test')
2577+
.getShardDistribution()
2578+
.catch((e) => e);
2579+
expect(caughtError.message).includes(
2580+
'Collection test is not sharded'
2581+
);
2582+
});
2583+
});
2584+
2585+
context('sharded collections', function () {
2586+
beforeEach(async function () {
2587+
expect((await sh.enableSharding(dbName)).ok).to.equal(1);
2588+
expect(
2589+
(await sh.shardCollection(ns, { key: 1 })).collectionsharded
2590+
).to.equal(ns);
2591+
});
2592+
2593+
it('returns the correct StatsResult', async function () {
2594+
const result = await db.getCollection('test').getShardDistribution();
2595+
const shardDistributionValue = result.value as Document;
2596+
2597+
expect(result.type).to.equal('StatsResult');
2598+
2599+
const shardFields = Object.keys(shardDistributionValue).filter(
2600+
(field) => field !== 'Totals'
2601+
);
2602+
expect(shardFields.length).to.equal(1);
2603+
const shardField = shardFields[0];
2604+
expect(
2605+
shardDistributionValue[shardField]['estimated docs per chunk']
2606+
).to.equal(1);
2607+
2608+
expect(shardDistributionValue.Totals.docs).to.equal(1);
2609+
expect(shardDistributionValue.Totals.chunks).to.equal(1);
2610+
});
2611+
});
2612+
2613+
// We explicitly test sharded time series collections as it fallbacks to the bucket information
2614+
context('sharded timeseries collections', function () {
2615+
skipIfServerVersion(mongos, '< 5.1');
2616+
2617+
const timeseriesCollectionName = 'getShardDistributionTS';
2618+
const timeseriesNS = `${dbName}.${timeseriesCollectionName}`;
2619+
2620+
beforeEach(async function () {
2621+
expect((await sh.enableSharding(dbName)).ok).to.equal(1);
2622+
2623+
expect(
2624+
(
2625+
await sh.shardCollection(
2626+
timeseriesNS,
2627+
{ 'metadata.bucketId': 1 },
2628+
{
2629+
timeseries: {
2630+
timeField: 'timestamp',
2631+
metaField: 'metadata',
2632+
granularity: 'hours',
2633+
},
2634+
}
2635+
)
2636+
).collectionsharded
2637+
).to.equal(timeseriesNS);
2638+
await db.getCollection(timeseriesCollectionName).insertOne({
2639+
metadata: {
2640+
bucketId: 1,
2641+
type: 'temperature',
2642+
},
2643+
timestamp: new Date('2021-05-18T00:00:00.000Z'),
2644+
temp: 12,
2645+
});
2646+
});
2647+
2648+
it('returns the correct StatsResult', async function () {
2649+
const result = await db
2650+
.getCollection(timeseriesCollectionName)
2651+
.getShardDistribution();
2652+
const shardDistributionValue = result.value as Document;
2653+
2654+
expect(result.type).to.equal('StatsResult');
2655+
2656+
const shardFields = Object.keys(shardDistributionValue).filter(
2657+
(field) => field !== 'Totals'
2658+
);
2659+
expect(shardFields.length).to.equal(1);
2660+
const shardField = shardFields[0];
2661+
2662+
// Timeseries will have count NaN
2663+
expect(
2664+
shardDistributionValue[shardField]['estimated docs per chunk']
2665+
).to.be.NaN;
2666+
2667+
expect(shardDistributionValue.Totals.docs).to.be.NaN;
2668+
expect(shardDistributionValue.Totals.chunks).to.equal(1);
2669+
});
2670+
});
2671+
});
2672+
25572673
describe('collection.stats()', function () {
25582674
let db: Database;
25592675
let hasTotalSize: boolean;

0 commit comments

Comments
 (0)