Skip to content

Commit 27f28ee

Browse files
committed
CLOUDP-333846 Schema Analysis Redux Integration for Collection Plugin
1 parent 784662e commit 27f28ee

File tree

3 files changed

+292
-13
lines changed

3 files changed

+292
-13
lines changed

packages/compass-collection/src/modules/collection-tab.ts

Lines changed: 218 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,25 @@
11
import type { Reducer, AnyAction, Action } from 'redux';
2+
import {
3+
analyzeDocuments,
4+
SchemaParseOptions,
5+
type Schema,
6+
} from 'mongodb-schema';
7+
28
import type { CollectionMetadata } from 'mongodb-collection-model';
39
import type { ThunkAction } from 'redux-thunk';
410
import type AppRegistry from '@mongodb-js/compass-app-registry';
511
import type { workspacesServiceLocator } from '@mongodb-js/compass-workspaces/provider';
612
import type { CollectionSubtab } from '@mongodb-js/compass-workspaces';
713
import type { DataService } from '@mongodb-js/compass-connections/provider';
814
import type { experimentationServiceLocator } from '@mongodb-js/compass-telemetry/provider';
15+
import { calculateSchemaMetadata } from '@mongodb-js/compass-schema';
16+
import type { Logger } from '@mongodb-js/compass-logging/provider';
17+
import { type PreferencesAccess } from 'compass-preferences-model/provider';
18+
import { isInternalFieldPath } from 'hadron-document';
19+
import { mongoLogId } from '@mongodb-js/compass-logging';
20+
import toNS from 'mongodb-ns';
21+
22+
/** Sample size passed as `size` to the sample cursor when analyzing a collection schema. */
const DEFAULT_SAMPLE_SIZE = 100;
923

1024
function isAction<A extends AnyAction>(
1125
action: AnyAction,
@@ -22,32 +36,79 @@ type CollectionThunkAction<R, A extends AnyAction = AnyAction> = ThunkAction<
2236
dataService: DataService;
2337
workspaces: ReturnType<typeof workspacesServiceLocator>;
2438
experimentationServices: ReturnType<typeof experimentationServiceLocator>;
39+
logger: Logger;
40+
preferences: PreferencesAccess;
41+
analysisAbortControllerRef: { current?: AbortController };
2542
},
2643
A
2744
>;
2845

46+
/**
 * Lifecycle states of a collection schema analysis.
 *
 * - `INITIAL`: value used by the reducer's default state.
 * - `ANALYZING`: set when a `SchemaAnalysisStarted` action is reduced.
 * - `COMPLETED`: carried by the `SchemaAnalysisFinished` payload.
 * - `ERROR`: set when a `SchemaAnalysisFailed` action is reduced.
 */
export enum SchemaAnalysisStatus {
47+
INITIAL = 'initial',
48+
ANALYZING = 'analyzing',
49+
COMPLETED = 'completed',
50+
ERROR = 'error',
51+
}
52+
53+
/**
 * Result (or in-progress placeholder) of a collection schema analysis as kept
 * in the collection tab state.
 *
 * NOTE(review): `Document` is not imported in the visible import block, so it
 * presumably resolves to the global DOM `Document` type rather than a BSON
 * document type — confirm and import the intended type.
 */
type SchemaAnalysis = {
  /** Current lifecycle state of the analysis. */
  status: SchemaAnalysisStatus;
  /** Parsed schema, or `null` when none is available. */
  schema: Schema | null;
  /** First sampled document, or `null` when the sample was empty. */
  sampleDocument: Document | null;
  /** Extra information derived alongside the schema, or `null` when no schema exists. */
  schemaMetadata: {
    /** Schema depth as reported by `calculateSchemaMetadata`. */
    maxNestingDepth: number;
    /**
     * Collection validator, or `null` when the collection has none. The
     * analysis thunk assigns `null` in that case, so the type must allow it.
     */
    validationRules: Document | null;
  } | null;
  /** Error message of a failed analysis, otherwise `null`. */
  error: string | null;
};
63+
2964
/**
 * State managed by the collection tab reducer: identifiers of the tab and
 * namespace, the fetched collection metadata (`null` by default) and the
 * current schema analysis.
 */
export type CollectionState = {
3065
workspaceTabId: string;
3166
namespace: string;
3267
metadata: CollectionMetadata | null;
3368
editViewName?: string;
69+
schemaAnalysis: SchemaAnalysis;
3470
};
3571

36-
enum CollectionActions {
72+
/**
 * Action type identifiers handled by the collection tab reducer. All values
 * share the `compass-collection/` prefix.
 */
export enum CollectionActions {
3773
CollectionMetadataFetched = 'compass-collection/CollectionMetadataFetched',
74+
SchemaAnalysisStarted = 'compass-collection/SchemaAnalysisStarted',
75+
SchemaAnalysisFinished = 'compass-collection/SchemaAnalysisFinished',
76+
SchemaAnalysisFailed = 'compass-collection/SchemaAnalysisFailed',
3877
}
3978

4079
/** Action carrying collection metadata to be stored in the collection tab state. */
interface CollectionMetadataFetchedAction {
4180
type: CollectionActions.CollectionMetadataFetched;
4281
metadata: CollectionMetadata;
4382
}
4483

84+
/**
 * Dispatched by `analyzeCollectionSchema` right before sampling begins.
 * `analysisStartTime` is the `Date.now()` value captured by the thunk; the
 * reducer branch for this action does not store it in state.
 */
interface SchemaAnalysisStartedAction {
85+
type: CollectionActions.SchemaAnalysisStarted;
86+
analysisStartTime: number;
87+
}
88+
89+
/**
 * Dispatched when an analysis run finishes successfully. The reducer replaces
 * `state.schemaAnalysis` wholesale with the `schemaAnalysis` payload.
 */
interface SchemaAnalysisFinishedAction {
90+
type: CollectionActions.SchemaAnalysisFinished;
91+
schemaAnalysis: SchemaAnalysis;
92+
}
93+
94+
/**
 * Dispatched when an analysis run throws. The reducer keeps the rest of the
 * existing analysis state, sets the status to `ERROR` and stores
 * `error.message`.
 */
interface SchemaAnalysisFailedAction {
95+
type: CollectionActions.SchemaAnalysisFailed;
96+
error: Error;
97+
}
98+
4599
const reducer: Reducer<CollectionState, Action> = (
46100
state = {
47101
// TODO(COMPASS-7782): use hook to get the workspace tab id instead
48102
workspaceTabId: '',
49103
namespace: '',
50104
metadata: null,
105+
schemaAnalysis: {
106+
status: SchemaAnalysisStatus.INITIAL,
107+
schema: null,
108+
sampleDocument: null,
109+
schemaMetadata: null,
110+
error: null,
111+
},
51112
},
52113
action
53114
) => {
@@ -62,6 +123,53 @@ const reducer: Reducer<CollectionState, Action> = (
62123
metadata: action.metadata,
63124
};
64125
}
126+
127+
if (
128+
isAction<SchemaAnalysisStartedAction>(
129+
action,
130+
CollectionActions.SchemaAnalysisStarted
131+
)
132+
) {
133+
return {
134+
...state,
135+
schemaAnalysis: {
136+
status: SchemaAnalysisStatus.ANALYZING,
137+
schema: null,
138+
sampleDocument: null,
139+
schemaMetadata: null,
140+
error: null,
141+
},
142+
};
143+
}
144+
145+
if (
146+
isAction<SchemaAnalysisFinishedAction>(
147+
action,
148+
CollectionActions.SchemaAnalysisFinished
149+
)
150+
) {
151+
return {
152+
...state,
153+
schemaAnalysis: action.schemaAnalysis,
154+
};
155+
}
156+
157+
if (
158+
isAction<SchemaAnalysisFailedAction>(
159+
action,
160+
CollectionActions.SchemaAnalysisFailed
161+
)
162+
) {
163+
return {
164+
...state,
165+
schemaAnalysis: {
166+
...state.schemaAnalysis,
167+
status: SchemaAnalysisStatus.ERROR,
168+
error: action.error.message,
169+
},
170+
};
171+
}
172+
65173
return state;
66174
};
67175

@@ -82,6 +190,115 @@ export const selectTab = (
82190
};
83191
};
84192

193+
/**
 * Creates a thunk that samples the current namespace, analyzes the sampled
 * documents and stores the outcome in `state.schemaAnalysis`.
 *
 * The thunk is a no-op when an analysis is already in progress. Otherwise it
 * publishes a fresh `AbortController` through `analysisAbortControllerRef`
 * for the duration of the run, dispatches `SchemaAnalysisStarted`, and then
 * either `SchemaAnalysisFinished` (with the schema, the first sampled
 * document and schema metadata) or `SchemaAnalysisFailed` (with the error).
 * Failures, including aborts, are logged and never rethrown.
 *
 * @returns A thunk resolving once the analysis has finished or failed.
 */
export const analyzeCollectionSchema = (): CollectionThunkAction<void> => {
  return async (
    dispatch,
    getState,
    { analysisAbortControllerRef, dataService, preferences, logger }
  ) => {
    const { schemaAnalysis, namespace } = getState();
    if (schemaAnalysis.status === SchemaAnalysisStatus.ANALYZING) {
      logger.debug(
        'Schema analysis is already in progress, skipping new analysis.'
      );
      return;
    }

    // Expose the controller through the shared ref so that code outside of
    // this thunk can abort the run.
    const abortController = new AbortController();
    analysisAbortControllerRef.current = abortController;
    const abortSignal = abortController.signal;

    const analysisStartTime = Date.now();

    try {
      logger.debug('Schema analysis started.');

      dispatch({
        type: CollectionActions.SchemaAnalysisStarted,
        analysisStartTime,
      });

      // Sample documents
      const samplingOptions = { size: DEFAULT_SAMPLE_SIZE };
      const driverOptions = {
        maxTimeMS: preferences.getPreferences().maxTimeMS,
        signal: abortSignal,
      };
      const sampleCursor = dataService.sampleCursor(
        namespace,
        samplingOptions,
        driverOptions,
        {
          fallbackReadPreference: 'secondaryPreferred',
        }
      );
      const sampleDocuments = await sampleCursor.toArray();

      // Analyze sampled documents
      const schemaParseOptions: SchemaParseOptions = {
        signal: abortSignal,
      };
      const schemaAccessor = await analyzeDocuments(
        sampleDocuments,
        schemaParseOptions
      );
      if (abortSignal.aborted) {
        // Rethrow the abort reason itself when it already is an Error so its
        // name, message and stack survive; wrapping it in `new Error(...)`
        // would stringify it into the message instead.
        const reason: unknown = abortSignal.reason;
        throw reason instanceof Error
          ? reason
          : new Error(reason ? String(reason) : 'Operation aborted');
      }

      let schema: Schema | null = null;
      if (schemaAccessor) {
        schema = await schemaAccessor.getInternalSchema();
        // Filter out internal fields from the schema
        schema.fields = schema.fields.filter(
          ({ path }) => !isInternalFieldPath(path[0])
        );
        // TODO: Transform schema to structure that will be used by the LLM.
      }

      let schemaMetadata = null;
      if (schema !== null) {
        const { schema_depth } = await calculateSchemaMetadata(schema);
        const { database, collection } = toNS(namespace);
        const collInfo = await dataService.collectionInfo(database, collection);
        schemaMetadata = {
          maxNestingDepth: schema_depth,
          validationRules: collInfo?.validation?.validator ?? null,
        };
      }

      dispatch({
        type: CollectionActions.SchemaAnalysisFinished,
        schemaAnalysis: {
          status: SchemaAnalysisStatus.COMPLETED,
          schema,
          sampleDocument: sampleDocuments[0] ?? null,
          schemaMetadata,
          // The reducer stores this payload as-is, so every field of the
          // analysis state has to be present, including a cleared error.
          error: null,
        },
      });
    } catch (err: unknown) {
      // Anything can be thrown; normalize so that `.message` is always defined
      // for both the log entry and the reducer.
      const error = err instanceof Error ? err : new Error(String(err));
      logger.log.error(
        mongoLogId(1_001_000_363),
        'Collection',
        'Schema analysis failed',
        {
          namespace,
          error: error.message,
          aborted: abortSignal.aborted,
          ...(abortSignal.aborted
            ? { abortReason: abortSignal.reason?.message ?? abortSignal.reason }
            : {}),
        }
      );
      dispatch({
        type: CollectionActions.SchemaAnalysisFailed,
        error,
      });
    } finally {
      // The run is over; drop the controller so nobody aborts a stale signal.
      analysisAbortControllerRef.current = undefined;
    }
  };
};
301+
85302
export type CollectionTabPluginMetadata = CollectionMetadata & {
86303
/**
87304
* Initial query for the query bar

packages/compass-collection/src/stores/collection-tab.spec.ts

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type { CollectionTabOptions } from './collection-tab';
22
import { activatePlugin } from './collection-tab';
33
import { selectTab } from '../modules/collection-tab';
4+
import * as collectionTabModule from '../modules/collection-tab';
45
import { waitFor } from '@mongodb-js/testing-library-compass';
56
import Sinon from 'sinon';
67
import AppRegistry from '@mongodb-js/compass-app-registry';
@@ -11,6 +12,7 @@ import type { connectionInfoRefLocator } from '@mongodb-js/compass-connections/p
1112
import { createNoopLogger } from '@mongodb-js/compass-logging/provider';
1213
import { ReadOnlyPreferenceAccess } from 'compass-preferences-model/provider';
1314
import { ExperimentTestName } from '@mongodb-js/compass-telemetry/provider';
15+
import { CollectionMetadata } from 'mongodb-collection-model';
1416

1517
const defaultMetadata = {
1618
namespace: 'test.foo',
@@ -27,16 +29,6 @@ const defaultTabOptions = {
2729
namespace: defaultMetadata.namespace,
2830
};
2931

30-
const mockCollection = {
31-
_id: defaultMetadata.namespace,
32-
fetchMetadata() {
33-
return Promise.resolve(defaultMetadata);
34-
},
35-
toJSON() {
36-
return this;
37-
},
38-
};
39-
4032
const mockAtlasConnectionInfo = {
4133
current: {
4234
id: 'test-connection',
@@ -67,6 +59,9 @@ describe('Collection Tab Content store', function () {
6759
const sandbox = Sinon.createSandbox();
6860

6961
const localAppRegistry = sandbox.spy(new AppRegistry());
62+
// Replace the real action creator with a stub that hands back a no-op async
// thunk, mirroring the shape of the real `analyzeCollectionSchema()` result
// (the thunk itself is async, not a function returning another function).
const analyzeCollectionSchemaStub = sandbox
  .stub(collectionTabModule, 'analyzeCollectionSchema')
  .returns(async () => {});
7065
const dataService = {} as any;
7166
let store: ReturnType<typeof activatePlugin>['store'];
7267
let deactivate: ReturnType<typeof activatePlugin>['deactivate'];
@@ -85,8 +80,19 @@ describe('Collection Tab Content store', function () {
8580
enableGenAIFeatures: true,
8681
enableGenAIFeaturesAtlasOrg: true,
8782
cloudFeatureRolloutAccess: { GEN_AI_COMPASS: true },
88-
})
83+
}),
84+
collectionMetadata: Partial<CollectionMetadata> = defaultMetadata,
85+
analysisAbortControllerRef: { current?: AbortController } = {}
8986
) => {
87+
const mockCollection = {
88+
_id: collectionMetadata.namespace,
89+
fetchMetadata() {
90+
return Promise.resolve(collectionMetadata);
91+
},
92+
toJSON() {
93+
return this;
94+
},
95+
};
9096
({ store, deactivate } = activatePlugin(
9197
{
9298
...defaultTabOptions,
@@ -107,7 +113,7 @@ describe('Collection Tab Content store', function () {
107113
await waitFor(() => {
108114
expect(store.getState())
109115
.to.have.property('metadata')
110-
.deep.eq(defaultMetadata);
116+
.deep.eq(collectionMetadata);
111117
});
112118
return store;
113119
};
@@ -231,4 +237,40 @@ describe('Collection Tab Content store', function () {
231237
});
232238
});
233239
});
240+
241+
describe('schema analysis on collection load', function () {
  // `configureStore` takes the collection metadata as its seventh positional
  // argument; all preceding arguments are left at their defaults.
  const configureStoreWithMetadata = (
    metadataOverrides: Partial<CollectionMetadata>
  ) =>
    configureStore(
      undefined,
      undefined,
      undefined,
      undefined,
      undefined,
      undefined,
      { ...defaultMetadata, ...metadataOverrides }
    );

  it('should start schema analysis if collection is not read-only and not time-series', async function () {
    await configureStore();

    expect(analyzeCollectionSchemaStub).to.have.been.calledOnce;
  });

  it('should not start schema analysis if collection is read-only', async function () {
    await configureStoreWithMetadata({ isReadonly: true });

    expect(analyzeCollectionSchemaStub).to.not.have.been.called;
  });

  it('should not start schema analysis if collection is time-series', async function () {
    await configureStoreWithMetadata({ isTimeSeries: true });

    expect(analyzeCollectionSchemaStub).to.not.have.been.called;
  });
});
234276
});

0 commit comments

Comments
 (0)