Skip to content

Commit 97a35b4

Browse files
authored
feat(compass-collection): Process schema into format for LLM submission for Mock Data Generator – CLOUDP-337090 (#7205)
* WIP
* WIP
* Re order buttons
* WIP
* Rename datatest-id
* Move state to redux
* Update tests per comments
* Fix import
* Test file
* Tests cleanup
* Add tests
* Rename actions; Remove action wrapper
* Address comments
* WIP
* WIP
* Process schema
* WIP
* WIP
* WIP
* WIP
* WIP
* WIP
* Add more test
* Use type predicate validators
* Constants validator
* Create constant value
* Package lock update
* Types
* Package Lock
* Address comment
1 parent 8f183d0 commit 97a35b4

File tree

6 files changed

+1372
-8
lines changed

6 files changed

+1372
-8
lines changed

package-lock.json

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/compass-collection/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@
6666
"react": "^17.0.2",
6767
"react-redux": "^8.1.3",
6868
"redux": "^4.2.1",
69-
"redux-thunk": "^2.4.2"
69+
"redux-thunk": "^2.4.2",
70+
"bson": "^6.10.1"
7071
},
7172
"devDependencies": {
7273
"@mongodb-js/eslint-config-compass": "^1.4.6",

packages/compass-collection/src/modules/collection-tab.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import type { Reducer, AnyAction, Action } from 'redux';
2-
import { analyzeDocuments, type Schema } from 'mongodb-schema';
2+
import { analyzeDocuments } from 'mongodb-schema';
33

44
import type { CollectionMetadata } from 'mongodb-collection-model';
55
import type { ThunkAction } from 'redux-thunk';
@@ -19,8 +19,10 @@ import {
1919
SCHEMA_ANALYSIS_STATE_INITIAL,
2020
type SchemaAnalysisError,
2121
type SchemaAnalysisState,
22+
type FieldInfo,
2223
} from '../schema-analysis-types';
2324
import { calculateSchemaDepth } from '../calculate-schema-depth';
25+
import { processSchema } from '../transform-schema-to-field-info';
2426
import type { Document, MongoError } from 'mongodb';
2527

2628
const DEFAULT_SAMPLE_SIZE = 100;
@@ -106,7 +108,7 @@ interface SchemaAnalysisStartedAction {
106108

107109
interface SchemaAnalysisFinishedAction {
108110
type: CollectionActions.SchemaAnalysisFinished;
109-
schema: Schema;
111+
processedSchema: Record<string, FieldInfo>;
110112
sampleDocument: Document;
111113
schemaMetadata: {
112114
maxNestingDepth: number;
@@ -201,7 +203,7 @@ const reducer: Reducer<CollectionState, Action> = (
201203
...state,
202204
schemaAnalysis: {
203205
status: SCHEMA_ANALYSIS_STATE_COMPLETE,
204-
schema: action.schema,
206+
processedSchema: action.processedSchema,
205207
sampleDocument: action.sampleDocument,
206208
schemaMetadata: action.schemaMetadata,
207209
},
@@ -420,7 +422,9 @@ export const analyzeCollectionSchema = (): CollectionThunkAction<
420422
schema.fields = schema.fields.filter(
421423
({ path }) => !isInternalFieldPath(path[0])
422424
);
423-
// TODO: Transform schema to structure that will be used by the LLM.
425+
426+
// Transform schema to structure that will be used by the LLM
427+
const processedSchema = processSchema(schema);
424428

425429
const maxNestingDepth = await calculateSchemaDepth(schema);
426430
const { database, collection } = toNS(namespace);
@@ -432,7 +436,7 @@ export const analyzeCollectionSchema = (): CollectionThunkAction<
432436
};
433437
dispatch({
434438
type: CollectionActions.SchemaAnalysisFinished,
435-
schema,
439+
processedSchema,
436440
sampleDocument: sampleDocuments[0],
437441
schemaMetadata,
438442
});

packages/compass-collection/src/schema-analysis-types.ts

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import type { Document } from 'mongodb';
2-
import { type Schema } from 'mongodb-schema';
2+
import type { PrimitiveSchemaType } from 'mongodb-schema';
33

44
export const SCHEMA_ANALYSIS_STATE_INITIAL = 'initial';
55
export const SCHEMA_ANALYSIS_STATE_ANALYZING = 'analyzing';
@@ -30,9 +30,35 @@ export type SchemaAnalysisErrorState = {
3030
error: SchemaAnalysisError;
3131
};
3232

33+
/**
34+
* MongoDB schema type
35+
*/
36+
export type MongoDBFieldType = PrimitiveSchemaType['name'];
37+
38+
/**
39+
* Primitive values that can appear in sample_values after BSON-to-primitive conversion.
40+
* These are the JavaScript primitive equivalents of BSON values.
41+
*/
42+
export type SampleValue =
43+
| string // String, Symbol, ObjectId, Binary, RegExp, Code, etc. (converted to string)
44+
| number // Number, Int32, Long, Double, Decimal128, Timestamp (converted via valueOf())
45+
| boolean
46+
| Date
47+
| null
48+
| undefined;
49+
50+
/**
51+
* Schema field information (for LLM processing)
52+
*/
53+
export interface FieldInfo {
54+
type: MongoDBFieldType; // MongoDB primitive type
55+
sample_values?: SampleValue[]; // Primitive sample values (limited to 10)
56+
probability?: number; // 0.0 - 1.0 field frequency
57+
}
58+
3359
export type SchemaAnalysisCompletedState = {
3460
status: typeof SCHEMA_ANALYSIS_STATE_COMPLETE;
35-
schema: Schema;
61+
processedSchema: Record<string, FieldInfo>;
3662
sampleDocument: Document;
3763
schemaMetadata: {
3864
maxNestingDepth: number;

0 commit comments

Comments
 (0)