Skip to content

Commit e3faa5a

Browse files
authored
feat(gen-ai): align schema with already existing format COMPASS-10166 (#7642)
* align schema with already existing format * co-pilot feedback * PR feedback
1 parent 420dd66 commit e3faa5a

File tree

5 files changed

+236
-11
lines changed

5 files changed

+236
-11
lines changed

packages/compass-generative-ai/src/atlas-ai-service.spec.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,10 @@ describe('AtlasAiService', function () {
10591059
expect(content[0].text).to.include(
10601060
`Collection name: "${input.collectionName}"`
10611061
);
1062+
expect(content[0].text).to.include(
1063+
`_id: 'ObjectId`,
1064+
'includes schema information in the prompt'
1065+
);
10621066
expect(res).to.deep.eq(successResponse.response);
10631067
});
10641068

packages/compass-generative-ai/src/utils/gen-ai-prompt.spec.ts

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,21 @@ const OPTIONS: PromptContextOptions = {
3535
],
3636
};
3737

38+
const expectedSampleDocuments = `[
39+
{
40+
_id: ObjectId('68a2dfe93d5adb16ebf4c866'),
41+
userId: ObjectId('68a2dfe93d5adb16ebf4c865')
42+
}
43+
]
44+
`;
45+
46+
const expectedSchema = `
47+
{
48+
_id: 'ObjectId',
49+
userId: 'ObjectId'
50+
}
51+
`;
52+
3853
describe('GenAI Prompts', function () {
3954
it('buildFindQueryPrompt', function () {
4055
const {
@@ -65,16 +80,13 @@ describe('GenAI Prompts', function () {
6580
'Schema from a sample of documents from the collection:',
6681
'includes schema text'
6782
);
68-
expect(prompt).to.include(
69-
toJSString(OPTIONS.schema),
70-
'includes actual schema'
71-
);
83+
expect(prompt).to.include(expectedSchema, 'includes actual schema');
7284
expect(prompt).to.include(
7385
'Sample documents from the collection:',
7486
'includes sample documents text'
7587
);
7688
expect(prompt).to.include(
77-
toJSString(OPTIONS.sampleDocuments),
89+
expectedSampleDocuments,
7890
'includes actual sample documents'
7991
);
8092
});
@@ -108,16 +120,13 @@ describe('GenAI Prompts', function () {
108120
'Schema from a sample of documents from the collection:',
109121
'includes schema text'
110122
);
111-
expect(prompt).to.include(
112-
toJSString(OPTIONS.schema),
113-
'includes actual schema'
114-
);
123+
expect(prompt).to.include(expectedSchema, 'includes actual schema');
115124
expect(prompt).to.include(
116125
'Sample documents from the collection:',
117126
'includes sample documents text'
118127
);
119128
expect(prompt).to.include(
120-
toJSString(OPTIONS.sampleDocuments),
129+
expectedSampleDocuments,
121130
'includes actual sample documents'
122131
);
123132
});

packages/compass-generative-ai/src/utils/gen-ai-prompt.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { toJSString } from 'mongodb-query-parser';
2+
import { flattenSchemaToObject } from './util';
23

34
// When including sample documents, we want to ensure that we do not
45
// attach large documents and exceed the limit. OpenAI roughly estimates
@@ -99,7 +100,7 @@ function buildUserPromptForQuery({
99100
if (schema) {
100101
messages.push(
101102
`Schema from a sample of documents from the collection:${withCodeFence(
102-
toJSString(schema)!
103+
toJSString(flattenSchemaToObject(schema))!
103104
)}`
104105
);
105106
}
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
import { expect } from 'chai';
2+
import { flattenSchemaToObject } from './util';
3+
4+
const mockSchema = {
5+
_id: {
6+
types: [
7+
{
8+
bsonType: 'ObjectId',
9+
},
10+
],
11+
},
12+
name: {
13+
types: [
14+
{
15+
bsonType: 'String',
16+
},
17+
],
18+
},
19+
createdAt: {
20+
types: [
21+
{
22+
bsonType: 'Date',
23+
},
24+
],
25+
},
26+
verified: {
27+
types: [
28+
{
29+
bsonType: 'Null',
30+
},
31+
],
32+
},
33+
providers: {
34+
types: [
35+
{
36+
bsonType: 'Array',
37+
types: [
38+
{
39+
bsonType: 'String',
40+
},
41+
],
42+
},
43+
],
44+
},
45+
reviews: {
46+
types: [
47+
{
48+
bsonType: 'Array',
49+
types: [
50+
{
51+
bsonType: 'Document',
52+
fields: {
53+
name: {
54+
types: [
55+
{
56+
bsonType: 'String',
57+
},
58+
],
59+
},
60+
location: {
61+
types: [
62+
{
63+
bsonType: 'Document',
64+
fields: {
65+
country: {
66+
types: [
67+
{
68+
bsonType: 'String',
69+
},
70+
],
71+
},
72+
coordinates: {
73+
types: [
74+
{
75+
bsonType: 'Array',
76+
types: [
77+
{
78+
bsonType: 'Int32',
79+
},
80+
],
81+
},
82+
],
83+
},
84+
},
85+
},
86+
],
87+
},
88+
},
89+
},
90+
],
91+
},
92+
],
93+
},
94+
preferences: {
95+
types: [
96+
{
97+
bsonType: 'Document',
98+
fields: {
99+
notifications: {
100+
types: [
101+
{
102+
bsonType: 'Document',
103+
fields: {},
104+
},
105+
],
106+
},
107+
selectedProjectId: {
108+
types: [
109+
{
110+
bsonType: 'ObjectId',
111+
},
112+
],
113+
},
114+
},
115+
},
116+
],
117+
},
118+
metadata: {
119+
types: [
120+
{
121+
bsonType: 'Array',
122+
types: [
123+
{
124+
bsonType: 'Array',
125+
types: [
126+
{
127+
bsonType: 'Int32',
128+
},
129+
{
130+
bsonType: 'String',
131+
},
132+
],
133+
},
134+
],
135+
},
136+
],
137+
},
138+
};
139+
140+
describe('utils', function () {
141+
it('flattenSchemaToObject', function () {
142+
expect(flattenSchemaToObject(mockSchema)).to.deep.equal({
143+
_id: 'ObjectId',
144+
name: 'String',
145+
createdAt: 'Date',
146+
verified: 'Null',
147+
providers: 'String[]',
148+
'reviews.name': 'String',
149+
'reviews.location.country': 'String',
150+
'reviews.location.coordinates': 'Int32[]',
151+
'preferences.notifications': 'Document',
152+
'preferences.selectedProjectId': 'ObjectId',
153+
metadata: 'Int32[]',
154+
});
155+
});
156+
});
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,63 @@
11
import type { AnyAction } from 'redux';
2+
import type { Document } from 'mongodb';
23

34
/**
 * Type guard for redux actions: reports whether `action` carries the given
 * `type`, and if so narrows it to the concrete action type `A`.
 *
 * @param action - Any redux action.
 * @param type - The `type` value that identifies action `A`.
 * @returns `true` when `action.type` is strictly equal to `type`.
 */
export function isAction<A extends AnyAction>(
  action: AnyAction,
  type: A['type']
): action is A {
  const { type: actualType } = action;
  return actualType === type;
}
10+
11+
/**
 * Returns the first entry of a schema `types` list, or `undefined` when the
 * list is missing, is not an array, or is empty.
 */
function getFirstType(types: unknown): Document | undefined {
  return Array.isArray(types) && types.length > 0 ? types[0] : undefined;
}

/**
 * Returns the `fields` of a `Document` type entry, falling back to an empty
 * object when `fields` is missing or is not an object.
 */
function getFields(type: Document): Document {
  const fields = type.fields;
  return typeof fields === 'object' && fields !== null ? fields : {};
}

/**
 * Flattens the element types of an array field into `result`.
 *
 * - Arrays of documents contribute their fields under `${prefix}.<field>`.
 * - Arrays of empty documents are recorded as `'Document[]'`, mirroring how
 *   `processDocumentSchema` records an empty sub-document as `'Document'`.
 * - Nested arrays are unwrapped until a non-array element type is found.
 * - Any other element type is recorded as `'<bsonType>[]'`.
 *
 * Nothing is recorded when the element type list is missing or empty, or
 * when the element type has no `bsonType`.
 *
 * @param types - The `types` list of the array type entry.
 * @param result - Accumulator that is mutated and returned.
 * @param prefix - Dotted path of the array field.
 * @returns The same `result` object.
 */
function processArraySchema(
  types: Document[],
  result: Record<string, string>,
  prefix: string
): Record<string, string> {
  // We only consider the first bsonType for simplicity
  const firstType = getFirstType(types);
  if (!firstType) {
    return result;
  }
  if (firstType.bsonType === 'Document') {
    const fields = getFields(firstType);
    if (Object.keys(fields).length === 0) {
      // Without this the field would vanish from the flattened schema.
      result[prefix] = 'Document[]';
      return result;
    }
    return processDocumentSchema(fields, result, `${prefix}.`);
  }
  if (firstType.bsonType === 'Array') {
    return processArraySchema(firstType.types, result, prefix);
  }
  if (firstType.bsonType) {
    result[prefix] = `${firstType.bsonType}[]`;
  }
  return result;
}

/**
 * Flattens every field of a document-level schema into `result`, using
 * dotted paths for nested documents.
 *
 * Fields whose `types` list is missing or empty, or whose first type has no
 * `bsonType`, are skipped.
 *
 * @param schema - Map of field name to an entry holding a `types` list.
 * @param result - Accumulator that is mutated and returned.
 * @param prefix - Path prefix (including the trailing dot) for nested fields.
 * @returns The same `result` object.
 */
function processDocumentSchema(
  schema: Document,
  result: Record<string, string>,
  prefix: string = ''
): Record<string, string> {
  for (const [key, value] of Object.entries(schema)) {
    const prefixedKey = `${prefix}${key}`;
    // We only consider the first bsonType for simplicity
    const firstType = getFirstType(value == null ? undefined : value.types);
    if (!firstType) continue;
    if (firstType.bsonType === 'Document') {
      const fields = getFields(firstType);
      if (Object.keys(fields).length === 0) {
        result[prefixedKey] = 'Document';
      } else {
        processDocumentSchema(fields, result, `${prefixedKey}.`);
      }
    } else if (firstType.bsonType === 'Array') {
      processArraySchema(firstType.types, result, prefixedKey);
    } else if (firstType.bsonType) {
      result[prefixedKey] = firstType.bsonType;
    }
  }
  return result;
}

/**
 * Flattens a schema representation into a flat object that maps a dotted
 * field path to a type name, e.g. `{ 'reviews.name': 'String' }`.
 *
 * Only the first type of each field is considered. Array fields of
 * non-document elements get a `[]` suffix (`'String[]'`), and nested arrays
 * collapse to a single `[]`.
 *
 * @param schema - Map of field name to an entry holding a `types` list.
 * @returns A new flat object of field path to type name.
 */
export function flattenSchemaToObject(
  schema: Document
): Record<string, string> {
  return processDocumentSchema(schema, {});
}

0 commit comments

Comments
 (0)