Skip to content

Commit 891b707

Browse files
committed
WIP
1 parent 0dbab78 commit 891b707

File tree

3 files changed

+143
-53
lines changed

3 files changed

+143
-53
lines changed

packages/compass-collection/src/schema-analysis-types.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ export type SchemaAnalysisErrorState = {
3131

3232
export interface FieldInfo {
3333
type: string; // MongoDB type (eg. String, Double, Array, Document)
34-
sample_values: unknown[]; // Real sample values (empty array if none)
34+
sample_values?: unknown[]; // Primitive sample values (flattened for arrays)
35+
array_sample_values?: unknown[]; // Sample values of the top-level array object
3536
probability?: number; // 0.0 - 1.0 field frequency
3637
}
3738

packages/compass-collection/src/transform-schema-to-field-info.spec.ts

Lines changed: 119 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,51 @@ describe('processSchema', function () {
170170
} as SchemaType,
171171
],
172172
} as SchemaField,
173+
{
174+
name: 'age',
175+
path: ['age'],
176+
probability: 0.9,
177+
types: [
178+
{
179+
name: 'Number',
180+
bsonType: 'Number',
181+
path: ['age'],
182+
count: 3,
183+
probability: 1.0,
184+
values: [25, 30, 35],
185+
} as SchemaType,
186+
],
187+
} as SchemaField,
188+
{
189+
name: 'isActive',
190+
path: ['isActive'],
191+
probability: 0.8,
192+
types: [
193+
{
194+
name: 'Boolean',
195+
bsonType: 'Boolean',
196+
path: ['isActive'],
197+
count: 3,
198+
probability: 1.0,
199+
values: [true, false, true],
200+
} as SchemaType,
201+
],
202+
} as SchemaField,
203+
{
204+
name: 'createdAt',
205+
path: ['createdAt'],
206+
probability: 0.7,
207+
types: [
208+
{
209+
name: 'Date',
210+
bsonType: 'Date',
211+
path: ['createdAt'],
212+
count: 2,
213+
probability: 1.0,
214+
values: [new Date('2023-01-01'), new Date('2023-06-15')],
215+
} as SchemaType,
216+
],
217+
} as SchemaField,
173218
],
174219
count: 3,
175220
};
@@ -182,6 +227,21 @@ describe('processSchema', function () {
182227
sample_values: ['John', 'Jane', 'Bob'],
183228
probability: 1.0,
184229
},
230+
age: {
231+
type: 'Number',
232+
sample_values: [25, 30, 35],
233+
probability: 0.9,
234+
},
235+
isActive: {
236+
type: 'Boolean',
237+
sample_values: [true, false, true],
238+
probability: 0.8,
239+
},
240+
createdAt: {
241+
type: 'Date',
242+
sample_values: [new Date('2023-01-01'), new Date('2023-06-15')],
243+
probability: 0.7,
244+
},
185245
});
186246
});
187247

@@ -294,7 +354,8 @@ describe('processSchema', function () {
294354
expect(result).to.deep.equal({
295355
'tags[]': {
296356
type: 'String',
297-
sample_values: [['red', 'blue'], ['green']],
357+
sample_values: ['red', 'blue', 'green'],
358+
array_sample_values: [['red', 'blue'], ['green']],
298359
probability: 1.0,
299360
},
300361
});
@@ -382,6 +443,12 @@ describe('processSchema', function () {
382443
lengths: [2],
383444
averageLength: 2,
384445
totalCount: 2,
446+
values: [
447+
[
448+
{ id: 1, cost: 10.5 },
449+
{ id: 2, cost: 25.0 },
450+
],
451+
],
385452
types: [
386453
{
387454
name: 'Document',
@@ -405,6 +472,21 @@ describe('processSchema', function () {
405472
} as SchemaType,
406473
],
407474
} as SchemaField,
475+
{
476+
name: 'cost',
477+
path: ['items', 'cost'],
478+
probability: 1.0,
479+
types: [
480+
{
481+
name: 'Double',
482+
bsonType: 'Double',
483+
path: ['items', 'cost'],
484+
count: 2,
485+
probability: 1.0,
486+
values: [10.5, 25.0],
487+
} as SchemaType,
488+
],
489+
} as SchemaField,
408490
],
409491
} as DocumentSchemaType,
410492
],
@@ -421,6 +503,23 @@ describe('processSchema', function () {
421503
'items[].id': {
422504
type: 'Number',
423505
sample_values: [1, 2],
506+
array_sample_values: [
507+
[
508+
{ id: 1, cost: 10.5 },
509+
{ id: 2, cost: 25.0 },
510+
],
511+
],
512+
probability: 1.0,
513+
},
514+
'items[].cost': {
515+
type: 'Double',
516+
sample_values: [10.5, 25.0],
517+
array_sample_values: [
518+
[
519+
{ id: 1, cost: 10.5 },
520+
{ id: 2, cost: 25.0 },
521+
],
522+
],
424523
probability: 1.0,
425524
},
426525
});
@@ -521,7 +620,8 @@ describe('processSchema', function () {
521620
expect(result).to.deep.equal({
522621
'cube[][][]': {
523622
type: 'Number',
524-
sample_values: [
623+
sample_values: [1, 2, 3, 4, 5, 6, 7, 8],
624+
array_sample_values: [
525625
[
526626
[
527627
[1, 2],
@@ -558,6 +658,7 @@ describe('processSchema', function () {
558658
lengths: [2],
559659
averageLength: 2,
560660
totalCount: 2,
661+
values: [[[{ x: 1, y: 2 }], [{ x: 3, y: 4 }]]],
561662
types: [
562663
{
563664
name: 'Array',
@@ -568,6 +669,7 @@ describe('processSchema', function () {
568669
lengths: [1],
569670
averageLength: 1,
570671
totalCount: 2,
672+
values: [[{ x: 1, y: 2 }], [{ x: 3, y: 4 }]],
571673
types: [
572674
{
573675
name: 'Document',
@@ -630,11 +732,13 @@ describe('processSchema', function () {
630732
'matrix[][].x': {
631733
type: 'Number',
632734
sample_values: [1, 3],
735+
array_sample_values: [[[{ x: 1, y: 2 }], [{ x: 3, y: 4 }]]],
633736
probability: 1.0,
634737
},
635738
'matrix[][].y': {
636739
type: 'Number',
637740
sample_values: [2, 4],
741+
array_sample_values: [[[{ x: 1, y: 2 }], [{ x: 3, y: 4 }]]],
638742
probability: 1.0,
639743
},
640744
});
@@ -740,11 +844,23 @@ describe('processSchema', function () {
740844
'teams[].name': {
741845
type: 'String',
742846
sample_values: ['Team A', 'Team B'],
847+
array_sample_values: [
848+
[
849+
{ name: 'Team A', members: ['Alice', 'Bob'] },
850+
{ name: 'Team B', members: ['Charlie'] },
851+
],
852+
],
743853
probability: 1.0,
744854
},
745855
'teams[].members[]': {
746856
type: 'String',
747-
sample_values: [['Alice', 'Bob'], ['Charlie']],
857+
sample_values: ['Alice', 'Bob', 'Charlie'],
858+
array_sample_values: [
859+
[
860+
{ name: 'Team A', members: ['Alice', 'Bob'] },
861+
{ name: 'Team B', members: ['Charlie'] },
862+
],
863+
],
748864
probability: 1.0,
749865
},
750866
});

packages/compass-collection/src/transform-schema-to-field-info.ts

Lines changed: 22 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ export function processSchema(schema: Schema): Record<string, FieldInfo> {
3939

4040
// Process each top-level field
4141
for (const field of schema.fields) {
42-
processNamedField(field, [], result);
42+
processNamedField(field, '', result);
4343
}
4444

4545
return result;
@@ -50,8 +50,9 @@ export function processSchema(schema: Schema): Record<string, FieldInfo> {
5050
*/
5151
function processNamedField(
5252
field: SchemaField,
53-
pathPrefix: string[],
54-
result: Record<string, FieldInfo>
53+
pathPrefix: string,
54+
result: Record<string, FieldInfo>,
55+
arraySampleValues?: unknown[]
5556
): void {
5657
if (!field.types || field.types.length === 0) {
5758
return;
@@ -63,18 +64,24 @@ function processNamedField(
6364
return;
6465
}
6566

66-
const currentPath = [...pathPrefix, field.name];
67+
const currentPath = pathPrefix ? `${pathPrefix}.${field.name}` : field.name;
6768

6869
// Process based on the type
69-
processType(primaryType, currentPath, result, field.probability);
70+
processType(
71+
primaryType,
72+
currentPath,
73+
result,
74+
field.probability,
75+
arraySampleValues
76+
);
7077
}
7178

7279
/**
7380
* Processes a specific schema type
7481
*/
7582
function processType(
7683
type: SchemaType,
77-
currentPath: string[],
84+
currentPath: string,
7885
result: Record<string, FieldInfo>,
7986
fieldProbability?: number,
8087
arraySampleValues?: unknown[]
@@ -88,67 +95,33 @@ function processType(
8895
return;
8996
}
9097

91-
const arrayPath = [...currentPath, '[]'];
98+
const arrayPath = `${currentPath}[]`;
9299
const sampleValues = arraySampleValues || getSampleValues(arrayType);
93-
94100
processType(elementType, arrayPath, result, fieldProbability, sampleValues);
95101
} else if (type.name === 'Document' || type.bsonType === 'Document') {
96-
// Process nested document fields (and clear array sample values for nested processing)
97-
98-
// TODO: Consider
99-
// if (arraySampleValues) {
100-
// // We're in an array of documents - create the array entry
101-
// const fieldPath = buildFieldPath(currentPath);
102-
// result[fieldPath] = {
103-
// type: type.name || type.bsonType || 'Document',
104-
// sample_values: arraySampleValues,
105-
// probability: fieldProbability || 1.0,
106-
// };
107-
// }
102+
// Process nested document fields
108103

109104
const docType = type as DocumentSchemaType;
110105
if (docType.fields) {
111106
for (const nestedField of docType.fields) {
112-
processNamedField(nestedField, currentPath, result);
107+
processNamedField(nestedField, currentPath, result, arraySampleValues);
113108
}
114109
}
115110
} else {
116111
// Primitive: create entry (with passed-down array sample values if we have them)
117-
const fieldPath = buildFieldPath(currentPath);
118-
result[fieldPath] = {
112+
const fieldInfo: FieldInfo = {
119113
type: type.name || type.bsonType || 'Mixed',
120-
sample_values: arraySampleValues || getSampleValues(type),
114+
sample_values: getSampleValues(type),
121115
probability:
122116
fieldProbability || (type as PrimitiveSchemaType).probability || 1.0,
123117
};
124-
}
125-
}
126118

127-
/**
128-
* Builds a field path from path segments, handling bracket notation correctly
129-
*/
130-
function buildFieldPath(pathSegments: string[]): string {
131-
let result = '';
132-
133-
for (let i = 0; i < pathSegments.length; i++) {
134-
const segment = pathSegments[i];
135-
136-
if (segment === '[]') {
137-
// Bracket notation - append directly
138-
result += '[]';
139-
} else {
140-
// Regular field name
141-
if (result && !result.endsWith('[]')) {
142-
result += '.';
143-
} else if (result && result.endsWith('[]')) {
144-
// Add dot after brackets for nested fields
145-
result += '.';
146-
}
147-
result += segment;
119+
if (arraySampleValues !== undefined && arraySampleValues.length > 0) {
120+
fieldInfo.array_sample_values = arraySampleValues;
148121
}
149-
}
150122

151-
return result;
123+
result[currentPath] = fieldInfo;
124+
}
152125
}
153126

154127
/**

0 commit comments

Comments
 (0)