Skip to content

Commit adff1d0

Browse files
feat(compass-aggregations): refactor for get-schema to also add type information - COMPASS-6793 (#4355)
1 parent 8e76569 commit adff1d0

File tree

5 files changed

+175
-66
lines changed

5 files changed

+175
-66
lines changed

packages/compass-aggregations/src/components/aggregation-side-panel/stage-wizard-use-cases/index.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,9 @@ import BasicGroupUseCase from './group/basic-group';
66
import GroupWithStatistics from './group/group-with-statistics';
77
import MatchUseCase from './match/match';
88
import GroupWithSubset from './group/group-with-subset';
9-
import type { TypeCastTypes } from 'hadron-type-checker';
9+
import type { FieldSchema } from '../../../utils/get-schema';
1010

11-
export type StageWizardFields = {
12-
name: string;
13-
type: TypeCastTypes;
14-
}[];
11+
export type StageWizardFields = FieldSchema[];
1512

1613
export type WizardComponentProps = {
1714
fields: StageWizardFields;

packages/compass-aggregations/src/components/stage-wizard/index.tsx

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ import type {
2727
import { getSchema } from '../../utils/get-schema';
2828
import { getStageHelpLink } from '../../utils/stage';
2929
import type { SortableProps } from '../pipeline-builder-workspace/pipeline-builder-ui-workspace/sortable-list';
30+
import type { FieldSchema } from '../../utils/get-schema';
31+
import type { TypeCastTypes } from 'hadron-type-checker';
3032

3133
const containerStyles = css({
3234
display: 'flex',
@@ -178,21 +180,30 @@ export default connect(
178180
.reverse()
179181
.find((x): x is StoreStage => x.type === 'stage' && !x.disabled);
180182

181-
const mappedInitialFields = initialFields.map(
182-
(x: { name: string; description: string }) => ({
183-
name: x.name,
184-
// parsed schema has the bson type Object replaced with
185-
// Document to avoid collision with JS Objects but that
186-
// shouldn't be a problem for us because we use these
187-
// as string values alongside well defined casters.
188-
type: x.description === 'Document' ? 'Object' : x.description,
189-
})
183+
const mappedInitialFields = (
184+
initialFields as {
185+
name: string;
186+
description?: string;
187+
}[]
188+
).map<FieldSchema>(({ name, description }) => {
189+
// parsed schema has the bson type Object replaced with
190+
// Document to avoid collision with JS Objects but that
191+
// shouldn't be a problem for us because we use these
192+
// as string values alongside well defined casters.
193+
const type =
194+
description === 'Document'
195+
? 'Object'
196+
: ((description ?? 'String') as TypeCastTypes);
197+
198+
return { name, type };
199+
});
200+
const previousStageFieldsWithSchema = getSchema(
201+
previousStage?.previewDocs ?? []
190202
);
191-
const previousStageFields = getSchema(previousStage?.previewDocs ?? []);
192203

193204
const fields =
194-
previousStageFields.length > 0 && autoPreview
195-
? previousStageFields.map((name) => ({ name, type: 'String' }))
205+
previousStageFieldsWithSchema.length > 0 && autoPreview
206+
? previousStageFieldsWithSchema
196207
: mappedInitialFields;
197208

198209
return {

packages/compass-aggregations/src/modules/collections-fields.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ export const fetchCollectionFields = (
184184
collection,
185185
data: {
186186
...collectionInfo,
187-
fields: getSchema(documents),
187+
fields: getSchema(documents).map(({ name }) => name),
188188
isLoading: false,
189189
},
190190
});

packages/compass-aggregations/src/utils/get-schema.spec.ts

Lines changed: 87 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ import bson from 'bson';
33
import { getSchema } from './get-schema';
44

55
const DATA = [
6+
{
7+
useCase: 'does nothing when input is empty',
8+
input: [],
9+
output: [],
10+
},
611
{
712
useCase: '_id is always the first one',
813
input: [
@@ -11,7 +16,16 @@ const DATA = [
1116
_id: 123456,
1217
},
1318
],
14-
output: ['_id', 'data'],
19+
output: [
20+
{
21+
name: '_id',
22+
type: 'Int32',
23+
},
24+
{
25+
name: 'data',
26+
type: 'String',
27+
},
28+
],
1529
},
1630
{
1731
useCase: 'simple json object',
@@ -21,7 +35,50 @@ const DATA = [
2135
data: 'hello',
2236
},
2337
],
24-
output: ['data', 'name'],
38+
output: [
39+
{
40+
name: 'data',
41+
type: 'String',
42+
},
43+
{
44+
name: 'name',
45+
type: 'String',
46+
},
47+
],
48+
},
49+
{
50+
useCase: 'simple json object with falsy values',
51+
input: [
52+
{
53+
name: '',
54+
downloads: 0,
55+
popular: false,
56+
phoneNumbers: [],
57+
addresses: null,
58+
},
59+
],
60+
output: [
61+
{
62+
name: 'addresses',
63+
type: 'Null',
64+
},
65+
{
66+
name: 'downloads',
67+
type: 'Int32',
68+
},
69+
{
70+
name: 'name',
71+
type: 'String',
72+
},
73+
{
74+
name: 'phoneNumbers',
75+
type: 'Array',
76+
},
77+
{
78+
name: 'popular',
79+
type: 'Boolean',
80+
},
81+
],
2582
},
2683
{
2784
useCase: 'nested json object',
@@ -39,13 +96,13 @@ const DATA = [
3996
},
4097
],
4198
output: [
42-
'address',
43-
'address.city',
44-
'address.street',
45-
'address.street.name',
46-
'address.street.number',
47-
'data',
48-
'name',
99+
{ name: 'address', type: 'Object' },
100+
{ name: 'address.city', type: 'String' },
101+
{ name: 'address.street', type: 'Object' },
102+
{ name: 'address.street.name', type: 'String' },
103+
{ name: 'address.street.number', type: 'Int32' },
104+
{ name: 'data', type: 'String' },
105+
{ name: 'name', type: 'String' },
49106
],
50107
},
51108
{
@@ -77,29 +134,35 @@ const DATA = [
77134
},
78135
],
79136
output: [
80-
'data',
81-
'name',
82-
'streets',
83-
'streets._id',
84-
'streets.city',
85-
'streets.name',
86-
'streets.number',
87-
'streets.zip',
137+
{ name: 'data', type: 'String' },
138+
{ name: 'name', type: 'String' },
139+
{ name: 'streets', type: 'Array' },
140+
{ name: 'streets._id', type: 'Int32' },
141+
{ name: 'streets.city', type: 'String' },
142+
{ name: 'streets.name', type: 'String' },
143+
{ name: 'streets.number', type: 'Int32' },
144+
{ name: 'streets.zip', type: 'Int32' },
88145
],
89146
},
90147
{
91148
useCase: 'handles bson values',
92149
input: [
93150
{
94151
_id: new bson.ObjectId(),
95-
data: new bson.Int32(123),
152+
data: new bson.Double(123),
96153
address: {
97154
street: 'Alt-Moabit',
98155
number: new bson.Int32(18),
99156
},
100157
},
101158
],
102-
output: ['_id', 'address', 'address.number', 'address.street', 'data'],
159+
output: [
160+
{ name: '_id', type: 'ObjectId' },
161+
{ name: 'address', type: 'Object' },
162+
{ name: 'address.number', type: 'Int32' },
163+
{ name: 'address.street', type: 'String' },
164+
{ name: 'data', type: 'Double' },
165+
],
103166
},
104167
{
105168
useCase: 'nested array with scaler values',
@@ -112,7 +175,11 @@ const DATA = [
112175
},
113176
},
114177
],
115-
output: ['meta', 'meta.common', 'meta.common.artists'],
178+
output: [
179+
{ name: 'meta', type: 'Object' },
180+
{ name: 'meta.common', type: 'Object' },
181+
{ name: 'meta.common.artists', type: 'Array' },
182+
],
116183
},
117184
];
118185

packages/compass-aggregations/src/utils/get-schema.ts

Lines changed: 62 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,75 @@
1+
import TypeChecker from 'hadron-type-checker';
2+
import { sortedUniqBy, sortBy } from 'lodash';
3+
4+
import type { TypeCastTypes } from 'hadron-type-checker';
15
import type { Document } from 'mongodb';
26

3-
const getArrayKeys = (records: Document[]) => {
4-
return records
5-
.filter((x) => typeof x === 'object')
6-
.map((item) => getObjectKeys(item))
7-
.flat()
8-
.filter((x, i, a) => a.indexOf(x) === i)
9-
.sort();
7+
export type FieldSchema = {
8+
name: string;
9+
type: TypeCastTypes;
10+
};
11+
12+
export type DocumentSchema = FieldSchema[];
13+
14+
/**
15+
* Mapper function that maps a FieldSchema with the name prefixed with the
16+
* provided prefix, separated by a dot
17+
* */
18+
const toFieldSchemaWithPrefix = (prefix: string) => {
19+
return (fieldSchema: FieldSchema): FieldSchema => ({
20+
name: `${prefix}.${fieldSchema.name}`,
21+
type: fieldSchema.type,
22+
});
1023
};
1124

12-
const getObjectKeys = (record: Document) => {
13-
const keys: string[] = [];
25+
const getSchemaForObject = (document: Document): DocumentSchema => {
26+
const schema: DocumentSchema = [];
27+
for (const key in document) {
28+
const value = document[key];
29+
schema.push({
30+
name: key,
31+
type: TypeChecker.type(value),
32+
});
1433

15-
if (!record) {
16-
return keys;
34+
if (Array.isArray(value)) {
35+
const valueSchema = getSchemaForArray(value).map(
36+
toFieldSchemaWithPrefix(key)
37+
);
38+
schema.push(...valueSchema);
39+
} else if (
40+
typeof value === 'object' &&
41+
value !== null &&
42+
!value._bsontype
43+
) {
44+
const valueSchema = getSchemaForObject(value).map(
45+
toFieldSchemaWithPrefix(key)
46+
);
47+
schema.push(...valueSchema);
48+
}
1749
}
50+
return schema;
51+
};
52+
53+
const getSchemaForArray = (records: Document[]): DocumentSchema => {
54+
const schema: DocumentSchema = [];
1855

19-
for (const key in record) {
20-
keys.push(key);
21-
const value = record[key];
22-
23-
if (value && typeof value === 'object') {
24-
const isBson = value._bsontype;
25-
if (!isBson) {
26-
const nestedKeys = Array.isArray(value)
27-
? getArrayKeys(value)
28-
: getObjectKeys(value);
29-
nestedKeys.forEach((nestedKey) => {
30-
keys.push(`${key}.${nestedKey}`);
31-
});
32-
}
56+
for (const record of records) {
57+
if (Array.isArray(record)) {
58+
schema.push(...getSchemaForArray(record));
59+
} else if (
60+
typeof record === 'object' &&
61+
record !== null &&
62+
!record._bsontype
63+
) {
64+
schema.push(...getSchemaForObject(record));
3365
}
3466
}
3567

36-
return keys;
68+
return schema;
3769
};
3870

39-
export const getSchema = (data: Document[]) => {
40-
return getArrayKeys(data);
71+
export const getSchema = (records: Document[]): DocumentSchema => {
72+
const schema = getSchemaForArray(records);
73+
const sortedSchema = sortBy(schema, 'name');
74+
return sortedUniqBy(sortedSchema, 'name');
4175
};

0 commit comments

Comments
 (0)