// packages/compass-collection/src/parse-faker-mapping.spec.ts
import { expect } from 'chai';
import {
  parseFakerMappings,
  renderFakerCallTree,
} from './parse-faker-mappings';
import type { FakerCallTree } from './parse-faker-mappings';

type FakerArgs = Parameters<
  typeof parseFakerMappings
>[0]['fields'][number]['fakerArgs'];

/** Wraps a raw JSON string in the `{ json }` argument shape used by the fixtures. */
const jsonArg = (json: string) => ({ json });

/**
 * Builds one field mapping fixture. Anything not overridden falls back to the
 * values most fixtures share: a `String` field, no faker arguments, not an
 * array, probability 1.0.
 */
function mapping(
  fieldPath: string,
  fakerMethod: string,
  overrides: {
    mongoType?: string;
    fakerArgs?: FakerArgs;
    isArray?: boolean;
    probability?: number;
  } = {}
) {
  const {
    mongoType = 'String',
    fakerArgs = [],
    isArray = false,
    probability = 1.0,
  } = overrides;
  return { fieldPath, mongoType, fakerMethod, fakerArgs, isArray, probability };
}

describe('parseFakerMappings', () => {
  it('handles flat documents', () => {
    const schema = {
      fields: [
        mapping('exploit_name', 'hacker.noun'),
        mapping('atomic_number', 'science.chemicalElement', {
          mongoType: 'Number',
          fakerArgs: [jsonArg('{"property": "atomicNumber"}')],
        }),
        mapping('is_classified', 'datatype.boolean', {
          mongoType: 'Boolean',
          fakerArgs: [jsonArg('{"probability": 0.3}')],
        }),
      ],
    };

    const expected: FakerCallTree = {
      exploit_name: 'faker.hacker.noun()',
      atomic_number:
        'faker.science.chemicalElement({"property":"atomicNumber"})',
      is_classified: 'faker.datatype.boolean({"probability":0.3})',
    };

    expect(parseFakerMappings(schema)).to.deep.equal(expected);
  });

  it('handles nested documents (tree depth 2)', () => {
    const schema = {
      fields: [
        mapping('album_title', 'music.songName'),
        mapping('release_year', 'date.recent', {
          mongoType: 'Number',
          fakerArgs: [jsonArg('{"days": 3650, "refDate": "2024-01-01"}')],
        }),
        mapping('artist.stage_name', 'person.firstName'),
        mapping('artist.genre', 'music.genre'),
        mapping('distributor.airline_code', 'airline.airline', {
          fakerArgs: [jsonArg('{"format": "iata"}')],
        }),
        mapping('distributor.flight_number', 'airline.flightNumber', {
          fakerArgs: [jsonArg('{"addLeadingZeros": true}')],
        }),
      ],
    };

    const expected: FakerCallTree = {
      album_title: 'faker.music.songName()',
      release_year: 'faker.date.recent({"days":3650,"refDate":"2024-01-01"})',
      artist: {
        stage_name: 'faker.person.firstName()',
        genre: 'faker.music.genre()',
      },
      distributor: {
        airline_code: 'faker.airline.airline({"format":"iata"})',
        flight_number: 'faker.airline.flightNumber({"addLeadingZeros":true})',
      },
    };

    expect(parseFakerMappings(schema)).to.deep.equal(expected);
  });

  it('handles more nested objects (tree depth 4)', () => {
    const schema = {
      fields: [
        mapping('mission.spacecraft.navigation.latitude', 'location.latitude', {
          mongoType: 'Number',
          fakerArgs: [jsonArg('{"precision": 4}')],
        }),
        mapping('mission.crew.commander.callsign', 'hacker.abbreviation'),
        mapping(
          'laboratory.experiment.sample.element',
          'science.chemicalElement',
          { fakerArgs: [jsonArg('{"property": "symbol"}')] }
        ),
      ],
    };

    const expected: FakerCallTree = {
      mission: {
        spacecraft: {
          navigation: {
            latitude: 'faker.location.latitude({"precision":4})',
          },
        },
        crew: {
          commander: {
            callsign: 'faker.hacker.abbreviation()',
          },
        },
      },
      laboratory: {
        experiment: {
          sample: {
            element: 'faker.science.chemicalElement({"property":"symbol"})',
          },
        },
      },
    };

    expect(parseFakerMappings(schema)).to.deep.equal(expected);
  });

  it('handles nested array fields with heterogeneous arguments', () => {
    const optionsArg = jsonArg('{"min": 1, "max": 3}');
    const primitiveArg = 1;

    const schema = {
      fields: [
        mapping('nycEvent.genres[]', 'helpers.arrayElements', {
          fakerArgs: [
            jsonArg(
              '["Jazz", "Hip-Hop", "Electronic", "Indie Rock", "R&B", "Pop", "Alternative"]'
            ),
            optionsArg,
          ],
          isArray: true,
        }),
        mapping('nycEvent.headliners[]', 'helpers.arrayElements', {
          fakerArgs: [
            jsonArg(
              '["The Midnight Echoes", "Luna Vista", "Brooklyn Steel", "Neon Dreams", "Velvet Underground Tribute", "Electric Avenue"]'
            ),
            optionsArg,
          ],
          isArray: true,
        }),
        mapping('nycEvent.venues[]', 'helpers.arrayElements', {
          fakerArgs: [
            jsonArg(
              '["Madison Square Garden", "Brooklyn Bowl", "Terminal 5", "Webster Hall", "Music Hall of Williamsburg", "Bowery Ballroom"]'
            ),
            primitiveArg,
          ],
          isArray: true,
          probability: 0.9,
        }),
      ],
    };

    const expected: FakerCallTree = {
      nycEvent: {
        genres: `faker.helpers.arrayElements(['Jazz', 'Hip-Hop', 'Electronic', 'Indie Rock', 'R&B', 'Pop', 'Alternative'], {"min":1,"max":3})`,
        headliners: `faker.helpers.arrayElements(['The Midnight Echoes', 'Luna Vista', 'Brooklyn Steel', 'Neon Dreams', 'Velvet Underground Tribute', 'Electric Avenue'], {"min":1,"max":3})`,
        venues: `faker.helpers.arrayElements(['Madison Square Garden', 'Brooklyn Bowl', 'Terminal 5', 'Webster Hall', 'Music Hall of Williamsburg', 'Bowery Ballroom'], 1)`,
      },
    };

    expect(parseFakerMappings(schema)).to.deep.equal(expected);
  });
});

/**
 * Note: the expected outputs in this suite should be runnable JS expressions,
 * assuming the environment has the faker.js module ready to execute against.
 */
describe('renderFakerCallTree', () => {
  it('renders flat faker call tree to formatted string', () => {
    const fakerCallTree: FakerCallTree = {
      exploit_name: 'faker.hacker.noun()',
      atomic_number:
        'faker.science.chemicalElement({"property":"atomicNumber"})',
      is_classified: 'faker.datatype.boolean({"probability":0.3})',
    };

    // The renderer indents each nesting level with one tab, so the expected
    // text spells the tab out explicitly rather than relying on source layout.
    const expected = [
      '{',
      `\t'exploit_name': faker.hacker.noun(),`,
      `\t'atomic_number': faker.science.chemicalElement({"property":"atomicNumber"}),`,
      `\t'is_classified': faker.datatype.boolean({"probability":0.3})`,
      '}',
    ].join('\n');

    expect(renderFakerCallTree(fakerCallTree)).to.equal(expected);
  });
});

describe('parseFakerMappings error handling', () => {
  // One row per rejected input: test title, offending field, expected message.
  const rejectedInputs = [
    {
      title: 'throws error when array field does not end with []',
      field: mapping('items', 'lorem.word', { isArray: true }),
      message: 'expected the array-type field to end with []',
    },
    {
      title: 'throws error when fieldPath has empty part before []',
      field: mapping('items.[]', 'lorem.word', { isArray: true }),
      message: 'expected fieldPath to be non-empty part before "[]"',
    },
    {
      title: 'throws error when fieldPath is completely empty',
      field: mapping('', 'lorem.word'),
      message: 'expected part in `fieldPath` to be non-empty',
    },
    {
      title: 'throws error when fieldPath part is empty',
      field: mapping('user..name', 'person.firstName'),
      message: 'expected part in `fieldPath` to be non-empty',
    },
    {
      title: 'throws error when fieldPath contains null character',
      field: mapping('user\0name', 'person.firstName'),
      message:
        'fieldPath part contains null character, which is not allowed in MongoDB field names',
    },
    {
      title: 'throws error when fieldPath starts with $',
      field: mapping('$reserved', 'lorem.word'),
      message:
        "fieldPath part starts with '$', which is not allowed in MongoDB field names",
    },
    {
      title: 'throws error when [] appears in middle of field part',
      field: mapping('items[]extra.name', 'lorem.word'),
      message:
        'Invalid fieldPath "items[]extra.name": "[]" can only appear at the very end of the `fieldPath`',
    },
  ];

  for (const { title, field, message } of rejectedInputs) {
    it(title, () => {
      const invalidSchema = { fields: [field] };

      expect(() => parseFakerMappings(invalidSchema)).to.throw(message);
    });
  }
});
// packages/compass-collection/src/parse-faker-mappings.ts
import type { MockDataSchemaResponse } from '@mongodb-js/compass-generative-ai';

type FakerFieldMapping = MockDataSchemaResponse['content']['fields'][number];

type FakerCall = `faker.${string}(${string})`;
type FakerCallNode = { [field: string]: FakerCallNode } | FakerCall;

/**
 * Intermediary representation of a faker factory function that can be readily
 * rendered to its pure string representation by {@link renderFakerCallTree}.
 *
 * e.g.,
 *
 * ```json
 * {
 *   "user_name": "faker.person.fullName()",
 *   "associate": {
 *     "name": "faker.person.fullName()"
 *   }
 * }
 * ```
 */
export type FakerCallTree = { [field: string]: FakerCallNode };

/**
 * Validates every field mapping and folds the dotted `fieldPath`s into a
 * nested {@link FakerCallTree} whose leaves are faker call expressions.
 *
 * @param schema - Object holding the field mappings to convert.
 * @returns The nested tree of faker call strings.
 * @throws Error when a `fieldPath` is malformed (empty part, null character,
 *   leading `$`, `__proto__` part, misplaced `[]`) or when an array field does
 *   not end with `[]`.
 * @throws SyntaxError when a `{ json }` argument is empty or not valid JSON.
 */
export function parseFakerMappings(schema: {
  fields: Array<FakerFieldMapping>;
}): FakerCallTree {
  for (const field of schema.fields) {
    validateFieldPathParts(field);
    validateOnlySquareBracketsEndFinalPart(field);
  }
  return constructFakerCallTree(schema.fields);
}

/**
 * Renders `value` as a single-quoted JS string literal, escaping every
 * character that would otherwise terminate or corrupt the literal.
 */
function toSingleQuotedLiteral(value: string): string {
  const escaped = value
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r')
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029');
  return `'${escaped}'`;
}

/**
 * Resolves the object key for the last path part. Array fields must carry a
 * trailing `[]`, which is stripped from the key.
 */
function resolveLeafName(finalPart: string, isArray: boolean): string {
  if (!isArray) {
    return finalPart;
  }
  if (!finalPart.endsWith('[]')) {
    throw Error('expected the array-type field to end with []');
  }
  const partName = finalPart.slice(0, -2);
  if (!partName.length) {
    throw Error('expected fieldPath to be non-empty part before "[]"');
  }
  return partName;
}

/**
 * Builds the nested tree from already-validated field mappings.
 *
 * Assumptions:
 * - Input does not hold conflicts between overlapping paths like
 *   "venue.ticket.is_discounted" is a boolean but "venue.ticket" is also a
 *   primitive. If it does, the deeper path wins because shallower paths are
 *   processed first and a non-object intermediate node is replaced.
 */
function constructFakerCallTree(
  fields: Array<FakerFieldMapping>
): FakerCallTree {
  // Shallow paths first, so a primitive is never written over an existing subtree.
  const sortedFields = [...fields].sort(
    (f1, f2) => countSeparators(f1.fieldPath) - countSeparators(f2.fieldPath)
  );

  const result: FakerCallTree = {};
  for (const mapping of sortedFields) {
    const fieldParts = mapping.fieldPath.split('.');
    const lastIndex = fieldParts.length - 1;
    const leafName = resolveLeafName(
      fieldParts[lastIndex] ?? '',
      mapping.isArray
    );

    let node = result;
    for (const part of fieldParts.slice(0, lastIndex)) {
      // Only own properties count: following inherited ones (e.g. `__proto__`)
      // would let a crafted path write onto Object.prototype.
      const existing = Object.prototype.hasOwnProperty.call(node, part)
        ? node[part]
        : undefined;
      if (typeof existing === 'object' && existing !== null) {
        node = existing;
      } else {
        const child: { [field: string]: FakerCallNode } = {};
        node[part] = child;
        node = child;
      }
    }

    const fakerCall: FakerCall = `faker.${
      mapping.fakerMethod
    }(${handleFieldArguments(mapping.fakerArgs)})`;
    node[leafName] = fakerCall;
  }

  return result;
}

/**
 * Renders a parsed JSON array as a JS array literal: strings become escaped
 * single-quoted literals, every other element keeps its JSON form so numbers,
 * booleans, null and nested structures are not turned into strings.
 */
function renderArrayLiteral(items: unknown[]): string {
  const rendered = items.map((item) =>
    typeof item === 'string' ? toSingleQuotedLiteral(item) : JSON.stringify(item)
  );
  return `[${rendered.join(', ')}]`;
}

/**
 * Renders the argument list of a faker call.
 *
 * - strings become escaped single-quoted literals
 * - numbers, booleans and `null` are rendered as-is
 * - `{ json }` arguments are parsed; arrays go through
 *   {@link renderArrayLiteral}, anything else is re-serialized compactly
 *
 * @throws SyntaxError when a `{ json }` argument is empty or not valid JSON.
 */
function handleFieldArguments(
  fieldArgs: FakerFieldMapping['fakerArgs']
): string {
  const argParts: string[] = [];

  for (const arg of fieldArgs) {
    if (typeof arg === 'string') {
      argParts.push(toSingleQuotedLiteral(arg));
    } else if (
      typeof arg === 'number' ||
      typeof arg === 'boolean' ||
      arg === null
    ) {
      argParts.push(String(arg));
    } else if (arg.json) {
      const parsed: unknown = JSON.parse(arg.json);
      argParts.push(
        Array.isArray(parsed)
          ? renderArrayLiteral(parsed)
          : JSON.stringify(parsed)
      );
    } else {
      // Previously this fell through to JSON.parse on the empty value and
      // failed with an opaque message; fail explicitly with the same type.
      throw new SyntaxError(
        'expected faker argument to be a primitive or an object with a non-empty `json` string'
      );
    }
  }

  return argParts.join(', ');
}

/**
 * Counts the `.` and `[]` separators in a field path; used as the depth key
 * when ordering mappings.
 *
 * note: assumes {@link validateOnlySquareBracketsEndFinalPart} has been called on the input schema's faker field mappings
 */
function countSeparators(input: string): number {
  const c1 = input.split('.').length - 1;
  const c2 = input.split('[]').length - 1;
  return c1 + c2;
}

/**
 * Renders the entries of one tree level, one per line, indented with `indent`
 * tabs and comma-separated. Returns an empty string for an empty level.
 */
function renderEntries(data: FakerCallTree, indent: number): string {
  const pad = '\t'.repeat(indent);

  const lines = Object.entries(data).map(([key, value]) => {
    const prefix = `${pad}${toSingleQuotedLiteral(key)}: `;

    if (typeof value === 'string') {
      // base case
      return prefix + value;
    }
    if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
      // recursive case
      return `${prefix}{\n${renderEntries(value, indent + 1)}${pad}}`;
    }
    throw Error(
      'expected intermediary field value to be a string or plain object'
    );
  });

  return lines.length ? `${lines.join(',\n')}\n` : '';
}

/**
 * Renders a {@link FakerCallTree} as the source text of a JS object literal
 * whose leaf values are unquoted faker call expressions.
 *
 * @param input - Tree produced by {@link parseFakerMappings}.
 * @returns Tab-indented object literal source.
 * @throws Error when a value is neither a string nor a plain object.
 */
export function renderFakerCallTree(input: FakerCallTree): string {
  return `{\n${renderEntries(input, 1)}}`;
}

/**
 * Rejects any `fieldPath` in which `[]` appears anywhere other than at the
 * very end of the whole path (so `items[].name` and `items[]extra` both fail).
 */
function validateOnlySquareBracketsEndFinalPart(mapping: FakerFieldMapping) {
  const { fieldPath } = mapping;
  const idx = fieldPath.indexOf('[]');
  if (idx !== -1 && idx !== fieldPath.length - 2) {
    throw new Error(
      `Invalid fieldPath "${fieldPath}": "[]" can only appear at the very end of the \`fieldPath\``
    );
  }
}

/**
 * Validates each dot-separated part of a `fieldPath`.
 *
 * `split('.')` always yields at least one part and no part can contain a `.`,
 * so an empty path is caught by the empty-part check below.
 */
function validateFieldPathParts(mapping: FakerFieldMapping) {
  for (const part of mapping.fieldPath.split('.')) {
    if (part.trim().length === 0) {
      throw Error('expected part in `fieldPath` to be non-empty');
    }

    if (part.includes('\0')) {
      throw Error(
        'fieldPath part contains null character, which is not allowed in MongoDB field names'
      );
    }

    if (part.startsWith('$')) {
      throw Error(
        "fieldPath part starts with '$', which is not allowed in MongoDB field names"
      );
    }

    const name = part.endsWith('[]') ? part.slice(0, -2) : part;
    if (name === '__proto__') {
      throw Error(
        "fieldPath part '__proto__' is not allowed because it would modify the object prototype"
      );
    }
  }
}