diff --git a/package.json b/package.json
index aa169a0..5bfc874 100644
--- a/package.json
+++ b/package.json
@@ -33,7 +33,7 @@
     ".esm-wrapper.mjs"
   ],
   "scripts": {
-    "test": "nyc mocha --timeout 5000 --colors -r ts-node/register test/*.ts",
+    "test": "nyc mocha --timeout 5000 --colors -r ts-node/register test/*.ts \"src/**/*.test.ts\"",
     "test-example-parse-from-file": "ts-node examples/parse-from-file.ts",
     "test-example-parse-schema": "ts-node examples/parse-schema.ts",
     "test-time": "ts-node ./test/time-testing.ts",
diff --git a/src/index.ts b/src/index.ts
index 3dfc6ed..f4c6e44 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -18,7 +18,7 @@ import type {
   SimplifiedSchema
 } from './schema-analyzer';
 import * as schemaStats from './stats';
-import { AnyIterable, StandardJSONSchema, MongoDBJSONSchema, ExtendedJSONSchema } from './types';
+import { AnyIterable, StandardJSONSchema, MongoDBJSONSchema, ExpandedJSONSchema } from './types';
 
 /**
  * Analyze documents - schema can be retrieved in different formats.
@@ -77,7 +77,7 @@ export type {
   SimplifiedSchema,
   StandardJSONSchema,
   MongoDBJSONSchema,
-  ExtendedJSONSchema
+  ExpandedJSONSchema
 };
 
 export {
diff --git a/src/schema-accessor.ts b/src/schema-accessor.ts
index cd70681..88f79e6 100644
--- a/src/schema-accessor.ts
+++ b/src/schema-accessor.ts
@@ -1,11 +1,11 @@
 import { Schema as InternalSchema } from './schema-analyzer';
-import convertors from './schema-convertors';
-import { ExtendedJSONSchema, MongoDBJSONSchema, StandardJSONSchema } from './types';
+import { convertors } from './schema-convertors';
+import { ExpandedJSONSchema, MongoDBJSONSchema, StandardJSONSchema } from './types';
 
 export interface SchemaAccessor {
   getStandardJsonSchema: () => Promise<StandardJSONSchema>;
   getMongoDBJsonSchema: () => Promise<MongoDBJSONSchema>;
-  getExtendedJsonSchema: () => Promise<ExtendedJSONSchema>;
+  getExpandedJSONSchema: () => Promise<ExpandedJSONSchema>;
   getInternalSchema: () => Promise<InternalSchema>;
 }
 
@@ -23,13 +23,13 @@ export class InternalSchemaBasedAccessor implements SchemaAccessor {
   private internalSchema: InternalSchema;
   private standardJSONSchema?: StandardJSONSchema;
   private mongodbJSONSchema?: MongoDBJSONSchema;
-  private extendedJSONSchema?: ExtendedJSONSchema;
+  private expandedJSONSchema?: ExpandedJSONSchema;
 
   constructor(internalSchema: InternalSchema) {
     this.internalSchema = internalSchema;
   }
 
-  async getInternalSchema(options?: Options): Promise<InternalSchema> {
+  async getInternalSchema(): Promise<InternalSchema> {
     return this.internalSchema;
   }
 
@@ -41,7 +41,11 @@ export class InternalSchemaBasedAccessor implements SchemaAccessor {
     return this.mongodbJSONSchema ??= await convertors.internalSchemaToMongoDB(this.internalSchema, options);
   }
 
-  async getExtendedJsonSchema(options: Options = {}): Promise<ExtendedJSONSchema> {
-    return this.extendedJSONSchema ??= await convertors.internalSchemaToExtended(this.internalSchema, options);
+  /**
+   * Returns the expanded JSON schema. The conversion runs only while no result
+   * is cached, so `options` has no effect once a schema has been stored.
+   */
+  async getExpandedJSONSchema(options: Options = {}): Promise<ExpandedJSONSchema> {
+    return this.expandedJSONSchema ??= await convertors.internalSchemaToExpanded(this.internalSchema, options);
   }
 }
diff --git a/src/schema-convertors.ts
b/src/schema-convertors.ts
deleted file mode 100644
index 435e991..0000000
--- a/src/schema-convertors.ts
+++ /dev/null
@@ -1,35 +0,0 @@
-import { Schema as InternalSchema } from './schema-analyzer';
-import { ExtendedJSONSchema, MongoDBJSONSchema, StandardJSONSchema } from './types';
-
-function internalSchemaToStandard(
-  internalSchema: InternalSchema,
-  options: {
-    signal?: AbortSignal
-}): StandardJSONSchema {
-  // TODO: COMPASS-8700
-  return {};
-}
-
-function internalSchemaToMongoDB(
-  internalSchema: InternalSchema,
-  options: {
-    signal?: AbortSignal
-}): MongoDBJSONSchema {
-  // TODO: COMPASS-8701
-  return {} as MongoDBJSONSchema;
-}
-
-function internalSchemaToExtended(
-  internalSchema: InternalSchema,
-  options: {
-    signal?: AbortSignal
-}): ExtendedJSONSchema {
-  // TODO: COMPASS-8702
-  return {} as ExtendedJSONSchema;
-}
-
-export default {
-  internalSchemaToStandard,
-  internalSchemaToMongoDB,
-  internalSchemaToExtended
-};
diff --git a/src/schema-convertors/index.ts b/src/schema-convertors/index.ts
new file mode 100644
index 0000000..273310d
--- /dev/null
+++ b/src/schema-convertors/index.ts
@@ -0,0 +1,10 @@
+import internalSchemaToExpanded from './internalToExpanded';
+import internalSchemaToMongoDB from './internalToMongoDB';
+import internalSchemaToStandard from './internalToStandard';
+
+/** Converters from the internal schema to each supported output format. */
+export const convertors = {
+  internalSchemaToStandard,
+  internalSchemaToMongoDB,
+  internalSchemaToExpanded
+};
diff --git a/src/schema-convertors/internalToExpanded.ts b/src/schema-convertors/internalToExpanded.ts
new file mode 100644
index 0000000..be91d93
--- /dev/null
+++ b/src/schema-convertors/internalToExpanded.ts
@@ -0,0 +1,16 @@
+import { Schema as InternalSchema } from '../schema-analyzer';
+import { ExpandedJSONSchema } from '../types';
+
+/**
+ * Stub converter from the internal schema to the expanded JSON schema format.
+ * Not implemented yet (COMPASS-8702): resolves to an empty schema object.
+ */
+export default function internalSchemaToExpanded(
+  /* eslint @typescript-eslint/no-unused-vars: 0 */
+  internalSchema: InternalSchema,
+  options: {
+    signal?: AbortSignal
+}): Promise<ExpandedJSONSchema> {
+  // TODO: COMPASS-8702
+  return Promise.resolve({}
as ExpandedJSONSchema); +} diff --git a/src/schema-convertors/internalToMongoDB.test.ts b/src/schema-convertors/internalToMongoDB.test.ts new file mode 100644 index 0000000..42813bc --- /dev/null +++ b/src/schema-convertors/internalToMongoDB.test.ts @@ -0,0 +1,1768 @@ +import assert from 'assert'; +import internalSchemaToMongoDB from './internalToMongoDB'; + +describe('internalSchemaToMongoDB', async function() { + describe('Converts: ', async function() { + it('all the types', async function() { + const internal = { + count: 1, + fields: [ + { + name: '_id', + path: [ + '_id' + ], + count: 1, + type: 'ObjectId', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'ObjectId', + path: [ + '_id' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + '642d766b7300158b1f22e972' + ], + bsonType: 'ObjectId' + } + ] + }, + { + name: 'array', + path: [ + 'array' + ], + count: 1, + type: 'Array', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Array', + path: [ + 'array' + ], + count: 1, + probability: 0.8, + bsonType: 'Array', + types: [ + { + name: 'Number', + path: [ + 'array' + ], + count: 3, + probability: 0.8, + unique: 3, + hasDuplicates: false, + values: [ + 1, + 2, + 3 + ], + bsonType: 'Number' + } + ], + totalCount: 3, + lengths: [ + 3 + ], + averageLength: 3 + } + ] + }, + { + name: 'binaries', + path: [ + 'binaries' + ], + count: 1, + type: 'Document', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Document', + path: [ + 'binaries' + ], + count: 1, + probability: 0.8, + bsonType: 'Document', + fields: [ + { + name: 'binaryOld', + path: [ + 'binaries', + 'binaryOld' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'binaryOld' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + '//8=' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 
'compressedTimeSeries', + path: [ + 'binaries', + 'compressedTimeSeries' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'compressedTimeSeries' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'c//SZESzTGmQ6OfR38A11A==' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 'custom', + path: [ + 'binaries', + 'custom' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'custom' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + '//8=' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 'encrypted', + path: [ + 'binaries', + 'encrypted' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'encrypted' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'c//SZESzTGmQ6OfR38A11A==' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 'functionData', + path: [ + 'binaries', + 'functionData' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'functionData' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + '//8=' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 'generic', + path: [ + 'binaries', + 'generic' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'generic' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'AQID' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 'md5', + path: [ + 'binaries', + 'md5' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'md5' + ], + count: 
1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'c//SZESzTGmQ6OfR38A11A==' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 'uuid', + path: [ + 'binaries', + 'uuid' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'uuid' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'aaaaaaaa-aaaa-4aaa-aaaa-aaaaaaaaaaaa' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 'uuidOld', + path: [ + 'binaries', + 'uuidOld' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binaries', + 'uuidOld' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'c//SZESzTGmQ6OfR38A11A==' + ], + bsonType: 'Binary' + } + ] + } + ] + } + ] + }, + { + name: 'binData', + path: [ + 'binData' + ], + count: 1, + type: 'Binary', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Binary', + path: [ + 'binData' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'AQID' + ], + bsonType: 'Binary' + } + ] + }, + { + name: 'boolean', + path: [ + 'boolean' + ], + count: 1, + type: 'Boolean', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Boolean', + path: [ + 'boolean' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + true + ], + bsonType: 'Boolean' + } + ] + }, + { + name: 'date', + path: [ + 'date' + ], + count: 1, + type: 'Date', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Date', + path: [ + 'date' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + '2023-04-05T13:25:08.445Z' + ], + bsonType: 'Date' + } + ] + }, + { + name: 'dbRef', + path: [ + 'dbRef' + ], + count: 1, + type: 'DBRef', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'DBRef', + path: [ + 'dbRef' + 
], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + { + $ref: 'namespace', + $id: '642d76b4b7ebfab15d3c4a78' + } + ], + bsonType: 'DBRef' + } + ] + }, + { + name: 'decimal', + path: [ + 'decimal' + ], + count: 1, + type: 'Decimal128', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Decimal128', + path: [ + 'decimal' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + { + $numberDecimal: '5.477284286264328586719275128128001E-4088' + } + ], + bsonType: 'Decimal128' + } + ] + }, + { + name: 'double', + path: [ + 'double' + ], + count: 1, + type: 'Double', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Double', + path: [ + 'double' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 1.2 + ], + bsonType: 'Double' + } + ] + }, + { + name: 'int', + path: [ + 'int' + ], + count: 1, + type: 'Int32', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Int32', + path: [ + 'int' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 12345 + ], + bsonType: 'Int32' + } + ] + }, + { + name: 'javascript', + path: [ + 'javascript' + ], + count: 1, + type: 'Code', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Code', + path: [ + 'javascript' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + { + code: 'function() {}' + } + ], + bsonType: 'Code' + } + ] + }, + { + name: 'javascriptWithScope', + path: [ + 'javascriptWithScope' + ], + count: 1, + type: 'Code', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Code', + path: [ + 'javascriptWithScope' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + { + code: 'function() {}', + scope: { + foo: 1, + bar: 'a' + } + } + ], + bsonType: 'Code' + } + ] + }, + { + name: 'long', + path: [ + 'long' + ], + count: 1, + type: 'Long', + 
probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Long', + path: [ + 'long' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + { + low: -1395630315, + high: 28744523, + unsigned: false + } + ], + bsonType: 'Long' + } + ] + }, + { + name: 'maxKey', + path: [ + 'maxKey' + ], + count: 1, + type: 'MaxKey', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'MaxKey', + path: [ + 'maxKey' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + {} + ], + bsonType: 'MaxKey' + } + ] + }, + { + name: 'minKey', + path: [ + 'minKey' + ], + count: 1, + type: 'MinKey', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'MinKey', + path: [ + 'minKey' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + {} + ], + bsonType: 'MinKey' + } + ] + }, + { + name: 'null', + path: [ + 'null' + ], + count: 1, + type: 'Null', + probability: 0.8, + hasDuplicates: true, + types: [ + { + name: 'Null', + path: [ + 'null' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: true, + bsonType: 'Null' + } + ] + }, + { + name: 'object', + path: [ + 'object' + ], + count: 1, + type: 'Document', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Document', + path: [ + 'object' + ], + count: 1, + probability: 0.8, + bsonType: 'Document', + fields: [ + { + name: 'key', + path: [ + 'object', + 'key' + ], + count: 1, + type: 'String', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'String', + path: [ + 'object', + 'key' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'value' + ], + bsonType: 'String' + } + ] + } + ] + } + ] + }, + { + name: 'objectId', + path: [ + 'objectId' + ], + count: 1, + type: 'ObjectId', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'ObjectId', + path: [ + 'objectId' + ], + count: 1, + probability: 0.8, + unique: 1, + 
hasDuplicates: false, + values: [ + '642d766c7300158b1f22e975' + ], + bsonType: 'ObjectId' + } + ] + }, + { + name: 'regex', + path: [ + 'regex' + ], + count: 1, + type: 'BSONRegExp', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'BSONRegExp', + path: [ + 'regex' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + { + pattern: 'pattern', + options: 'i' + } + ], + bsonType: 'BSONRegExp' + } + ] + }, + { + name: 'string', + path: [ + 'string' + ], + count: 1, + type: 'String', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'String', + path: [ + 'string' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'Hello, world!' + ], + bsonType: 'String' + } + ] + }, + { + name: 'symbol', + path: [ + 'symbol' + ], + count: 1, + type: 'BSONSymbol', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'BSONSymbol', + path: [ + 'symbol' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + 'symbol' + ], + bsonType: 'BSONSymbol' + } + ] + }, + { + name: 'timestamp', + path: [ + 'timestamp' + ], + count: 1, + type: 'Timestamp', + probability: 0.8, + hasDuplicates: false, + types: [ + { + name: 'Timestamp', + path: [ + 'timestamp' + ], + count: 1, + probability: 0.8, + unique: 1, + hasDuplicates: false, + values: [ + { + $timestamp: '7218556297505931265' + } + ], + bsonType: 'Timestamp' + } + ] + } + ] + }; + const standard = await internalSchemaToMongoDB(internal); + assert.deepStrictEqual(standard, { + bsonType: 'object', + required: [], + properties: { + _id: { + bsonType: 'objectId' + }, + array: { + bsonType: 'array', + items: { + bsonType: 'double' + } + }, + binData: { + bsonType: 'binData' + }, + binaries: { + bsonType: 'object', + properties: { + binaryOld: { + bsonType: 'binData' + }, + compressedTimeSeries: { + bsonType: 'binData' + }, + custom: { + bsonType: 'binData' + }, + encrypted: { + bsonType: 'binData' + 
}, + functionData: { + bsonType: 'binData' + }, + generic: { + bsonType: 'binData' + }, + md5: { + bsonType: 'binData' + }, + uuid: { + bsonType: 'binData' + }, + uuidOld: { + bsonType: 'binData' + } + }, + required: [] + }, + boolean: { + bsonType: 'bool' + }, + date: { + bsonType: 'date' + }, + dbRef: { + bsonType: 'dbPointer' + }, + decimal: { + bsonType: 'decimal' + }, + double: { + bsonType: 'double' + }, + int: { + bsonType: 'int' + }, + javascript: { + bsonType: 'javascript' + }, + javascriptWithScope: { + bsonType: 'javascript' + }, + long: { + bsonType: 'long' + }, + maxKey: { + bsonType: 'maxKey' + }, + minKey: { + bsonType: 'minKey' + }, + null: { + bsonType: 'null' + }, + object: { + bsonType: 'object', + properties: { + key: { + bsonType: 'string' + } + }, + required: [] + }, + objectId: { + bsonType: 'objectId' + }, + regex: { + bsonType: 'regex' + }, + string: { + bsonType: 'string' + }, + symbol: { + bsonType: 'symbol' + }, + timestamp: { + bsonType: 'timestamp' + } + } + }); + }); + + it('nested document/object', async function() { + const internal = { + count: 2, + fields: [ + { + name: 'author', + path: [ + 'author' + ], + count: 1, + type: [ + 'Document', + 'Undefined' + ], + probability: 1, + hasDuplicates: false, + types: [ + { + name: 'Document', + path: [ + 'author' + ], + count: 1, + probability: 0.5, + bsonType: 'Document', + fields: [ + { + name: 'name', + path: [ + 'author', + 'name' + ], + count: 1, + type: 'String', + probability: 1, + hasDuplicates: false, + types: [ + { + name: 'String', + path: [ + 'author', + 'name' + ], + count: 1, + probability: 1, + unique: 1, + hasDuplicates: false, + values: [ + 'Peter Sonder' + ], + bsonType: 'String' + } + ] + }, + { + name: 'rating', + path: [ + 'author', + 'rating' + ], + count: 1, + type: 'Double', + probability: 1, + hasDuplicates: false, + types: [ + { + name: 'Double', + path: [ + 'author', + 'rating' + ], + count: 1, + probability: 1, + unique: 1, + hasDuplicates: false, + values: [ + 
1.3 + ], + bsonType: 'Double' + } + ] + } + ] + }, + { + name: 'Undefined', + bsonType: 'Undefined', + unique: 1, + hasDuplicates: false, + path: [ + 'author' + ], + count: 1, + probability: 0.5 + } + ] + } + ] + }; + const standard = await internalSchemaToMongoDB(internal); + assert.deepStrictEqual(standard, { + bsonType: 'object', + required: ['author'], + properties: { + author: { + bsonType: 'object', + required: ['name', 'rating'], + properties: { + name: { + bsonType: 'string' + }, + rating: { + bsonType: 'double' + } + } + } + } + }); + }); + + describe('arrays', async function() { + it('array - single type', async function() { + const internal = { + count: 2, + fields: [ + { + name: 'genres', + path: [ + 'genres' + ], + count: 1, + type: [ + 'array', + 'Undefined' + ], + probability: 0.5, + hasDuplicates: false, + types: [ + { + name: 'array', + path: [ + 'genres' + ], + count: 1, + probability: 0.5, + bsonType: 'Array', + types: [ + { + name: 'String', + path: [ + 'genres' + ], + count: 2, + probability: 1, + unique: 2, + hasDuplicates: false, + values: [ + 'crimi', + 'comedy' + ], + bsonType: 'String' + } + ], + totalCount: 2, + lengths: [ + 2 + ], + averageLength: 2 + }, + { + name: 'Undefined', + bsonType: 'Undefined', + unique: 1, + hasDuplicates: false, + path: [ + 'genres' + ], + count: 1, + probability: 0.5 + } + ] + } + ] + }; + const standard = await internalSchemaToMongoDB(internal); + assert.deepStrictEqual(standard, { + bsonType: 'object', + required: [], + properties: { + genres: { + bsonType: 'array', + items: { + bsonType: 'string' + } + } + } + }); + }); + + it('array - complex mixed type', async function() { + const internal = { + count: 2, + fields: [ + { + name: 'genres', + path: [ + 'genres' + ], + count: 1, + type: [ + 'Array', + 'Undefined' + ], + probability: 0.5, + hasDuplicates: false, + types: [ + { + name: 'Array', + path: [ + 'genres' + ], + count: 1, + probability: 0.5, + bsonType: 'Array', + types: [ + { + name: 'String', + 
path: [ + 'genres' + ], + count: 2, + probability: 0.6666666666666666, + unique: 2, + hasDuplicates: false, + values: [ + 'crimi', + 'comedy' + ], + bsonType: 'String' + }, + { + name: 'Document', + path: [ + 'genres' + ], + count: 1, + probability: 0.3333333333333333, + bsonType: 'Document', + fields: [ + { + name: 'long', + path: [ + 'genres', + 'long' + ], + count: 1, + type: 'String', + probability: 1, + hasDuplicates: false, + types: [ + { + name: 'String', + path: [ + 'genres', + 'long' + ], + count: 1, + probability: 1, + unique: 1, + hasDuplicates: false, + values: [ + 'science fiction' + ], + bsonType: 'String' + } + ] + }, + { + name: 'short', + path: [ + 'genres', + 'short' + ], + count: 1, + type: 'String', + probability: 1, + hasDuplicates: false, + types: [ + { + name: 'String', + path: [ + 'genres', + 'short' + ], + count: 1, + probability: 1, + unique: 1, + hasDuplicates: false, + values: [ + 'scifi' + ], + bsonType: 'String' + } + ] + } + ] + } + ], + totalCount: 3, + lengths: [ + 3 + ], + averageLength: 3 + }, + { + name: 'Undefined', + bsonType: 'Undefined', + unique: 1, + hasDuplicates: false, + path: [ + 'genres' + ], + count: 1, + probability: 0.5 + } + ] + } + ] + }; + const standard = await internalSchemaToMongoDB(internal); + assert.deepStrictEqual(standard, { + bsonType: 'object', + required: [], + properties: { + genres: { + bsonType: 'array', + items: { + anyOf: [ + { + bsonType: 'string' + }, + { + bsonType: 'object', + required: ['long', 'short'], + properties: { + long: { + bsonType: 'string' + }, + short: { + bsonType: 'string' + } + } + } + ] + } + } + } + }); + }); + + it('array - simple mixed type', async function() { + const internal = { + count: 2, + fields: [ + { + name: 'arrayMixedType', + path: [ + 'arrayMixedType' + ], + count: 1, + type: 'Array', + probability: 1, + hasDuplicates: false, + types: [ + { + name: 'Array', + path: [ + 'arrayMixedType' + ], + count: 1, + probability: 1, + bsonType: 'Array', + types: [ + { + 
name: 'int32', + path: [ + 'arrayMixedType' + ], + count: 2, + probability: 0.6666666666666666, + unique: 2, + hasDuplicates: false, + values: [ + 1, + 3 + ], + bsonType: 'Int32' + }, + { + name: 'String', + path: [ + 'arrayMixedType' + ], + count: 1, + probability: 0.3333333333333333, + unique: 1, + hasDuplicates: false, + values: [ + '2' + ], + bsonType: 'String' + } + ], + totalCount: 3, + lengths: [ + 3 + ], + averageLength: 3 + } + ] + } + ] + }; + const standard = await internalSchemaToMongoDB(internal); + assert.deepStrictEqual(standard, { + bsonType: 'object', + required: ['arrayMixedType'], + properties: { + arrayMixedType: { + bsonType: 'array', + items: { + bsonType: ['int', 'string'] + } + } + } + }); + }); + }); + + describe('mixed types', async function() { + it('simple mixed type', async function() { + const internal = { + count: 2, + fields: [ + { + name: 'mixedType', + path: [ + 'mixedType' + ], + count: 2, + type: [ + 'Int32', + 'String', + 'Undefined' + ], + probability: 0.6666666666666666, + hasDuplicates: false, + types: [ + { + name: 'Int32', + path: [ + 'mixedType' + ], + count: 1, + probability: 0.3333333333333333, + unique: 1, + hasDuplicates: false, + values: [ + 1 + ], + bsonType: 'Int32' + }, + { + name: 'String', + path: [ + 'mixedType' + ], + count: 1, + probability: 0.3333333333333333, + unique: 1, + hasDuplicates: false, + values: [ + 'abc' + ], + bsonType: 'String' + }, + { + name: 'Undefined', + bsonType: 'Undefined', + unique: 1, + hasDuplicates: false, + path: [ + 'mixedType' + ], + count: 1, + probability: 0.3333333333333333 + } + ] + } + ] + }; + const standard = await internalSchemaToMongoDB(internal); + assert.deepStrictEqual(standard, { + bsonType: 'object', + required: [], + properties: { + mixedType: { + bsonType: ['int', 'string'] + } + } + }); + }); + + it('complex mixed type', async function() { + const internal = { + count: 2, + fields: [ + { + name: 'mixedComplexType', + path: [ + 'mixedComplexType' + ], + count: 2, + 
type: [ + 'Array', + 'Document', + 'Undefined' + ], + probability: 0.6666666666666666, + hasDuplicates: false, + types: [ + { + name: 'Array', + path: [ + 'mixedComplexType' + ], + count: 1, + probability: 0.3333333333333333, + bsonType: 'Array', + types: [ + { + name: 'Int32', + path: [ + 'mixedComplexType' + ], + count: 3, + probability: 1, + unique: 3, + hasDuplicates: false, + values: [ + 1, + 2, + 3 + ], + bsonType: 'Int32' + } + ], + totalCount: 3, + lengths: [ + 3 + ], + averageLength: 3 + }, + { + name: 'Document', + path: [ + 'mixedComplexType' + ], + count: 1, + probability: 0.3333333333333333, + bsonType: 'Document', + fields: [ + { + name: 'a', + path: [ + 'mixedComplexType', + 'a' + ], + count: 1, + type: 'String', + probability: 1, + hasDuplicates: false, + types: [ + { + name: 'String', + path: [ + 'mixedComplexType', + 'a' + ], + count: 1, + probability: 1, + unique: 1, + hasDuplicates: false, + values: [ + 'bc' + ], + bsonType: 'String' + } + ] + } + ] + }, + { + name: 'Undefined', + bsonType: 'Undefined', + unique: 1, + hasDuplicates: false, + path: [ + 'mixedComplexType' + ], + count: 1, + probability: 0.3333333333333333 + } + ] + } + ] + }; + const standard = await internalSchemaToMongoDB(internal); + assert.deepStrictEqual(standard, { + bsonType: 'object', + required: [], + properties: { + mixedComplexType: { + anyOf: [ + { + bsonType: 'array', + items: { + bsonType: 'int' + } + }, + { + bsonType: 'object', + required: ['a'], + properties: { + a: { + bsonType: 'string' + } + } + } + ] + } + } + }); + }); + }); + + it('can be aborted', async function() { + const internal = { + count: 2, + fields: [ + { + name: 'mixedComplexType', + path: [ + 'mixedComplexType' + ], + count: 2, + type: [ + 'Array', + 'Document', + 'Undefined' + ], + probability: 0.6666666666666666, + hasDuplicates: false, + types: [ + { + name: 'Array', + path: [ + 'mixedComplexType' + ], + count: 1, + probability: 0.3333333333333333, + bsonType: 'Array', + types: [ + { + name: 
'Int32', + path: [ + 'mixedComplexType' + ], + count: 3, + probability: 1, + unique: 3, + hasDuplicates: false, + values: [ + 1, + 2, + 3 + ], + bsonType: 'Int32' + } + ], + totalCount: 3, + lengths: [ + 3 + ], + averageLength: 3 + }, + { + name: 'Document', + path: [ + 'mixedComplexType' + ], + count: 1, + probability: 0.3333333333333333, + bsonType: 'Document', + fields: [ + { + name: 'a', + path: [ + 'mixedComplexType', + 'a' + ], + count: 1, + type: 'String', + probability: 1, + hasDuplicates: false, + types: [ + { + name: 'String', + path: [ + 'mixedComplexType', + 'a' + ], + count: 1, + probability: 1, + unique: 1, + hasDuplicates: false, + values: [ + 'bc' + ], + bsonType: 'String' + } + ] + } + ] + }, + { + name: 'Undefined', + bsonType: 'Undefined', + unique: 1, + hasDuplicates: false, + path: [ + 'mixedComplexType' + ], + count: 1, + probability: 0.3333333333333333 + } + ] + } + ] + }; + const abortController = new AbortController(); + const promise = internalSchemaToMongoDB(internal, { signal: abortController.signal }); + abortController.abort(new Error('Too long, didn\'t wait.')); + await assert.rejects(promise, { + name: 'Error', + message: 'Too long, didn\'t wait.' 
+      });
+    });
+  });
+});
diff --git a/src/schema-convertors/internalToMongoDB.ts b/src/schema-convertors/internalToMongoDB.ts
new file mode 100644
index 0000000..7727c84
--- /dev/null
+++ b/src/schema-convertors/internalToMongoDB.ts
@@ -0,0 +1,139 @@
+import { ArraySchemaType, DocumentSchemaType, Schema as InternalSchema, SchemaType } from '../schema-analyzer';
+import { MongoDBJSONSchema } from '../types';
+
+/** Maps internal schema type names to the bsonType strings used in the MongoDB JSON schema. */
+const InternalTypeToBsonTypeMap: Record<
+  SchemaType['name'] | 'Double' | 'BSONSymbol',
+  string
+> = {
+  Double: 'double',
+  Number: 'double',
+  String: 'string',
+  Document: 'object',
+  Array: 'array',
+  Binary: 'binData',
+  Undefined: 'undefined',
+  ObjectId: 'objectId',
+  Boolean: 'bool',
+  Date: 'date',
+  Null: 'null',
+  RegExp: 'regex',
+  BSONRegExp: 'regex',
+  DBRef: 'dbPointer',
+  BSONSymbol: 'symbol',
+  Symbol: 'symbol',
+  Code: 'javascript',
+  Int32: 'int',
+  Timestamp: 'timestamp',
+  Long: 'long',
+  Decimal128: 'decimal',
+  MinKey: 'minKey',
+  MaxKey: 'maxKey'
+};
+
+/**
+ * Translates an internal type name into its bsonType string.
+ * @throws Error when the name has no entry in the map.
+ */
+const convertInternalType = (type: string) => {
+  const bsonType = InternalTypeToBsonTypeMap[type];
+  if (!bsonType) throw new Error(`Encountered unknown type: ${type}`);
+  return bsonType;
+};
+
+/**
+ * Waits for a timer tick, then rejects if `signal` has been aborted (with the
+ * signal's reason when one is set) and resolves otherwise.
+ */
+async function allowAbort(signal?: AbortSignal) {
+  return new Promise<void>((resolve, reject) =>
+    setTimeout(() => {
+      if (signal?.aborted) return reject(signal?.reason || new Error('Operation aborted'));
+      resolve();
+    })
+  );
+}
+
+/**
+ * Converts one internal type into a MongoDB JSON schema node, descending into
+ * array item types and document fields.
+ */
+async function parseType(type: SchemaType, signal?: AbortSignal): Promise<MongoDBJSONSchema> {
+  await allowAbort(signal);
+  const schema: MongoDBJSONSchema = {
+    bsonType: convertInternalType(type.bsonType)
+  };
+  switch (type.bsonType) {
+    case 'Array':
+      // Forward the signal so an abort is also observed while converting array items.
+      schema.items = await parseTypes((type as ArraySchemaType).types, signal);
+      break;
+    case 'Document':
+      Object.assign(schema,
+        await parseFields((type as DocumentSchemaType).fields, signal)
+      );
+      break;
+  }
+
+  return schema;
+}
+
+/**
+ * Converts the list of types recorded for a field or an array. `Undefined`
+ * entries are dropped; a single remaining type is converted directly, a mix
+ * that contains a Document or Array becomes `anyOf`, and any other mix becomes
+ * a list of bsonType strings.
+ */
+async function parseTypes(types: SchemaType[], signal?: AbortSignal): Promise<MongoDBJSONSchema> {
+  await allowAbort(signal);
+  const definedTypes = types.filter(type => type.bsonType.toLowerCase() !== 'undefined');
+  const isSingleType = definedTypes.length === 1;
+  if (isSingleType) {
+    return parseType(definedTypes[0], signal);
+  }
+  const parsedTypes = await Promise.all(definedTypes.map(type => parseType(type, signal)));
+  if (definedTypes.some(type => ['Document', 'Array'].includes(type.bsonType))) {
+    return {
+      anyOf: parsedTypes
+    };
+  }
+  return {
+    bsonType: definedTypes.map((type) => convertInternalType(type.bsonType))
+  };
+}
+
+/**
+ * Builds `properties` for the given fields and lists as `required` the fields
+ * whose probability is exactly 1.
+ */
+async function parseFields(fields: DocumentSchemaType['fields'], signal?: AbortSignal): Promise<{
+  required: MongoDBJSONSchema['required'],
+  properties: MongoDBJSONSchema['properties'],
+}> {
+  const required = [];
+  const properties: MongoDBJSONSchema['properties'] = {};
+  for (const field of fields) {
+    if (field.probability === 1) required.push(field.name);
+    properties[field.name] = await parseTypes(field.types, signal);
+  }
+
+  return { required, properties };
+}
+
+/**
+ * Converts an internal schema into a MongoDB JSON schema for the root object.
+ * Rejects at the next abort check once `options.signal` has been aborted.
+ */
+export default async function internalSchemaToMongodb(
+  internalSchema: InternalSchema,
+  options: {
+    signal?: AbortSignal
+} = {}): Promise<MongoDBJSONSchema> {
+  const { required, properties } = await parseFields(internalSchema.fields, options.signal);
+  const schema: MongoDBJSONSchema = {
+    bsonType: 'object',
+    required,
+    properties
+  };
+  return schema;
+}
diff --git a/src/schema-convertors/internalToStandard.ts b/src/schema-convertors/internalToStandard.ts
new file mode 100644
index 0000000..3d58125
--- /dev/null
+++ b/src/schema-convertors/internalToStandard.ts
@@ -0,0 +1,16 @@
+import { Schema as InternalSchema } from '../schema-analyzer';
+import { StandardJSONSchema } from '../types';
+
+/**
+ * Stub converter from the internal schema to the standard JSON schema format.
+ * Not implemented yet (COMPASS-8700): resolves to an empty schema object.
+ */
+export default function internalSchemaToStandard(
+  /* eslint @typescript-eslint/no-unused-vars: 0 */
+  internalSchema: InternalSchema,
+  options: {
+    signal?: AbortSignal
+}): Promise<StandardJSONSchema> {
+  // TODO: COMPASS-8700
+  return Promise.resolve({} as StandardJSONSchema);
+}
diff --git a/src/types.ts b/src/types.ts
index 2b6c0c3..5cd51e2
100644 --- a/src/types.ts +++ b/src/types.ts @@ -9,7 +9,7 @@ export type MongoDBJSONSchema = Pick