Skip to content

Commit ed6277c

Browse files
committed
feat: add SchemaAccessor COMPASS-8799
1 parent 9372e5a commit ed6277c

File tree

8 files changed

+156
-33
lines changed

8 files changed

+156
-33
lines changed

package-lock.json

Lines changed: 9 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@
5656
"@types/mocha": "^10.0.1",
5757
"@types/node": "^18.11.18",
5858
"@types/reservoir": "^0.1.0",
59+
"@types/json-schema": "^7.0.15",
5960
"@typescript-eslint/eslint-plugin": "^5.47.1",
6061
"@typescript-eslint/parser": "^5.47.1",
6162
"bson": "^6.7.0",

src/index.ts

Lines changed: 19 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import { InternalSchemaBasedAccessor, SchemaAccessor } from './schema-accessor';
12
import { SchemaAnalyzer } from './schema-analyzer';
23
import type {
34
ArraySchemaType,
@@ -6,7 +7,7 @@ import type {
67
DocumentSchemaType,
78
PrimitiveSchemaType,
89
SchemaType,
9-
Schema,
10+
Schema as InternalSchema,
1011
SchemaField,
1112
SchemaParseOptions,
1213
SimplifiedSchemaBaseType,
@@ -17,31 +18,18 @@ import type {
1718
SimplifiedSchema
1819
} from './schema-analyzer';
1920
import * as schemaStats from './stats';
21+
import { AnyIterable, StandardJSONSchema, MongodbJSONSchema, ExtendedJSONSchema } from './types';
22+
import { getCompletedSchemaAnalyzer } from './utils';
2023

21-
type AnyIterable<T = any> = Iterable<T> | AsyncIterable<T>;
22-
23-
function verifyStreamSource(
24-
source: AnyIterable
25-
): AnyIterable {
26-
if (!(Symbol.iterator in source) && !(Symbol.asyncIterator in source)) {
27-
throw new Error(
28-
'Unknown input type for `docs`. Must be an array, ' +
29-
'stream or MongoDB Cursor.'
30-
);
31-
}
32-
33-
return source;
34-
}
35-
36-
async function getCompletedSchemaAnalyzer(
24+
/**
25+
* Analyze documents - schema can be retrieved in different formats.
26+
*/
27+
async function analyzeDocuments(
3728
source: AnyIterable,
3829
options?: SchemaParseOptions
39-
): Promise<SchemaAnalyzer> {
40-
const analyzer = new SchemaAnalyzer(options);
41-
for await (const doc of verifyStreamSource(source)) {
42-
analyzer.analyzeDoc(doc);
43-
}
44-
return analyzer;
30+
): Promise<SchemaAccessor> {
31+
const internalSchema = (await getCompletedSchemaAnalyzer(source, options)).getResult();
32+
return new InternalSchemaBasedAccessor(internalSchema, options?.signal);
4533
}
4634

4735
/**
@@ -51,7 +39,7 @@ async function getCompletedSchemaAnalyzer(
5139
async function parseSchema(
5240
source: AnyIterable,
5341
options?: SchemaParseOptions
54-
): Promise<Schema> {
42+
): Promise<InternalSchema> {
5543
return (await getCompletedSchemaAnalyzer(source, options)).getResult();
5644
}
5745

@@ -78,19 +66,24 @@ export type {
7866
DocumentSchemaType,
7967
PrimitiveSchemaType,
8068
SchemaType,
81-
Schema,
69+
InternalSchema as Schema,
70+
InternalSchema,
8271
SchemaField,
8372
SchemaParseOptions,
8473
SimplifiedSchemaBaseType,
8574
SimplifiedSchemaArrayType,
8675
SimplifiedSchemaDocumentType,
8776
SimplifiedSchemaType,
8877
SimplifiedSchemaField,
89-
SimplifiedSchema
78+
SimplifiedSchema,
79+
StandardJSONSchema,
80+
MongodbJSONSchema,
81+
ExtendedJSONSchema
9082
};
9183

9284
export {
9385
parseSchema,
86+
analyzeDocuments,
9487
getSchemaPaths,
9588
getSimplifiedSchema,
9689
SchemaAnalyzer,

src/schema-accessor.ts

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,48 @@
1+
import { Schema as InternalSchema } from './schema-analyzer';
2+
import { internalSchemaToExtended, internalSchemaToMongodb, internalSchemaToStandard } from './schema-convertors';
3+
import { ExtendedJSONSchema, MongodbJSONSchema, StandardJSONSchema } from './types';
4+
5+
/**
 * Read access to one analyzed schema in each of the supported formats.
 *
 * Every getter is asynchronous and resolves to the schema in the format
 * named by the method.
 */
export interface SchemaAccessor {
  /** Resolves to the schema in the analyzer's internal format. */
  getInternalSchema: () => Promise<InternalSchema>;
  /** Resolves to the schema typed as `StandardJSONSchema`. */
  getStandardJsonSchema: () => Promise<StandardJSONSchema>;
  /** Resolves to the schema typed as `MongodbJSONSchema`. */
  getMongodbJsonSchema: () => Promise<MongodbJSONSchema>;
  /** Resolves to the schema typed as `ExtendedJSONSchema`. */
  getExtendedJsonSchema: () => Promise<ExtendedJSONSchema>;
}
11+
12+
/**
 * Accessor for different schema formats.
 *
 * The internal schema is provided at construction time; the other formats are
 * converted lazily on first request and memoized.
 *
 * What gets memoized is the pending conversion itself (a promise), not just
 * its result, so callers that ask for the same format concurrently share one
 * conversion instead of each starting their own. A conversion that fails is
 * evicted from the cache so that a later call can try again.
 *
 * The optional abort signal is forwarded to every convertor.
 */
export class InternalSchemaBasedAccessor implements SchemaAccessor {
  private readonly internalSchema: InternalSchema;
  private readonly signal?: AbortSignal;
  private standardJSONSchema: Promise<StandardJSONSchema> | undefined;
  private mongodbJSONSchema: Promise<MongodbJSONSchema> | undefined;
  private extendedJSONSchema: Promise<ExtendedJSONSchema> | undefined;

  /**
   * @param internalSchema - Schema in the analyzer's internal format.
   * @param signal - Optional abort signal handed to each conversion.
   */
  constructor(internalSchema: InternalSchema, signal?: AbortSignal) {
    this.signal = signal;
    this.internalSchema = internalSchema;
  }

  /** @returns The internal schema passed to the constructor. */
  async getInternalSchema(): Promise<InternalSchema> {
    return this.internalSchema;
  }

  /** @returns The memoized result of `internalSchemaToStandard`. */
  getStandardJsonSchema(): Promise<StandardJSONSchema> {
    if (this.standardJSONSchema === undefined) {
      this.standardJSONSchema = this.startConversion(
        internalSchemaToStandard,
        () => {
          this.standardJSONSchema = undefined;
        }
      );
    }
    return this.standardJSONSchema;
  }

  /** @returns The memoized result of `internalSchemaToMongodb`. */
  getMongodbJsonSchema(): Promise<MongodbJSONSchema> {
    if (this.mongodbJSONSchema === undefined) {
      this.mongodbJSONSchema = this.startConversion(
        internalSchemaToMongodb,
        () => {
          this.mongodbJSONSchema = undefined;
        }
      );
    }
    return this.mongodbJSONSchema;
  }

  /** @returns The memoized result of `internalSchemaToExtended`. */
  getExtendedJsonSchema(): Promise<ExtendedJSONSchema> {
    if (this.extendedJSONSchema === undefined) {
      this.extendedJSONSchema = this.startConversion(
        internalSchemaToExtended,
        () => {
          this.extendedJSONSchema = undefined;
        }
      );
    }
    return this.extendedJSONSchema;
  }

  /**
   * Runs one convertor against the internal schema and returns the pending
   * result.
   *
   * The convertor is invoked inside an async wrapper so that a synchronous
   * throw becomes a rejection. `evict` is attached as a rejection handler and
   * therefore runs in a later microtask, i.e. after the caller has stored the
   * returned promise in its cache slot.
   */
  private startConversion<T>(
    convert: (
      schema: InternalSchema,
      options: { signal?: AbortSignal }
    ) => T | Promise<T>,
    evict: () => void
  ): Promise<T> {
    const pending = (async (): Promise<T> =>
      convert(this.internalSchema, { signal: this.signal }))();
    // Only observes the failure; the caller still receives the rejection
    // through `pending` itself.
    pending.catch(evict);
    return pending;
  }
}

src/schema-analyzer.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,7 @@ type SemanticTypeMap = {
163163
export type SchemaParseOptions = {
164164
semanticTypes?: boolean | SemanticTypeMap;
165165
storeValues?: boolean;
166+
signal?: AbortSignal;
166167
};
167168

168169
/**

src/schema-convertors.ts

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
import { Schema as InternalSchema } from './schema-analyzer';
2+
import { ExtendedJSONSchema, MongodbJSONSchema, StandardJSONSchema } from './types';
3+
4+
/**
 * Converts an internal schema to a `StandardJSONSchema`.
 *
 * Placeholder implementation (see COMPASS-8700): neither argument is read and
 * a fresh empty object is returned on every call.
 *
 * @param internalSchema - Schema in the analyzer's internal format.
 * @param options - Conversion options; `signal` is currently unused.
 * @returns An empty schema object.
 */
export function internalSchemaToStandard(
  internalSchema: InternalSchema,
  options: { signal?: AbortSignal }
): StandardJSONSchema {
  // TODO: COMPASS-8700
  const placeholder: StandardJSONSchema = {};
  return placeholder;
}
12+
13+
/**
 * Converts an internal schema to a `MongodbJSONSchema`.
 *
 * Placeholder implementation (see COMPASS-8701): neither argument is read and
 * a fresh empty object, asserted to the return type, is returned on every
 * call.
 *
 * @param internalSchema - Schema in the analyzer's internal format.
 * @param options - Conversion options; `signal` is currently unused.
 * @returns An empty object typed as `MongodbJSONSchema`.
 */
export function internalSchemaToMongodb(
  internalSchema: InternalSchema,
  options: { signal?: AbortSignal }
): MongodbJSONSchema {
  // TODO: COMPASS-8701
  const placeholder = {} as MongodbJSONSchema;
  return placeholder;
}
21+
22+
/**
 * Converts an internal schema to an `ExtendedJSONSchema`.
 *
 * Placeholder implementation (see COMPASS-8702): neither argument is read and
 * a fresh empty object, asserted to the return type, is returned on every
 * call.
 *
 * @param internalSchema - Schema in the analyzer's internal format.
 * @param options - Conversion options; `signal` is currently unused.
 * @returns An empty object typed as `ExtendedJSONSchema`.
 */
export function internalSchemaToExtended(
  internalSchema: InternalSchema,
  options: { signal?: AbortSignal }
): ExtendedJSONSchema {
  // TODO: COMPASS-8702
  const placeholder = {} as ExtendedJSONSchema;
  return placeholder;
}

src/types.ts

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
import { JSONSchema4 } from 'json-schema';
2+
3+
/** Plain JSON Schema; an alias for `JSONSchema4` from the `json-schema` typings. */
export type StandardJSONSchema = JSONSchema4;
4+
5+
/**
 * Recursive schema node described by a `bsonType`.
 *
 * Reuses `title`, `required` and `description` from `StandardJSONSchema`;
 * nested nodes (`properties`, `items`, `anyOf`) are themselves
 * `MongodbJSONSchema` values.
 *
 * NOTE(review): presumably the shape used for MongoDB `$jsonSchema`
 * validators — confirm against the convertor once it is implemented.
 */
export type MongodbJSONSchema = Pick<
  StandardJSONSchema,
  'title' | 'required' | 'description'
> & {
  bsonType: string;
  anyOf?: Array<MongodbJSONSchema>;
  items?: Array<MongodbJSONSchema>;
  properties?: { [key: string]: MongodbJSONSchema };
};
11+
12+
/**
 * `StandardJSONSchema` with additional required `x-` prefixed fields:
 * a BSON type name, per-node metadata (`hasDuplicates`, `probability`,
 * `count`) and a list of sample values.
 */
export type ExtendedJSONSchema = StandardJSONSchema & {
  'x-bsonType': string;
  'x-metadata': {
    count: number;
    probability: number;
    hasDuplicates: boolean;
  };
  'x-sampleValues': Array<any>;
};
21+
22+
/** Anything usable with `for await`: an async iterable or a plain iterable. */
export type AnyIterable<T = any> = AsyncIterable<T> | Iterable<T>;

src/utils.ts

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
import { SchemaAnalyzer, SchemaParseOptions } from './schema-analyzer';
2+
import { AnyIterable } from './types';
3+
4+
/**
 * Checks that `source` can be iterated (synchronously or asynchronously) and
 * returns it unchanged.
 *
 * Values that cannot carry an iterator at all (`null`, `undefined`, strings,
 * numbers, ...) are rejected with the same descriptive error as non-iterable
 * objects. Applying the `in` operator to them directly would raise an
 * unrelated `TypeError` instead.
 *
 * @param source - Candidate document source.
 * @returns The same `source` reference.
 * @throws Error when `source` exposes neither `Symbol.iterator` nor
 *   `Symbol.asyncIterator`.
 */
export function verifyStreamSource(
  source: AnyIterable
): AnyIterable {
  // Callers outside the type system can pass anything, so inspect the value
  // as `unknown` rather than trusting the declared type.
  const candidate: unknown = source;
  const holder: object | null =
    typeof candidate === 'object' || typeof candidate === 'function'
      ? candidate
      : null;
  const isIterable =
    holder !== null &&
    (Symbol.iterator in holder || Symbol.asyncIterator in holder);

  if (!isIterable) {
    throw new Error(
      'Unknown input type for `docs`. Must be an array, ' +
        'stream or MongoDB Cursor.'
    );
  }

  return source;
}
16+
17+
/**
 * Feeds every document from `source` into a new `SchemaAnalyzer` and returns
 * the analyzer once the source is exhausted.
 *
 * If `options.signal` is aborted, iteration stops before the next document is
 * analyzed and the promise rejects with the signal's abort reason. The
 * previous code threw the boolean `signal.aborted`, which carries no message
 * or stack.
 *
 * @param source - Documents to analyze; validated with `verifyStreamSource`.
 * @param options - Analyzer options, including the optional abort `signal`.
 * @returns The analyzer after all documents have been processed.
 * @throws The signal's `reason` (or a generic `Error` when the runtime does
 *   not provide one) if the signal is aborted while iterating.
 * @throws Whatever `verifyStreamSource` throws for a non-iterable `source`.
 */
export async function getCompletedSchemaAnalyzer(
  source: AnyIterable,
  options?: SchemaParseOptions
): Promise<SchemaAnalyzer> {
  const analyzer = new SchemaAnalyzer(options);
  const signal = options?.signal;
  for await (const doc of verifyStreamSource(source)) {
    if (signal?.aborted) {
      // `reason` is undefined on runtimes that predate it; fall back to a
      // real Error so callers never receive a bare non-error value from us.
      throw signal.reason ?? new Error('The operation was aborted.');
    }
    analyzer.analyzeDoc(doc);
  }
  return analyzer;
}

0 commit comments

Comments
 (0)