diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..5089a22c --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,7 @@ +# Contributing + +## Release + +```sh +npm run release +``` diff --git a/package.json b/package.json index b70ab4bd..7497c5eb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@algolia/json-stream-analyzer", - "version": "0.2.17", + "version": "1.0.0-beta.3", "main": "index.js", "repository": "git@github.com:algolia/json-stream-analyzer.git", "author": "Jonathan Montane ", @@ -32,7 +32,7 @@ "pre-release": "yarn test && yarn build && cp package.json dist/ && npm publish dist/ --dry-run --access public ", "re-link": "cd dist/ && (yarn unlink || echo 'no package to unlink') && yarn link && cd .. && yarn link @algolia/json-stream-analyzer", "pre-release-test": "yarn pre-release && yarn re-link && jest --config jest.pre-release.config.js", - "release": "(npm whoami || (echo 'this command must be run from npm and not yarn' && exit 1)) && yarn pre-release-test && yarn version && cp package.json dist/ && cp README.md dist/ && cp LICENSE dist/ && npm publish dist/ --access public && git push origin master" + "release": "(npm whoami || (echo 'this command must be run from npm and not yarn' && exit 1)) && yarn pre-release-test && yarn version && cp package.json dist/ && cp README.md dist/ && cp LICENSE dist/ && npm publish dist/ --access public && git push origin" }, "engines": { "node": ">=14.0.0" diff --git a/src/convert.test.ts b/src/convert.test.ts index c608baa2..07b60292 100644 --- a/src/convert.test.ts +++ b/src/convert.test.ts @@ -1,4 +1,6 @@ import convertToSchema from './convert'; +import type { ModelOptions } from './interfaces'; +import type { ObjectType } from './types'; describe('convertToSchema', () => { it('transforms Array into ArrayType', () => { @@ -43,3 +45,116 @@ describe('convertToSchema', () => { expect(converted.counter).toBe(1); }); }); + +describe('statistics', () => { + it('should collect statistics', () => { + const options: ModelOptions = { + collectStatistics: { array: true, boolean: true }, + }; + const converted = convertToSchema( + { bool: true, arr: ['foo', 'bar'] }, + undefined, + options + ) as ObjectType; + + expect(converted.type).toBe('Object'); + + const simplifiedSchema = JSON.parse(JSON.stringify(converted.schema)); + expect(simplifiedSchema).toMatchInlineSnapshot(` + Object { + "arr": Object { + "counter": 1, + "stats": Object { + "lengths": Object { + "2": 1, + }, + }, + "type": "Array", + "types": Object { + "String": Object { + "counter": 1, + "type": "String", + }, + }, + }, + "bool": Object { + "counter": 1, + "stats": Object { + "trueVal": 1, + }, + "type": "Boolean", + }, + } + `); + }); +}); + +describe('modifiers', () => { + it('should allow modification of object', () => { + const options: ModelOptions = { + modifier: (path, content) => { + if ( + path.length !== 1 || + path[0] !== 'foo' || + typeof content !== 'object' + ) { + return content; + } + + let copy: Record = {}; + for (const [key, val] of Object.entries(content)) { + if (typeof val === 'object') { + copy = { ...copy, ...val }; + } else { + copy[key] = val; + } + } + return copy; + }, + }; + const converted = convertToSchema( + { + foo: { + shouldRemove: { + foo: 2, + }, + alsoRemove: { + bar: 1, + }, + notRemoved: true, + }, + }, + undefined, + options + ) as ObjectType; + + expect(converted.type).toBe('Object'); + + const simplifiedSchema = JSON.parse(JSON.stringify(converted.schema)); + expect(simplifiedSchema).toMatchInlineSnapshot(` + Object { + "foo": Object { + "counter": 1, + "schema": Object { + "bar": Object { + "counter": 1, + "type": "Number", + }, + "foo": Object { + "counter": 1, + "type": "Number", + }, + "notRemoved": Object { + "counter": 1, + "stats": Object { + "trueVal": 0, + }, + "type": "Boolean", + }, + }, + "type": "Object", + }, + } + `); + }); +}); diff --git a/src/convert.ts b/src/convert.ts index 1fe37ea4..ce741f59 100644 --- a/src/convert.ts +++ b/src/convert.ts @@ -1,4 +1,4 @@ -import type { SchemaType, SchemaObject } from './interfaces'; +import type { SchemaType, SchemaObject, ModelOptions } from './interfaces'; import { ArrayType, BooleanType, @@ -9,13 +9,29 @@ import { StringType, } from './types'; -const convertToSchema = (content: any, tag?: any): SchemaType => { +const convertToSchema = ( + content: any, + tag?: any, + options?: ModelOptions, + path: string[] = [] +): SchemaType => { + if (options?.modifier) { + // eslint-disable-next-line no-param-reassign + content = options.modifier(path, content); + } + if (typeof content === 'number') { return new NumberType({ counter: 1, tag }); } if (typeof content === 'boolean') { - return new BooleanType({ counter: 1, tag }); + return new BooleanType({ + counter: 1, + tag, + stats: options?.collectStatistics?.boolean + ? { trueVal: content === true ? 1 : 0 } + : undefined, + }); } if (typeof content === 'string') { @@ -33,7 +49,7 @@ const convertToSchema = (content: any, tag?: any): SchemaType => { types = { Missing: new MissingType({ counter: 1, tag }) }; } else { types = content.reduce((partial, item) => { - const schema = convertToSchema(item, tag); + const schema = convertToSchema(item, tag, options, [...path]); const update: SchemaObject = {}; if (partial[schema.type]) { update[schema.type] = partial[schema.type].combine(schema, { @@ -46,12 +62,25 @@ const convertToSchema = (content: any, tag?: any): SchemaType => { return { ...partial, ...update }; }, {}); } - return new ArrayType({ counter: 1, tag }, types); + + return new ArrayType( + { + counter: 1, + tag, + stats: options?.collectStatistics?.array + ? { lengths: { [content.length]: 1 } } + : undefined, + }, + types + ); } const schema: SchemaObject = Object.entries(content).reduce( (schemas: SchemaObject, [key, subContent]) => { - return { ...schemas, [key]: convertToSchema(subContent, tag) }; + return { + ...schemas, + [key]: convertToSchema(subContent, tag, options, [...path, key]), + }; }, {} ); diff --git a/src/interfaces.ts b/src/interfaces.ts index a1df3082..5371f5bd 100644 --- a/src/interfaces.ts +++ b/src/interfaces.ts @@ -116,3 +116,14 @@ export interface Model { addToModel: (record: any) => void; traverseSchema: (path: string[]) => { schema?: SchemaType; path: string[] }; } + +export interface ModelArgs { + tag: (record: any) => any; +} +export interface ModelOptions { + collectStatistics?: { + array?: boolean; + boolean?: boolean; + }; + modifier?: (path: string[], node: any) => any; +} diff --git a/src/models/SimpleTag.ts b/src/models/SimpleTag.ts index 4321e9ce..ae22eeb4 100644 --- a/src/models/SimpleTag.ts +++ b/src/models/SimpleTag.ts @@ -1,17 +1,26 @@ import convertToSchema from '../convert'; -import type { SchemaType, Diagnostic, Model } from '../interfaces'; +import type { + SchemaType, + Diagnostic, + Model, + ModelOptions, + ModelArgs, +} from '../interfaces'; export class SimpleTagModel implements Model { - tag: (record: any) => any; + options: ModelOptions; schema?: SchemaType; - constructor({ tag }: { tag: (record: any) => any }) { - this.tag = tag; + constructor(options: ModelArgs & ModelOptions) { + this.options = options; + this.tag = options.tag; } + tag: ModelArgs['tag'] = () => null; + convert = (record: any): SchemaType => { const tag = this.tag(record); - return convertToSchema(record, tag); + return convertToSchema(record, tag, this.options); }; combineTag = (firstTag: any): any => { @@ -32,7 +41,7 @@ export class SimpleTagModel implements Model { diagnoseRecord = (record: any): Diagnostic[] => { const tag = this.tag(record); - const recordSchema = convertToSchema(record, tag); + const recordSchema = convertToSchema(record, tag, this.options); let combined; if (this.schema) { @@ -55,7 +64,7 @@ export class SimpleTagModel implements Model { } }; - traverseSchema = (path: string[]) => { + traverseSchema = (path: string[]): ReturnType => { if (!this.schema) { return { path, schema: this.schema }; } diff --git a/src/types/ArrayType.test.ts b/src/types/ArrayType.test.ts index 354e9d63..44670e75 100644 --- a/src/types/ArrayType.test.ts +++ b/src/types/ArrayType.test.ts @@ -42,6 +42,19 @@ describe('ArrayType simple test case', () => { expect(combined.counter).toBe(2); }); + it('combines lengths', () => { + const b1 = new ArrayType({ stats: { lengths: { 1: 1 } } }); + const b2 = new ArrayType({ stats: { lengths: { 2: 1 } } }); + const b3 = new ArrayType({ stats: { lengths: { 2: 1 } } }); + + let combined = b1.combine(b2) as ArrayType; + combined = combined.combine(b3) as ArrayType; + + expect(combined.type).toBe('Array'); + expect(combined.counter).toBe(3); + expect(combined.stats).toStrictEqual({ lengths: { 1: 1, 2: 2 } }); + }); + it('combine does not mutate inputs', () => { const b1 = new ArrayType(); const b2 = new ArrayType(); @@ -128,12 +141,27 @@ describe('ArrayType simple test case', () => { describe('Simple Array Type test case', () => { it('defines correct schema for string arrays', () => { + const converted = convertToSchema(['someText', 'someText'], undefined, { + collectStatistics: { + array: true, + }, + }) as ArrayType; + + expect(converted.type).toBe('Array'); + expect(converted.types.String).toBeDefined(); + expect(converted.types.String.counter).toBe(1); + expect(converted.counter).toBe(1); + expect(converted.stats).toStrictEqual({ lengths: { 2: 1 } }); + }); + + it('ignore array length by default', () => { const converted = convertToSchema(['someText', 'someText']) as ArrayType; expect(converted.type).toBe('Array'); expect(converted.types.String).toBeDefined(); expect(converted.types.String.counter).toBe(1); expect(converted.counter).toBe(1); + expect(converted.stats).toStrictEqual({ lengths: {} }); }); it('defines correct schema for boolean arrays', () => { diff --git a/src/types/ArrayType.ts b/src/types/ArrayType.ts index 3217f46e..0fea245c 100644 --- a/src/types/ArrayType.ts +++ b/src/types/ArrayType.ts @@ -15,6 +15,10 @@ const isMultiType = (types: string[]): boolean => { return types.length > 2; }; +export interface ArrayStatistics { + lengths: Record; +} + export class ArrayType implements SchemaType { /** * Unique type ID that can be used to discriminate between different Schema @@ -54,18 +58,27 @@ export class ArrayType implements SchemaType { */ types: SchemaObject; + stats: ArrayStatistics; + constructor( - { counter = 1, tag }: SchemaTypeParams = { counter: 1 }, + { + counter = 1, + tag, + stats, + }: SchemaTypeParams & { stats?: ArrayStatistics } = { + counter: 1, + }, types: SchemaObject = {} ) { this.counter = counter; this.tag = tag; this.type = 'Array'; this.types = types; + this.stats = stats || { lengths: {} }; } /** - * A typeguard to ensure that another SchemaType is of the same type. + * A type guard to ensure that another SchemaType is of the same type. * * @param other - The schema to test. * @returns Whether the schema to test is an ArrayType. @@ -83,6 +96,7 @@ export class ArrayType implements SchemaType { const result = new ArrayType({ counter: this.counter, tag: this.tag, + stats: { ...this.stats }, }); result.types = Object.entries(this.types).reduce( (partial: SchemaObject, [key, schema]) => { @@ -139,11 +153,18 @@ export class ArrayType implements SchemaType { ); const combinedCounter = counter || this.counter + other.counter; + const combinedStats = { ...this.stats }; + for (const [key, val] of Object.entries(other.stats.lengths)) { + const keyN = key as unknown as number; + combinedStats.lengths[keyN] = (combinedStats.lengths[keyN] || 0) + val; + } + // @ts-expect-error ts(2351) return new this.constructor( { counter: combinedCounter, tag: combineTag(this.tag, other.tag), + stats: combinedStats, }, combinedTypes ); diff --git a/src/types/BooleanType.test.ts b/src/types/BooleanType.test.ts index c5dd2117..205fb76e 100644 --- a/src/types/BooleanType.test.ts +++ b/src/types/BooleanType.test.ts @@ -150,5 +150,14 @@ describe('BooleanType simple test case', () => { expect(combined.types.Boolean.counter).toBe(1); expect(combined.types.Missing.counter).toBe(1); }); + + it('should combine stats', () => { + const b1 = new BooleanType({ counter: 2, stats: { trueVal: 2 } }); + const b2 = new BooleanType({ counter: 1, stats: { trueVal: 0 } }); + + const combined = b1.combine(b2) as BooleanType; + + expect(combined.stats).toStrictEqual({ trueVal: 2 }); + }); }); }); diff --git a/src/types/BooleanType.ts b/src/types/BooleanType.ts index a18ae0fe..20aae41a 100644 --- a/src/types/BooleanType.ts +++ b/src/types/BooleanType.ts @@ -9,6 +9,10 @@ import { keepFirst } from '../tags/combiners'; import { UnionType } from './UnionType'; +export interface BooleanStatistics { + trueVal: number; +} + export class BooleanType implements SchemaType { /** * Unique type ID that can be used to discriminate between different Schema @@ -37,22 +41,43 @@ export class BooleanType implements SchemaType { */ counter: number; - constructor({ counter = 1, tag }: SchemaTypeParams = { counter: 1 }) { + stats: BooleanStatistics; + + constructor( + { + counter = 1, + tag, + stats, + }: SchemaTypeParams & { stats?: BooleanStatistics } = { + counter: 1, + } + ) { this.counter = counter; this.tag = tag; this.type = 'Boolean'; + this.stats = stats || { trueVal: 0 }; + } + + /** + * A type guard to ensure that another SchemaType is of the same type. + * + * @param other - The schema to test. + * @returns Whether the schema to test is an BooleanType. + */ + isSameType(other: SchemaType): other is BooleanType { + return other.type === this.type; } /** * Generic method to merge two SchemaType into a single model of the correct - * type, and that can be overriden when a more advanced logic is needed + * type, and that can be overridden when a more advanced logic is needed * (e.g. For Object, Arrays, etc.). * * If the 2 models are of the same type, we can safely merge them together, * otherwise we combine them into a UnionType. * * Important: If you override this method to have a more specific combination - * behaviour, it **MUST** first check that the types are identical, and combine + * behavior, it **MUST** first check that the types are identical, and combine * the two different SchemaTypes into a UnionType if they are not. * * @param other - The schema to combine it with. @@ -68,11 +93,15 @@ export class BooleanType implements SchemaType { combineTag: keepFirst, } ): SchemaType => { - if (other.type === this.type) { + if (this.isSameType(other)) { + const combinedStats = { ...this.stats }; + combinedStats.trueVal += other.stats.trueVal; + // @ts-expect-error ts(2351) const result = new other.constructor({ counter: counter || this.counter + other.counter, tag: combineTag(this.tag, other.tag), + stats: combinedStats, }); return result; } @@ -84,7 +113,7 @@ export class BooleanType implements SchemaType { }; /** - * Generic method to create a copy of the current model. It is overriden when + * Generic method to create a copy of the current model. It is overridden when * a more advanced logic is needed. * * For immutability purposes. @@ -93,7 +122,11 @@ export class BooleanType implements SchemaType { */ copy = () => { // @ts-expect-error ts(2351) - return new this.constructor({ counter: this.counter, tag: this.tag }); + return new this.constructor({ + counter: this.counter, + tag: this.tag, + stats: { ...this.stats }, + }); }; // eslint-disable-next-line @typescript-eslint/no-unused-vars diff --git a/src/types/ObjectType.test.ts b/src/types/ObjectType.test.ts index 098f30b4..6703c3bb 100644 --- a/src/types/ObjectType.test.ts +++ b/src/types/ObjectType.test.ts @@ -148,6 +148,7 @@ describe('ObjectType schema test case', () => { const expectedSchema = { someNums: { type: 'Array', + stats: { lengths: {} }, counter: 1, types: { Number: { @@ -159,6 +160,7 @@ describe('ObjectType schema test case', () => { isSimple: { type: 'Boolean', counter: 1, + stats: { trueVal: 0 }, }, nested: { type: 'Object', @@ -211,6 +213,7 @@ describe('ObjectType schema test case', () => { const expectedSchema = { someNums: { type: 'Array', + stats: { lengths: {} }, counter: 2, tag: 'm1', types: { @@ -233,6 +236,7 @@ describe('ObjectType schema test case', () => { Boolean: { type: 'Boolean', counter: 1, + stats: { trueVal: 0 }, tag: 'm1', }, }, @@ -270,12 +274,14 @@ describe('ObjectType schema test case', () => { schema: { tests: { type: 'Array', + stats: { lengths: {} }, counter: 1, tag: 'm2', types: { Boolean: { type: 'Boolean', counter: 1, + stats: { trueVal: 0 }, tag: 'm2', }, }, @@ -323,6 +329,7 @@ describe('ObjectType schema test case', () => { types: { Array: { type: 'Array', + stats: { lengths: {} }, counter: 1, types: { Object: {